From 24aa7b423890b5cd4e7cc07d99bed229a6166c31 Mon Sep 17 00:00:00 2001 From: inikep Date: Thu, 16 Jun 2016 14:15:32 +0200 Subject: [PATCH 01/15] test-zstd-versions.py: create dictionaries for v0.5.1 and newer --- tests/test-zstd-versions.py | 63 +++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 6 deletions(-) diff --git a/tests/test-zstd-versions.py b/tests/test-zstd-versions.py index 437cd4c0..3c8eac7d 100755 --- a/tests/test-zstd-versions.py +++ b/tests/test-zstd-versions.py @@ -9,6 +9,7 @@ import os import shutil import subprocess import sys +from subprocess import Popen, PIPE repo_url = 'https://github.com/Cyan4973/zstd.git' tmp_dir_name = 'tests/versionsTest' @@ -17,6 +18,27 @@ git_cmd = 'git' test_dat_src = 'README.md' test_dat = 'test_dat' head = 'vdevel' +dict_source = 'dict_source' +dict_files = './zstd/programs/*.c ./zstd/lib/common/*.c ./zstd/lib/compress/*.c ./zstd/lib/decompress/*.c ./zstd/lib/dictBuilder/*.c ./zstd/lib/legacy/*.c ' +dict_files += './zstd/programs/*.h ./zstd/lib/common/*.h ./zstd/lib/compress/*.h ./zstd/lib/dictBuilder/*.h ./zstd/lib/legacy/*.h' + + +def execute(command, print_output=False, print_error=True): + popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=True) + itout = iter(popen.stdout.readline, b"") + iterr = iter(popen.stderr.readline, b"") + stdout_lines = b''.join(list(itout)).decode("utf-8") + if print_output: + print(stdout_lines) + stderr_lines = b''.join(list(iterr)).decode("utf-8") + if print_output: + print(stderr_lines) + popen.communicate() + if popen.returncode is not None and popen.returncode != 0: + if not print_output and print_error: + print(stderr_lines) + raise RuntimeError(stderr_lines) + return stdout_lines + stderr_lines def proc(cmd_args, pipe=True, dummy=False): @@ -85,7 +107,7 @@ def remove_duplicates(): print('duplicated : {} == {}'.format(ref_zst, compared_zst)) -def decompress_zst(tag): +def decompress_zst(tag, zstd_up_to_v05=False): dec_error = 0 list_zst = 
sorted(glob.glob('*.zst')) try: @@ -96,7 +118,11 @@ def decompress_zst(tag): print(file_zst, end=' ') print(tag, end=' ') file_dec = file_zst + '_d64_' + tag + '.dec' - if subprocess.call(['./zstd.' + tag, '-df', file_zst, '-o', file_dec], stderr=DEVNULL) == 0: + if zstd_up_to_v05: + params = ['./zstd.' + tag, '-df', file_zst, file_dec] + else: + params = ['./zstd.' + tag, '-df', file_zst, '-o', file_dec] + if subprocess.call(params, stderr=DEVNULL) == 0: if not filecmp.cmp(file_dec, test_dat): print('ERR !! ') dec_error = 1 @@ -107,12 +133,24 @@ def decompress_zst(tag): return dec_error +def create_dict(tag, dict_source_path): + dict_name = 'dict.' + tag + if not os.path.isfile(dict_name): + cFiles = glob.glob(dict_source_path + "/*.c") + hFiles = glob.glob(dict_source_path + "/*.h") + execute('./zstd.' + tag + ' -f --train ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name) + print(dict_name + ' created') + else: + print(dict_name + ' already exists') + + if __name__ == '__main__': error_code = 0 - base_dir = os.getcwd() + '/..' # /path/to/zstd - tmp_dir = base_dir + '/' + tmp_dir_name # /path/to/zstd/tests/versionsTest - clone_dir = tmp_dir + '/' + 'zstd' # /path/to/zstd/tests/versionsTest/zstd - programs_dir = base_dir + '/programs' # /path/to/zstd/programs + base_dir = os.getcwd() + '/..' 
# /path/to/zstd + tmp_dir = base_dir + '/' + tmp_dir_name # /path/to/zstd/tests/versionsTest + clone_dir = tmp_dir + '/' + 'zstd' # /path/to/zstd/tests/versionsTest/zstd + dict_source_path = tmp_dir + '/' + dict_source # /path/to/zstd/tests/versionsTest/dict_source + programs_dir = base_dir + '/programs' # /path/to/zstd/programs os.makedirs(tmp_dir, exist_ok=True) # since Travis clones limited depth, we should clone full repository @@ -151,15 +189,28 @@ if __name__ == '__main__': for dec in glob.glob("*.dec"): os.remove(dec) + # copy *.c and *.h to a temporary directory ("dict_source") + if not os.path.isdir(dict_source_path): + os.mkdir(dict_source_path) + print('cp ' + dict_files + ' ' + dict_source_path) + subprocess.call(['cp ' + dict_files + ' ' + dict_source_path], shell=True) + + dictFiles = glob.glob("dict*") + print('dictFiles=' + str(dictFiles)) + print('Compress test.dat by all released zstd') error_code = 0 for tag in tags: print(tag) + if tag >= 'v0.5.1': + create_dict(tag, dict_source_path) compress_sample(tag, test_dat) remove_duplicates() if tag >= 'v0.5.1': error_code += decompress_zst(tag) + else: + error_code += decompress_zst(tag, zstd_up_to_v05=True) print('') print('Enumerate different compressed files') From 150152fb8a0b2a5b3938f41a28cbe4eb7129fd16 Mon Sep 17 00:00:00 2001 From: inikep Date: Thu, 16 Jun 2016 19:29:09 +0200 Subject: [PATCH 02/15] test-zstd-versions.py: test dictiony compression for v0.5.1 and newer --- tests/test-zstd-versions.py | 79 +++++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 16 deletions(-) diff --git a/tests/test-zstd-versions.py b/tests/test-zstd-versions.py index 3c8eac7d..dffa79dd 100755 --- a/tests/test-zstd-versions.py +++ b/tests/test-zstd-versions.py @@ -67,21 +67,44 @@ def get_git_tags(): return tags +def create_dict(tag, dict_source_path): + dict_name = 'dict.' 
+ tag + if not os.path.isfile(dict_name): + cFiles = glob.glob(dict_source_path + "/*.c") + hFiles = glob.glob(dict_source_path + "/*.h") + execute('./zstd.' + tag + ' -f --train ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name) + print(dict_name + ' created') + else: + print(dict_name + ' already exists') + + +def dict_compress_sample(tag, sample): + dict_name = 'dict.' + tag + try: + from subprocess import DEVNULL # py3k + except ImportError: + DEVNULL = open(os.devnull, 'wb') + subprocess.call(['./zstd.' + tag, '-D', dict_name, '-f', sample, '-o', sample + '_01_64' + tag + '_dict.zst'], stderr=DEVNULL) + # zstdFiles = glob.glob("*.zst*") + # print(zstdFiles) + print(tag + " : dict compression completed") + + def compress_sample(tag, sample): try: from subprocess import DEVNULL # py3k except ImportError: DEVNULL = open(os.devnull, 'wb') if subprocess.call(['./zstd.' + tag, '-f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_01_64_' + tag + '.zst') + os.rename(sample + '.zst', sample + '_01_64_' + tag + '_nodic.zst') if subprocess.call(['./zstd.' + tag, '-5f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_05_64_' + tag + '.zst') + os.rename(sample + '.zst', sample + '_05_64_' + tag + '_nodic.zst') if subprocess.call(['./zstd.' + tag, '-9f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_09_64_' + tag + '.zst') + os.rename(sample + '.zst', sample + '_09_64_' + tag + '_nodic.zst') if subprocess.call(['./zstd.' + tag, '-15f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_15_64_' + tag + '.zst') + os.rename(sample + '.zst', sample + '_15_64_' + tag + '_nodic.zst') if subprocess.call(['./zstd.' 
+ tag, '-18f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_18_64_' + tag + '.zst') + os.rename(sample + '.zst', sample + '_18_64_' + tag + '_nodic.zst') # zstdFiles = glob.glob("*.zst*") # print(zstdFiles) print(tag + " : compression completed") @@ -94,7 +117,7 @@ def sha1_of_file(filepath): def remove_duplicates(): - list_of_zst = sorted(glob.glob('*.zst')) + list_of_zst = sorted(glob.glob('*_nodic.zst')) for i, ref_zst in enumerate(list_of_zst): if not os.path.isfile(ref_zst): continue @@ -109,7 +132,7 @@ def remove_duplicates(): def decompress_zst(tag, zstd_up_to_v05=False): dec_error = 0 - list_zst = sorted(glob.glob('*.zst')) + list_zst = sorted(glob.glob('*_nodic.zst')) try: from subprocess import DEVNULL # py3k except ImportError: @@ -133,15 +156,37 @@ def decompress_zst(tag, zstd_up_to_v05=False): return dec_error -def create_dict(tag, dict_source_path): - dict_name = 'dict.' + tag - if not os.path.isfile(dict_name): - cFiles = glob.glob(dict_source_path + "/*.c") - hFiles = glob.glob(dict_source_path + "/*.h") - execute('./zstd.' + tag + ' -f --train ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name) - print(dict_name + ' created') - else: - print(dict_name + ' already exists') +def decompress_dict(tag, zstd_up_to_v05=False): + dec_error = 0 + list_zst = sorted(glob.glob('*_dict.zst')) + try: + from subprocess import DEVNULL # py3k + except ImportError: + DEVNULL = open(os.devnull, 'wb') + for file_zst in list_zst: + dict_tag = file_zst[0:len(file_zst)-9] # remove "_dict.zst" + dict_tag = dict_tag[dict_tag.rfind('v'):] + if dict_tag == 'vel': + dict_tag = head + dict_name = 'dict.' + dict_tag + #print("dict_tag=" + dict_tag + " dict_name=" + dict_name) + print(file_zst, end=' ') + print(tag, end=' ') + print(dict_tag, end=' ') + file_dec = file_zst + '_d64_' + tag + '.dec' + if zstd_up_to_v05: + params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, file_dec] + else: + params = ['./zstd.' 
+ tag, '-D', dict_name, '-df', file_zst, '-o', file_dec] + if subprocess.call(params, stderr=DEVNULL) == 0: + if not filecmp.cmp(file_dec, test_dat): + print('ERR !! ') + dec_error = 1 + else: + print('OK ') + else: + print('command does not work') + return dec_error if __name__ == '__main__': @@ -205,6 +250,8 @@ if __name__ == '__main__': print(tag) if tag >= 'v0.5.1': create_dict(tag, dict_source_path) + dict_compress_sample(tag, test_dat) + decompress_dict(tag) compress_sample(tag, test_dat) remove_duplicates() if tag >= 'v0.5.1': From d1af4e66b6e849ab075768376397f2cc619ffb01 Mon Sep 17 00:00:00 2001 From: inikep Date: Thu, 16 Jun 2016 20:23:11 +0200 Subject: [PATCH 03/15] test-zstd-versions.py: dictionary compression with levels 1, 5, 9, 15, 18 --- tests/test-zstd-versions.py | 38 ++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/tests/test-zstd-versions.py b/tests/test-zstd-versions.py index dffa79dd..dd5b8eb8 100755 --- a/tests/test-zstd-versions.py +++ b/tests/test-zstd-versions.py @@ -84,7 +84,11 @@ def dict_compress_sample(tag, sample): from subprocess import DEVNULL # py3k except ImportError: DEVNULL = open(os.devnull, 'wb') - subprocess.call(['./zstd.' + tag, '-D', dict_name, '-f', sample, '-o', sample + '_01_64' + tag + '_dict.zst'], stderr=DEVNULL) + subprocess.call(['./zstd.' + tag, '-D', dict_name, '-f', sample, '-o', sample + '_01_64_' + tag + '_dictio.zst'], stderr=DEVNULL) + subprocess.call(['./zstd.' + tag, '-D', dict_name, '-5f', sample, '-o', sample + '_05_64_' + tag + '_dictio.zst'], stderr=DEVNULL) + subprocess.call(['./zstd.' + tag, '-D', dict_name, '-9f', sample, '-o', sample + '_09_64_' + tag + '_dictio.zst'], stderr=DEVNULL) + subprocess.call(['./zstd.' + tag, '-D', dict_name, '-15f', sample, '-o', sample + '_15_64_' + tag + '_dictio.zst'], stderr=DEVNULL) + subprocess.call(['./zstd.' 
+ tag, '-D', dict_name, '-18f', sample, '-o', sample + '_18_64_' + tag + '_dictio.zst'], stderr=DEVNULL) # zstdFiles = glob.glob("*.zst*") # print(zstdFiles) print(tag + " : dict compression completed") @@ -96,15 +100,15 @@ def compress_sample(tag, sample): except ImportError: DEVNULL = open(os.devnull, 'wb') if subprocess.call(['./zstd.' + tag, '-f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_01_64_' + tag + '_nodic.zst') + os.rename(sample + '.zst', sample + '_01_64_' + tag + '_nodict.zst') if subprocess.call(['./zstd.' + tag, '-5f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_05_64_' + tag + '_nodic.zst') + os.rename(sample + '.zst', sample + '_05_64_' + tag + '_nodict.zst') if subprocess.call(['./zstd.' + tag, '-9f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_09_64_' + tag + '_nodic.zst') + os.rename(sample + '.zst', sample + '_09_64_' + tag + '_nodict.zst') if subprocess.call(['./zstd.' + tag, '-15f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_15_64_' + tag + '_nodic.zst') + os.rename(sample + '.zst', sample + '_15_64_' + tag + '_nodict.zst') if subprocess.call(['./zstd.' 
+ tag, '-18f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_18_64_' + tag + '_nodic.zst') + os.rename(sample + '.zst', sample + '_18_64_' + tag + '_nodict.zst') # zstdFiles = glob.glob("*.zst*") # print(zstdFiles) print(tag + " : compression completed") @@ -117,7 +121,7 @@ def sha1_of_file(filepath): def remove_duplicates(): - list_of_zst = sorted(glob.glob('*_nodic.zst')) + list_of_zst = sorted(glob.glob('*.zst')) for i, ref_zst in enumerate(list_of_zst): if not os.path.isfile(ref_zst): continue @@ -132,7 +136,7 @@ def remove_duplicates(): def decompress_zst(tag, zstd_up_to_v05=False): dec_error = 0 - list_zst = sorted(glob.glob('*_nodic.zst')) + list_zst = sorted(glob.glob('*_nodict.zst')) try: from subprocess import DEVNULL # py3k except ImportError: @@ -158,21 +162,19 @@ def decompress_zst(tag, zstd_up_to_v05=False): def decompress_dict(tag, zstd_up_to_v05=False): dec_error = 0 - list_zst = sorted(glob.glob('*_dict.zst')) + list_zst = sorted(glob.glob('*_dictio.zst')) try: from subprocess import DEVNULL # py3k except ImportError: DEVNULL = open(os.devnull, 'wb') for file_zst in list_zst: - dict_tag = file_zst[0:len(file_zst)-9] # remove "_dict.zst" - dict_tag = dict_tag[dict_tag.rfind('v'):] - if dict_tag == 'vel': + dict_tag = file_zst[0:len(file_zst)-11] # remove "_dictio.zst" + if head in dict_tag: # find vdevel dict_tag = head + else: + dict_tag = dict_tag[dict_tag.rfind('v'):] dict_name = 'dict.' + dict_tag - #print("dict_tag=" + dict_tag + " dict_name=" + dict_name) - print(file_zst, end=' ') - print(tag, end=' ') - print(dict_tag, end=' ') + print(file_zst + ' ' + tag + ' dict=' + dict_tag, end=' ') file_dec = file_zst + '_d64_' + tag + '.dec' if zstd_up_to_v05: params = ['./zstd.' 
+ tag, '-D', dict_name, '-df', file_zst, file_dec] @@ -186,6 +188,7 @@ def decompress_dict(tag, zstd_up_to_v05=False): print('OK ') else: print('command does not work') + dec_error = 1 return dec_error @@ -251,7 +254,8 @@ if __name__ == '__main__': if tag >= 'v0.5.1': create_dict(tag, dict_source_path) dict_compress_sample(tag, test_dat) - decompress_dict(tag) + remove_duplicates() + error_code += decompress_dict(tag) compress_sample(tag, test_dat) remove_duplicates() if tag >= 'v0.5.1': From 19cab46f2f2ae3e512c00df98043083101410810 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 17 Jun 2016 12:54:52 +0200 Subject: [PATCH 04/15] Joined `seqStore` initialization at dispatch point --- .gitignore | 1 + Makefile | 1 + lib/compress/zstd_compress.c | 11 ++--------- lib/compress/zstd_opt.h | 2 -- programs/.gitignore | 1 + 5 files changed, 5 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index a06c2afa..18165240 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ *.dylib # Executables +zstd *.exe *.out *.app diff --git a/Makefile b/Makefile index 18db0424..77a67a23 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,7 @@ all: zstdprogram: $(MAKE) -C $(PRGDIR) + mv $(PRGDIR)/zstd . 
zlibwrapper: $(MAKE) -C $(ZSTDDIR) all diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 1ae32183..b8d1d2c0 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1128,7 +1128,6 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, size_t offset_1=cctx->rep[0], offset_2=cctx->rep[1]; /* init */ - ZSTD_resetSeqStore(seqStorePtr); ip += (ip==lowest); { U32 const maxRep = (U32)(ip-lowest); if (offset_1 > maxRep) offset_1 = 0; @@ -1239,7 +1238,6 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1]; /* init */ - ZSTD_resetSeqStore(seqStorePtr); /* skip first position to avoid read overflow during repcode match check */ hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); ip++; @@ -1743,7 +1741,6 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, /* init */ ip += (ip==base); ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_resetSeqStore(seqStorePtr); { U32 i; U32 const maxRep = (U32)(ip-base); for (i=0; irep[i]; } ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_resetSeqStore(seqStorePtr); ip += (ip == prefixStart); /* Match Loop */ @@ -2097,11 +2093,7 @@ typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t sr static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) { static const ZSTD_blockCompressor blockCompressor[2][6] = { -#if 1 { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt }, -#else - { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }, -#endif { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, 
ZSTD_compressBlock_btopt_extDict } }; @@ -2111,8 +2103,9 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit); + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit); if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */ + ZSTD_resetSeqStore(&(zc->seqStore)); blockCompressor(zc, src, srcSize); return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize); } diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 8b15bf6a..97b1623b 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -465,7 +465,6 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, /* init */ ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_resetSeqStore(seqStorePtr); ZSTD_rescaleFreqs(seqStorePtr); ip += (ip==prefixStart); { U32 i; for (i=0; irep[i]; } @@ -757,7 +756,6 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, { U32 i; for (i=0; irep[i]; } ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_resetSeqStore(seqStorePtr); ZSTD_rescaleFreqs(seqStorePtr); ip += (ip==prefixStart); diff --git a/programs/.gitignore b/programs/.gitignore index 5f50de0d..cbe39dcd 100644 --- a/programs/.gitignore +++ b/programs/.gitignore @@ -50,3 +50,4 @@ afl # Misc files *.bat fileTests.sh +dirTest* From 4545671b4445b30f8b67d6e374c2d563db6b0675 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 17 Jun 2016 13:39:43 +0200 Subject: [PATCH 05/15] test-zstd-versions.py: fixed DEVNULL --- tests/test-zstd-versions.py | 44 ++++++++++--------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/tests/test-zstd-versions.py 
b/tests/test-zstd-versions.py index dd5b8eb8..499f8618 100755 --- a/tests/test-zstd-versions.py +++ b/tests/test-zstd-versions.py @@ -7,9 +7,9 @@ import glob import hashlib import os import shutil -import subprocess import sys -from subprocess import Popen, PIPE +import subprocess +from subprocess import Popen, PIPE, DEVNULL repo_url = 'https://github.com/Cyan4973/zstd.git' tmp_dir_name = 'tests/versionsTest' @@ -25,31 +25,25 @@ dict_files += './zstd/programs/*.h ./zstd/lib/common/*.h ./zstd/lib/compress/*.h def execute(command, print_output=False, print_error=True): popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=True) - itout = iter(popen.stdout.readline, b"") - iterr = iter(popen.stderr.readline, b"") - stdout_lines = b''.join(list(itout)).decode("utf-8") + stdout_lines, stderr_lines = popen.communicate() + stderr_lines = stderr_lines.decode("utf-8") + stdout_lines = stdout_lines.decode("utf-8") if print_output: print(stdout_lines) - stderr_lines = b''.join(list(iterr)).decode("utf-8") - if print_output: print(stderr_lines) - popen.communicate() if popen.returncode is not None and popen.returncode != 0: if not print_output and print_error: print(stderr_lines) - raise RuntimeError(stderr_lines) - return stdout_lines + stderr_lines + return popen.returncode def proc(cmd_args, pipe=True, dummy=False): if dummy: return if pipe: - subproc = subprocess.Popen(cmd_args, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + subproc = Popen(cmd_args, stdout=PIPE, stderr=PIPE) else: - subproc = subprocess.Popen(cmd_args) + subproc = Popen(cmd_args) return subproc.communicate() @@ -72,18 +66,16 @@ def create_dict(tag, dict_source_path): if not os.path.isfile(dict_name): cFiles = glob.glob(dict_source_path + "/*.c") hFiles = glob.glob(dict_source_path + "/*.h") - execute('./zstd.' + tag + ' -f --train ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name) - print(dict_name + ' created') + if execute('./zstd.' 
+ tag + ' -f --train ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False) == 0: + print(dict_name + ' created') + else: + print('ERROR: creating of ' + dict_name + ' failed') else: print(dict_name + ' already exists') def dict_compress_sample(tag, sample): dict_name = 'dict.' + tag - try: - from subprocess import DEVNULL # py3k - except ImportError: - DEVNULL = open(os.devnull, 'wb') subprocess.call(['./zstd.' + tag, '-D', dict_name, '-f', sample, '-o', sample + '_01_64_' + tag + '_dictio.zst'], stderr=DEVNULL) subprocess.call(['./zstd.' + tag, '-D', dict_name, '-5f', sample, '-o', sample + '_05_64_' + tag + '_dictio.zst'], stderr=DEVNULL) subprocess.call(['./zstd.' + tag, '-D', dict_name, '-9f', sample, '-o', sample + '_09_64_' + tag + '_dictio.zst'], stderr=DEVNULL) @@ -95,10 +87,6 @@ def dict_compress_sample(tag, sample): def compress_sample(tag, sample): - try: - from subprocess import DEVNULL # py3k - except ImportError: - DEVNULL = open(os.devnull, 'wb') if subprocess.call(['./zstd.' + tag, '-f', sample], stderr=DEVNULL) == 0: os.rename(sample + '.zst', sample + '_01_64_' + tag + '_nodict.zst') if subprocess.call(['./zstd.' 
+ tag, '-5f', sample], stderr=DEVNULL) == 0: @@ -137,10 +125,6 @@ def remove_duplicates(): def decompress_zst(tag, zstd_up_to_v05=False): dec_error = 0 list_zst = sorted(glob.glob('*_nodict.zst')) - try: - from subprocess import DEVNULL # py3k - except ImportError: - DEVNULL = open(os.devnull, 'wb') for file_zst in list_zst: print(file_zst, end=' ') print(tag, end=' ') @@ -163,10 +147,6 @@ def decompress_zst(tag, zstd_up_to_v05=False): def decompress_dict(tag, zstd_up_to_v05=False): dec_error = 0 list_zst = sorted(glob.glob('*_dictio.zst')) - try: - from subprocess import DEVNULL # py3k - except ImportError: - DEVNULL = open(os.devnull, 'wb') for file_zst in list_zst: dict_tag = file_zst[0:len(file_zst)-11] # remove "_dictio.zst" if head in dict_tag: # find vdevel From 2ef16501cacc0c40b0c1d0d0da28707358b3a00b Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 17 Jun 2016 14:07:42 +0200 Subject: [PATCH 06/15] test-zstd-versions.py: improved error handling --- tests/test-zstd-versions.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tests/test-zstd-versions.py b/tests/test-zstd-versions.py index 499f8618..e65c753b 100755 --- a/tests/test-zstd-versions.py +++ b/tests/test-zstd-versions.py @@ -23,8 +23,8 @@ dict_files = './zstd/programs/*.c ./zstd/lib/common/*.c ./zstd/lib/compress/*.c dict_files += './zstd/programs/*.h ./zstd/lib/common/*.h ./zstd/lib/compress/*.h ./zstd/lib/dictBuilder/*.h ./zstd/lib/legacy/*.h' -def execute(command, print_output=False, print_error=True): - popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=True) +def execute(command, print_output=False, print_error=True, param_shell=False): + popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell) stdout_lines, stderr_lines = popen.communicate() stderr_lines = stderr_lines.decode("utf-8") stdout_lines = stdout_lines.decode("utf-8") @@ -66,7 +66,7 @@ def create_dict(tag, dict_source_path): if not os.path.isfile(dict_name): cFiles = 
glob.glob(dict_source_path + "/*.c") hFiles = glob.glob(dict_source_path + "/*.h") - if execute('./zstd.' + tag + ' -f --train ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False) == 0: + if execute('./zstd.' + tag + ' -f --train ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True) == 0: print(dict_name + ' created') else: print('ERROR: creating of ' + dict_name + ' failed') @@ -133,7 +133,7 @@ def decompress_zst(tag, zstd_up_to_v05=False): params = ['./zstd.' + tag, '-df', file_zst, file_dec] else: params = ['./zstd.' + tag, '-df', file_zst, '-o', file_dec] - if subprocess.call(params, stderr=DEVNULL) == 0: + if execute(params) == 0: if not filecmp.cmp(file_dec, test_dat): print('ERR !! ') dec_error = 1 @@ -160,7 +160,7 @@ def decompress_dict(tag, zstd_up_to_v05=False): params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, file_dec] else: params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, '-o', file_dec] - if subprocess.call(params, stderr=DEVNULL) == 0: + if execute(params) == 0: if not filecmp.cmp(file_dec, test_dat): print('ERR !! 
') dec_error = 1 @@ -221,10 +221,7 @@ if __name__ == '__main__': if not os.path.isdir(dict_source_path): os.mkdir(dict_source_path) print('cp ' + dict_files + ' ' + dict_source_path) - subprocess.call(['cp ' + dict_files + ' ' + dict_source_path], shell=True) - - dictFiles = glob.glob("dict*") - print('dictFiles=' + str(dictFiles)) + execute('cp ' + dict_files + ' ' + dict_source_path, param_shell=True) print('Compress test.dat by all released zstd') From 7e3597bf38858c7b4b9fbc77b09c19feca3d128e Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 17 Jun 2016 14:43:24 +0200 Subject: [PATCH 07/15] test-zstd-versions.py: create and test dictionaries for v0.5.0 --- tests/test-zstd-versions.py | 46 ++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/tests/test-zstd-versions.py b/tests/test-zstd-versions.py index e65c753b..b9a81521 100755 --- a/tests/test-zstd-versions.py +++ b/tests/test-zstd-versions.py @@ -9,7 +9,7 @@ import os import shutil import sys import subprocess -from subprocess import Popen, PIPE, DEVNULL +from subprocess import Popen, PIPE repo_url = 'https://github.com/Cyan4973/zstd.git' tmp_dir_name = 'tests/versionsTest' @@ -66,7 +66,11 @@ def create_dict(tag, dict_source_path): if not os.path.isfile(dict_name): cFiles = glob.glob(dict_source_path + "/*.c") hFiles = glob.glob(dict_source_path + "/*.h") - if execute('./zstd.' + tag + ' -f --train ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True) == 0: + if tag == 'v0.5.0': + result = execute('./dictBuilder.' + tag + ' ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True) + else: + result = execute('./zstd.' 
+ tag + ' ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True) + if result == 0: print(dict_name + ' created') else: print('ERROR: creating of ' + dict_name + ' failed') @@ -76,17 +80,24 @@ def create_dict(tag, dict_source_path): def dict_compress_sample(tag, sample): dict_name = 'dict.' + tag - subprocess.call(['./zstd.' + tag, '-D', dict_name, '-f', sample, '-o', sample + '_01_64_' + tag + '_dictio.zst'], stderr=DEVNULL) - subprocess.call(['./zstd.' + tag, '-D', dict_name, '-5f', sample, '-o', sample + '_05_64_' + tag + '_dictio.zst'], stderr=DEVNULL) - subprocess.call(['./zstd.' + tag, '-D', dict_name, '-9f', sample, '-o', sample + '_09_64_' + tag + '_dictio.zst'], stderr=DEVNULL) - subprocess.call(['./zstd.' + tag, '-D', dict_name, '-15f', sample, '-o', sample + '_15_64_' + tag + '_dictio.zst'], stderr=DEVNULL) - subprocess.call(['./zstd.' + tag, '-D', dict_name, '-18f', sample, '-o', sample + '_18_64_' + tag + '_dictio.zst'], stderr=DEVNULL) + DEVNULL = open(os.devnull, 'wb') + if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_01_64_' + tag + '_dictio.zst') + if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-5f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_05_64_' + tag + '_dictio.zst') + if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-9f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_09_64_' + tag + '_dictio.zst') + if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-15f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_15_64_' + tag + '_dictio.zst') + if subprocess.call(['./zstd.' 
+ tag, '-D', dict_name, '-18f', sample], stderr=DEVNULL) == 0: + os.rename(sample + '.zst', sample + '_18_64_' + tag + '_dictio.zst') # zstdFiles = glob.glob("*.zst*") # print(zstdFiles) print(tag + " : dict compression completed") def compress_sample(tag, sample): + DEVNULL = open(os.devnull, 'wb') if subprocess.call(['./zstd.' + tag, '-f', sample], stderr=DEVNULL) == 0: os.rename(sample + '.zst', sample + '_01_64_' + tag + '_nodict.zst') if subprocess.call(['./zstd.' + tag, '-5f', sample], stderr=DEVNULL) == 0: @@ -122,14 +133,14 @@ def remove_duplicates(): print('duplicated : {} == {}'.format(ref_zst, compared_zst)) -def decompress_zst(tag, zstd_up_to_v05=False): +def decompress_zst(tag): dec_error = 0 list_zst = sorted(glob.glob('*_nodict.zst')) for file_zst in list_zst: print(file_zst, end=' ') print(tag, end=' ') file_dec = file_zst + '_d64_' + tag + '.dec' - if zstd_up_to_v05: + if tag <= 'v0.5.0': params = ['./zstd.' + tag, '-df', file_zst, file_dec] else: params = ['./zstd.' + tag, '-df', file_zst, '-o', file_dec] @@ -144,7 +155,7 @@ def decompress_zst(tag, zstd_up_to_v05=False): return dec_error -def decompress_dict(tag, zstd_up_to_v05=False): +def decompress_dict(tag): dec_error = 0 list_zst = sorted(glob.glob('*_dictio.zst')) for file_zst in list_zst: @@ -153,10 +164,12 @@ def decompress_dict(tag, zstd_up_to_v05=False): dict_tag = head else: dict_tag = dict_tag[dict_tag.rfind('v'):] + if tag == 'v0.6.0' and dict_tag < 'v0.6.0': + continue dict_name = 'dict.' + dict_tag print(file_zst + ' ' + tag + ' dict=' + dict_tag, end=' ') file_dec = file_zst + '_d64_' + tag + '.dec' - if zstd_up_to_v05: + if tag <= 'v0.5.0': params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, file_dec] else: params = ['./zstd.' 
+ tag, '-D', dict_name, '-df', file_zst, '-o', file_dec] @@ -203,6 +216,10 @@ if __name__ == '__main__': os.makedirs(r_dir, exist_ok=True) os.chdir(clone_dir) git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False) + if tag == 'v0.5.0': + os.chdir(r_dir + '/dictBuilder') # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder + make(['clean', 'dictBuilder'], False) + shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag)) os.chdir(r_dir + '/programs') # /path/to/zstd/tests/versionsTest//programs make(['clean', 'zstd'], False) else: @@ -228,17 +245,14 @@ if __name__ == '__main__': error_code = 0 for tag in tags: print(tag) - if tag >= 'v0.5.1': + if tag >= 'v0.5.0': create_dict(tag, dict_source_path) dict_compress_sample(tag, test_dat) remove_duplicates() error_code += decompress_dict(tag) compress_sample(tag, test_dat) remove_duplicates() - if tag >= 'v0.5.1': - error_code += decompress_zst(tag) - else: - error_code += decompress_zst(tag, zstd_up_to_v05=True) + error_code += decompress_zst(tag) print('') print('Enumerate different compressed files') From e16f65675b0096ecc83828e3d6c72bddae4c69f8 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 17 Jun 2016 15:17:35 +0200 Subject: [PATCH 08/15] test-zstd-versions.py: fixed creation of dictionaries for v0.5.1+ --- tests/test-zstd-versions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-zstd-versions.py b/tests/test-zstd-versions.py index b9a81521..e5e5ec90 100755 --- a/tests/test-zstd-versions.py +++ b/tests/test-zstd-versions.py @@ -69,7 +69,7 @@ def create_dict(tag, dict_source_path): if tag == 'v0.5.0': result = execute('./dictBuilder.' + tag + ' ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True) else: - result = execute('./zstd.' + tag + ' ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True) + result = execute('./zstd.' 
+ tag + ' -f --train ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True) if result == 0: print(dict_name + ' created') else: From 06d9a73b4816b27b862fa8d823d8db78ed58e631 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 19 Jun 2016 14:27:21 +0200 Subject: [PATCH 09/15] minor refactor, using `WILDCOPY_OVERLENGTH` macro instead of hard-coded 8 --- Makefile | 1 + lib/compress/zstd_compress.c | 2 +- lib/decompress/zstd_decompress.c | 29 ++++++++++++++--------------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 77a67a23..f0d39ea1 100644 --- a/Makefile +++ b/Makefile @@ -64,6 +64,7 @@ clean: @$(MAKE) -C $(ZSTDDIR) $@ > $(VOID) @$(MAKE) -C $(PRGDIR) $@ > $(VOID) @$(MAKE) -C $(ZWRAPDIR) $@ > $(VOID) + @rm -f zstd @echo Cleaning completed diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b8d1d2c0..7f71395d 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -246,7 +246,7 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, U size_t ZSTD_sizeofCCtx(ZSTD_compressionParameters cParams) /* hidden interface, for paramagrill */ { - ZSTD_CCtx* zc = ZSTD_createCCtx(); + ZSTD_CCtx* const zc = ZSTD_createCCtx(); ZSTD_parameters params; memset(&params, 0, sizeof(params)); params.cParams = cParams; diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index b22021ed..816f7367 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -749,7 +749,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState) if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&(seqState->DStream)); seq.litLength = LL_base[llCode] + ((llCode>15) ? 
BIT_readBits(&(seqState->DStream), llBits) : 0); /* <= 16 bits */ - if (MEM_32bits() || + if (MEM_32bits() | (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&(seqState->DStream)); /* ANS state update */ @@ -765,23 +765,22 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState) FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit_8, + const BYTE** litPtr, const BYTE* const litLimit_w, const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_8 = oend-8; + BYTE* const oend_w = oend-WILDCOPY_OVERLENGTH; const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; /* check */ - if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */ - if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */ - if (iLitEnd > litLimit_8) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + if ((oLitEnd>oend_w) | (oMatchEnd>oend)) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ + if (iLitEnd > litLimit_w) return ERROR(corruption_detected); /* over-read beyond lit buffer */ /* copy Literals */ - ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */ + ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ op = oLitEnd; *litPtr = iLitEnd; /* update for next sequence */ @@ -821,10 +820,10 @@ size_t ZSTD_execSequence(BYTE* op, op += 8; match += 8; if (oMatchEnd > oend-(16-MINMATCH)) { - 
if (op < oend_8) { - ZSTD_wildcopy(op, match, oend_8 - op); - match += oend_8 - op; - op = oend_8; + if (op < oend_w) { + ZSTD_wildcopy(op, match, oend_w - op); + match += oend_w - op; + op = oend_w; } while (op < oMatchEnd) *op++ = *match++; } else { @@ -845,7 +844,7 @@ static size_t ZSTD_decompressSequences( BYTE* const oend = ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; - const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8; + const BYTE* const litLimit_w = litPtr + dctx->litBufSize - WILDCOPY_OVERLENGTH; const BYTE* const litEnd = litPtr + dctx->litSize; FSE_DTable* DTableLL = dctx->LLTable; FSE_DTable* DTableML = dctx->MLTable; @@ -875,7 +874,7 @@ static size_t ZSTD_decompressSequences( for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { nbSeq--; { seq_t const sequence = ZSTD_decodeSequence(&seqState); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_w, base, vBase, dictEnd); if (ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; } } @@ -888,8 +887,8 @@ static size_t ZSTD_decompressSequences( /* last literal segment */ { size_t const lastLLSize = litEnd - litPtr; - if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */ - if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall); + //if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */ + if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); memcpy(op, litPtr, lastLLSize); op += lastLLSize; } From e4811ba7612978ff8e616170985c57f58b02f2f0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 19 Jun 2016 23:06:54 +0200 Subject: [PATCH 10/15] Modified : ZSTD_createDDict() accepts dictionary < 8 bytes in pure content mode (reported by @chipturner) --- lib/decompress/zstd_decompress.c | 38 
++++++++++++++------------------ 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 816f7367..37aa403f 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1179,12 +1179,13 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c } -static void ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { dctx->dictEnd = dctx->previousDstEnd; dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); dctx->base = dict; dctx->previousDstEnd = (const char*)dict + dictSize; + return 0; } static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t const dictSize) @@ -1236,29 +1237,24 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t c static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { - if (dictSize < 8) return ERROR(dictionary_corrupted); + if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); { U32 const magic = MEM_readLE32(dict); if (magic != ZSTD_DICT_MAGIC) { - /* pure content mode */ - ZSTD_refDictContent(dctx, dict, dictSize); - return 0; - } - dctx->dictID = MEM_readLE32((const char*)dict + 4); + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } } + dctx->dictID = MEM_readLE32((const char*)dict + 4); - /* load entropy tables */ - dict = (const char*)dict + 8; - dictSize -= 8; - { size_t const eSize = ZSTD_loadEntropy(dctx, dict, dictSize); - if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted); - dict = (const char*)dict + eSize; - dictSize -= eSize; - } - - /* reference dictionary content */ - ZSTD_refDictContent(dctx, dict, dictSize); - - return 0; + /* load entropy tables */ + dict = (const char*)dict + 8; + dictSize -= 8; + 
{ size_t const eSize = ZSTD_loadEntropy(dctx, dict, dictSize); + if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted); + dict = (const char*)dict + eSize; + dictSize -= eSize; } + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); } @@ -1318,7 +1314,7 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_cu } /*! ZSTD_createDDict() : -* Create a digested dictionary, ready to start decompression operation without startup delay. +* Create a digested dictionary, ready to start decompression without startup delay. * `dict` can be released after `ZSTD_DDict` creation */ ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) { From cbf8c52954db5f442e9da873be367853125101fa Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 20 Jun 2016 16:18:19 +0200 Subject: [PATCH 11/15] roundTripCrash automatic compression level now depends on first 128 bytes --- programs/roundTripCrash.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/programs/roundTripCrash.c b/programs/roundTripCrash.c index 1b6e1d7b..d5b673f4 100644 --- a/programs/roundTripCrash.c +++ b/programs/roundTripCrash.c @@ -36,8 +36,14 @@ #include <stdio.h> /* fprintf */ #include <sys/types.h> /* stat */ #include <sys/stat.h> /* stat */ +#include "xxhash.h" #include "zstd.h" +/*=========================================== +* Macros +*==========================================*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + /** roundTripTest() : * Compresses `srcBuff` into `compressedBuff`, * then decompresses `compressedBuff` into `resultBuff`. @@ -51,7 +57,9 @@ static size_t roundTripTest(void* resultBuff, size_t resultBuffCapacity, const void* srcBuff, size_t srcBuffSize) { static const int maxClevel = 19; - int const cLevel = (!srcBuffSize) ? 
1 : (*(const unsigned char*)srcBuff) % maxClevel; + size_t const hashLength = MIN(128, srcBuffSize); + unsigned const h32 = XXH32(srcBuff, hashLength, 0); + int const cLevel = h32 % maxClevel; size_t const cSize = ZSTD_compress(compressedBuff, compressedBuffCapacity, srcBuff, srcBuffSize, cLevel); if (ZSTD_isError(cSize)) { fprintf(stderr, "Compression error : %s \n", ZSTD_getErrorName(cSize)); From c98f8e7b571bb2535d557be56c46f8346f2dca3e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 20 Jun 2016 16:31:24 +0200 Subject: [PATCH 12/15] Modified : zstd CLI enables checksum by default --- NEWS | 3 +++ programs/fileio.c | 2 +- programs/zstdcli.c | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index dc2d66bc..9a58c321 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,6 @@ +v0.7.1 +modified : checksum enabled by default in command line + v0.7.0 New : Support for directory compression, using `-r`, thanks to Przemyslaw Skibinski New : Command `--rm`, to remove source file after successful de/compression diff --git a/programs/fileio.c b/programs/fileio.c index decc971c..90041250 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -137,7 +137,7 @@ static U32 g_sparseFileSupport = 1; /* 0 : no sparse allowed; 1: auto (file ye void FIO_setSparseWrite(unsigned sparse) { g_sparseFileSupport=sparse; } static U32 g_dictIDFlag = 1; void FIO_setDictIDFlag(unsigned dictIDFlag) { g_dictIDFlag = dictIDFlag; } -static U32 g_checksumFlag = 0; +static U32 g_checksumFlag = 1; void FIO_setChecksumFlag(unsigned checksumFlag) { g_checksumFlag = checksumFlag; } static U32 g_removeSrcFile = 0; void FIO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag>0); } diff --git a/programs/zstdcli.c b/programs/zstdcli.c index b59c6ebd..bf40dadf 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -136,7 +136,7 @@ static int usage_advanced(const char* programName) #ifndef ZSTD_NOCOMPRESS DISPLAY( "--ultra : enable ultra modes (requires more memory to 
decompress)\n"); DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n"); - DISPLAY( "--check : enable integrity check\n"); + DISPLAY( "--[no-]check : integrity check (default:enabled)\n"); #endif #ifndef ZSTD_NODECOMPRESS DISPLAY( "--test : test compressed file integrity \n"); @@ -257,6 +257,7 @@ int main(int argCount, const char** argv) if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel=1; continue; } if (!strcmp(argument, "--ultra")) { FIO_setMaxWLog(0); continue; } if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(2); continue; } + if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(0); continue; } if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; } if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; } if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; } From 5477cc25f7fb2ed3289f6b9854247dec1abb2852 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 20 Jun 2016 18:31:25 +0200 Subject: [PATCH 13/15] fixed corruption error related to inter-blocks rep-offset --- lib/compress/zstd_compress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 7f71395d..4bb982e6 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1186,8 +1186,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, } } } /* save reps for next block */ - cctx->savedRep[0] = offset_1 ? (U32)offset_1 : (U32)(iend-base); - cctx->savedRep[1] = offset_2 ? (U32)offset_2 : (U32)(iend-base); + cctx->savedRep[0] = offset_1 ? (U32)offset_1 : (U32)(1 GB); + cctx->savedRep[1] = offset_2 ? 
(U32)offset_2 : (U32)(1 GB); /* Last Literals */ { size_t const lastLLSize = iend - anchor; @@ -1844,7 +1844,7 @@ _storeSequence: /* Save reps for next block */ { int i; for (i=0; ibase); /* in case some zero are left */ + if (!rep[i]) rep[i] = (U32)(1 GB); /* in case some zero are left */ ctx->savedRep[i] = rep[i]; } } From 4623d11571b3c17471b95828e1f559813402fdad Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 20 Jun 2016 19:15:37 +0200 Subject: [PATCH 14/15] new correction, less extreme replacement value --- lib/compress/zstd_compress.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 4bb982e6..964b690e 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1186,8 +1186,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, } } } /* save reps for next block */ - cctx->savedRep[0] = offset_1 ? (U32)offset_1 : (U32)(1 GB); - cctx->savedRep[1] = offset_2 ? (U32)offset_2 : (U32)(1 GB); + cctx->savedRep[0] = offset_1 ? (U32)offset_1 : (U32)(iend-base)+2; + cctx->savedRep[1] = offset_2 ? 
(U32)offset_2 : (U32)(iend-base)+2; /* Last Literals */ { size_t const lastLLSize = iend - anchor; @@ -1844,7 +1844,7 @@ _storeSequence: /* Save reps for next block */ { int i; for (i=0; ibase) + 2; /* in case some zero are left */ ctx->savedRep[i] = rep[i]; } } @@ -2238,7 +2238,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, /* preemptive overflow correction */ if (zc->lowLimit > (1<<30)) { - U32 const btplus = (zc->params.cParams.strategy == ZSTD_btlazy2) || (zc->params.cParams.strategy == ZSTD_btopt); + U32 const btplus = (zc->params.cParams.strategy == ZSTD_btlazy2) | (zc->params.cParams.strategy == ZSTD_btopt); U32 const chainMask = (1 << (zc->params.cParams.chainLog - btplus)) - 1; U32 const newLowLimit = zc->lowLimit & chainMask; /* preserve position % chainSize */ U32 const correction = zc->lowLimit - newLowLimit; From a436a529bc854b957cd70723d471271e46c2c084 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 20 Jun 2016 23:34:04 +0200 Subject: [PATCH 15/15] minor : fast_extDict does no longer skip first byte --- NEWS | 3 +- lib/compress/zstd_compress.c | 70 +++++++++++++++++------------------- 2 files changed, 34 insertions(+), 39 deletions(-) diff --git a/NEWS b/NEWS index 9a58c321..0e8d87d6 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,6 @@ v0.7.1 -modified : checksum enabled by default in command line +fixed : corruption issue, reported by cj +modified : checksum enabled by default in command line mode v0.7.0 New : Support for directory compression, using `-r`, thanks to Przemyslaw Skibinski diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 964b690e..1f94f196 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1024,11 +1024,12 @@ static unsigned ZSTD_NbCommonBytes (register size_t val) } -static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) { const BYTE* const 
pStart = pIn; + const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); - while ((pIn 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */ - mlCode = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32; + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */ + mLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32; ip++; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH); - } else { - if ( (matchIndex <= lowestIndex) || - (MEM_read32(match) != MEM_read32(ip)) ) { + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + size_t offset; + if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; } - mlCode = ZSTD_count(ip+EQUAL_READ32, match+EQUAL_READ32, iend) + EQUAL_READ32; + mLength = ZSTD_count(ip+EQUAL_READ32, match+EQUAL_READ32, iend) + EQUAL_READ32; offset = ip-match; - while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */ + while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH); + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } /* match found */ - ip += mlCode; + ip += mLength; anchor = ip; if (ip <= ilimit) { @@ -1176,18 +1176,18 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, && ( (offset_2>0) & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { /* store sequence */ - size_t const rlCode = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32; + size_t const rLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32; { size_t 
const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rlCode-MINMATCH); - ip += rlCode; + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; anchor = ip; continue; /* faster when present ... (?) */ } } } /* save reps for next block */ - cctx->savedRep[0] = offset_1 ? (U32)offset_1 : (U32)(iend-base)+2; - cctx->savedRep[1] = offset_2 ? (U32)offset_2 : (U32)(iend-base)+2; + cctx->savedRep[0] = offset_1 ? (U32)offset_1 : (U32)(iend - base) + 1; + cctx->savedRep[1] = offset_2 ? (U32)offset_2 : (U32)(iend - base) + 1; /* Last Literals */ { size_t const lastLLSize = iend - anchor; @@ -1237,31 +1237,25 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const ilimit = iend - 8; U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1]; - /* init */ - /* skip first position to avoid read overflow during repcode match check */ - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); - ip++; - - /* Main Search Loop */ + /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ const size_t h = ZSTD_hashPtr(ip, hBits, mls); const U32 matchIndex = hashTable[h]; const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; const BYTE* match = matchBase + matchIndex; const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; + const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ const BYTE* repBase = repIndex < dictLimit ? 
dictBase : base; const BYTE* repMatch = repBase + repIndex; - size_t mlCode; - U32 offset; + size_t mLength; hashTable[h] = current; /* update hash table */ - if ( (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; - mlCode = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32; + mLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32; ip++; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH); + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); } else { if ( (matchIndex < lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { @@ -1270,16 +1264,17 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, } { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; const BYTE* lowMatchPtr = matchIndex < dictLimit ? 
dictStart : lowPrefixPtr; - mlCode = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32; - while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */ + U32 offset; + mLength = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset = current - matchIndex; offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH); + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } } /* found a match : store it */ - ip += mlCode; + ip += mLength; anchor = ip; if (ip <= ilimit) { @@ -1435,7 +1430,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co } } *smallerPtr = *largerPtr = 0; - if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); + if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ if (matchEndIdx > current + 8) return matchEndIdx - current - 8; return 1; } @@ -1569,7 +1564,6 @@ static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const B } - /** Tree updater, providing best match */ static size_t ZSTD_BtFindBestMatch_extDict ( ZSTD_CCtx* zc, @@ -1844,7 +1838,7 @@ _storeSequence: /* Save reps for next block */ { int i; for (i=0; ibase) + 2; /* in case some zero are left */ + if (!rep[i]) rep[i] = (U32)(iend - ctx->base) + 1; /* in case some zero are left */ ctx->savedRep[i] = rep[i]; } }