# ################################################################
# Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ################################################################

# Recipe echo control: $(Q) expands to '@' (silent) unless V or VERBOSE is
# set to 1, in which case it expands to nothing and commands are echoed.
Q = $(if $(filter 1,$(V) $(VERBOSE)),,@)

# Version numbers
# Each *_SCRIPT variable holds a back-quoted sed command that prints the number
# found on the matching "#define ZSTD_VERSION_*" line of ./zstd.h. The scripts
# are only text here; they are executed by the $(shell echo ...) calls below.
LIBVER_MAJOR_SCRIPT := `sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
LIBVER_MINOR_SCRIPT := `sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
LIBVER_PATCH_SCRIPT := `sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
# Dotted "major.minor.patch" form, built from the three scripts above.
LIBVER_SCRIPT := $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
# Evaluate once at parse time (':=') so the shell is not re-run on every use.
LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT))
LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
LIBVER := $(shell echo $(LIBVER_SCRIPT))
# VERSION can be overridden by the caller; it defaults to the library version.
VERSION ?= $(LIBVER)
perf improvements for zstd decode (#1668)
* perf improvements for zstd decode
tldr: 7.5% average decode speedup on silesia corpus at compression levels 1-3 (sandy bridge)
Background: while investigating zstd perf differences between clang and gcc I noticed that even though gcc is vectorizing the loop in in wildcopy, it was not being done as well as could be done by hand. The sites where wildcopy is invoked have an interesting distribution of lengths to be copied. The loop trip count is rarely above 1, yet long copies are common enough to make their performance important.The code in zstd_decompress.c to invoke wildcopy handles the latter well but the gcc autovectorizer introduces a needlessly expensive startup check for vectorization.
See how GCC autovectorizes the loop here:
https://godbolt.org/z/apr0x0
Here is the code after this diff has been applied: (left hand side is the good one, right is with vectorizer on)
After: https://godbolt.org/z/OwO4F8
Note that autovectorization still does not do a good job on the optimized version, so it's turned off\
via attribute and flag. I found that neither attribute nor command-line flag were entirely successful in turning off vectorization, which is why there were both.
silesia benchmark data - second triad of each file is with the original code:
file orig compressedratio encode decode change
1#dickens 10192446-> 4268865(2.388), 198.9MB/s 709.6MB/s
2#dickens 10192446-> 3876126(2.630), 128.7MB/s 552.5MB/s
3#dickens 10192446-> 3682956(2.767), 104.6MB/s 537MB/s
1#dickens 10192446-> 4268865(2.388), 195.4MB/s 659.5MB/s 7.60%
2#dickens 10192446-> 3876126(2.630), 127MB/s 516.3MB/s 7.01%
3#dickens 10192446-> 3682956(2.767), 105MB/s 479.5MB/s 11.99%
1#mozilla 51220480-> 20117517(2.546), 285.4MB/s 734.9MB/s
2#mozilla 51220480-> 19067018(2.686), 220.8MB/s 686.3MB/s
3#mozilla 51220480-> 18508283(2.767), 152.2MB/s 669.4MB/s
1#mozilla 51220480-> 20117517(2.546), 283.4MB/s 697.9MB/s 5.30%
2#mozilla 51220480-> 19067018(2.686), 225.9MB/s 665MB/s 3.20%
3#mozilla 51220480-> 18508283(2.767), 154.5MB/s 640.6MB/s 4.50%
1#mr 9970564-> 3840242(2.596), 262.4MB/s 899.8MB/s
2#mr 9970564-> 3600976(2.769), 181.2MB/s 717.9MB/s
3#mr 9970564-> 3563987(2.798), 116.3MB/s 620MB/s
1#mr 9970564-> 3840242(2.596), 253.2MB/s 827.3MB/s 8.76%
2#mr 9970564-> 3600976(2.769), 177.4MB/s 655.4MB/s 9.54%
3#mr 9970564-> 3563987(2.798), 111.2MB/s 564.2MB/s 9.89%
1#nci 33553445-> 2849306(11.78), 575.2MB/s , 1335.8MB/s
2#nci 33553445-> 2890166(11.61), 509.3MB/s , 1238.1MB/s
3#nci 33553445-> 2857408(11.74), 431MB/s , 1210.7MB/s
1#nci 33553445-> 2849306(11.78), 565.4MB/s , 1220.2MB/s 9.47%
2#nci 33553445-> 2890166(11.61), 508.2MB/s , 1128.4MB/s 9.72%
3#nci 33553445-> 2857408(11.74), 429.1MB/s , 1097.7MB/s 10.29%
1#ooffice 6152192-> 3590954(1.713), 231.4MB/s , 662.6MB/s
2#ooffice 6152192-> 3323931(1.851), 162.8MB/s , 592.6MB/s
3#ooffice 6152192-> 3145625(1.956), 99.9MB/s , 549.6MB/s
1#ooffice 6152192-> 3590954(1.713), 224.7MB/s , 624.2MB/s 6.15%
2#ooffice 6152192-> 3323931 (1.851), 155MB/s , 564.5MB/s 4.98%
3#ooffice 6152192-> 3145625(1.956), 101.1MB/s , 521.2MB/s 5.45%
1#osdb 10085684-> 3739042(2.697), 271.9MB/s 876.4MB/s
2#osdb 10085684-> 3493875(2.887), 208.2MB/s 857MB/s
3#osdb 10085684-> 3515831(2.869), 135.3MB/s 805.4MB/s
1#osdb 10085684-> 3739042(2.697), 257.4MB/s 793.8MB/s 10.41%
2#osdb 10085684-> 3493875(2.887), 209.7MB/s 776.1MB/s 10.42%
3#osdb 10085684-> 3515831(2.869), 130.6MB/s 727.7MB/s 10.68%
1#reymont 6627202-> 2152771(3.078), 198.9MB/s 696.2MB/s
2#reymont 6627202-> 2071140(3.200), 170MB/s 595.2MB/s
3#reymont 6627202-> 1953597(3.392), 128.5MB/s 609.7MB/s
1#reymont 6627202-> 2152771(3.078), 199.6MB/s 655.2MB/s 6.26%
2#reymont 6627202-> 2071140(3.200), 168.2MB/s 554.4MB/s 7.36%
3#reymont 6627202-> 1953597(3.392), 128.7MB/s 557.4MB/s 9.38%
1#samba 21606400-> 5510994(3.921), 338.1MB/s 1066MB/s
2#samba 21606400-> 5240208(4.123), 258.7MB/s 992.3MB/s
3#samba 21606400-> 5003358(4.318), 200.2MB/s 991.1MB/s
1#samba 21606400-> 5510994(3.921), 330.8MB/s 974MB/s 9.45%
2#samba 21606400-> 5240208(4.123), 257.9MB/s 919.4MB/s 7.93%
3#samba 21606400-> 5003358(4.318), 198.5MB/s 908.9MB/s 9.04%
1#sao 7251944-> 6256401(1.159), 194.6MB/s 602.2MB/s
2#sao 7251944-> 5808761(1.248), 128.2MB/s 532.1MB/s
3#sao 7251944-> 5556318(1.305), 73MB/s 509.4MB/s
1#sao 7251944-> 6256401(1.159), 198.7MB/s 580.7MB/s 3.70%
2#sao 7251944-> 5808761(1.248), 129.1MB/s 502.7MB/s 5.85%
3#sao 7251944-> 5556318(1.305), 74.6MB/s 493.1MB/s 3.31%
1#webster 41458703-> 13692222(3.028), 222.3MB/s 752MB/s
2#webster 41458703-> 12842646(3.228), 157.6MB/s 532.2MB/s
3#webster 41458703-> 12191964(3.400), 124MB/s 468.5MB/s
1#webster 41458703-> 13692222(3.028), 219.7MB/s 697MB/s 7.89%
2#webster 41458703-> 12842646(3.228), 153.9MB/s 495.4MB/s 7.43%
3#webster 41458703-> 12191964(3.400), 124.8MB/s 444.8MB/s 5.33%
1#xml 5345280-> 696652(7.673), 485MB/s , 1333.9MB/s
2#xml 5345280-> 681492(7.843), 405.2MB/s , 1237.5MB/s
3#xml 5345280-> 639057(8.364), 328.5MB/s , 1281.3MB/s
1#xml 5345280-> 696652(7.673), 473.1MB/s , 1232.4MB/s 8.24%
2#xml 5345280-> 681492(7.843), 398.6MB/s , 1145.9MB/s 7.99%
3#xml 5345280-> 639057(8.364), 327.1MB/s , 1175MB/s 9.05%
1#x-ray 8474240-> 6772557(1.251), 521.3MB/s 762.6MB/s
2#x-ray 8474240-> 6684531(1.268), 230.5MB/s 688.5MB/s
3#x-ray 8474240-> 6166679(1.374), 68.7MB/s 478.8MB/s
1#x-ray 8474240-> 6772557(1.251), 502.8MB/s 736.7MB/s 3.52%
2#x-ray 8474240-> 6684531(1.268), 224.4MB/s 662MB/s 4.00%
3#x-ray 8474240-> 6166679(1.374), 67.3MB/s 437.8MB/s 9.37%
7.51%
* makefile changed to only pass -fno-tree-vectorize to gcc
* <Replace this line with a title. Use 1 line only, 67 chars or less>
Don't add "no-tree-vectorize" attribute on clang (which defines __GNUC__)
* fix for warning/error with subtraction of void* pointers
* fix c90 conformance issue - ISO C90 forbids mixed declarations and code
* Fix assert for negative diff, only when there is no overlap
* fix overflow revealed in fuzzing tests
* tweak for small speed increase
2019-07-11 22:31:07 +00:00
# Output of "$(CC) --version", captured once at parse time. It is searched for
# the string "GCC" further down to decide on a GCC-only compiler flag.
CCVER := $(shell $(CC) --version)

CPPFLAGS += -DXXH_NAMESPACE=ZSTD_
# MinGW assumed when OS is Windows_NT
ifeq ($(OS),Windows_NT)
# compatibility with %zu formatting
CPPFLAGS += -D__USE_MINGW_ANSI_STDIO
endif

# Warning flags. Kept as a recursively-expanded variable ('=') because the
# %-release pattern rule below clears it with a target-specific assignment.
DEBUGFLAGS = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
             -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
             -Wstrict-prototypes -Wundef -Wpointer-arith \
             -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
             -Wredundant-decls -Wmissing-prototypes -Wc++-compat
# MOREFLAGS is not set in this file; it is a hook for caller-supplied flags.
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
# Combined preprocessor + compiler flags used by the shared-library recipes.
FLAGS = $(CPPFLAGS) $(CFLAGS)

# Probe whether the local grep accepts --color=never: yields 1 if it does,
# 0 otherwise. Evaluated once at parse time.
HAVE_COLORNEVER := $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
GREP_OPTIONS ?=
# The variable must be referenced as $(HAVE_COLORNEVER): a bare $HAVE_COLORNEVER
# is parsed by make as $(H) followed by the literal text "AVE_COLORNEVER", which
# can never equal 1, so the option would never be added.
ifeq ($(HAVE_COLORNEVER),1)
GREP_OPTIONS += --color=never
endif
# grep command line used when selecting legacy source files.
GREP = grep $(GREP_OPTIONS)

# Source file lists, one per library module. $(wildcard) expands the glob at
# parse time and $(sort) orders the result and removes duplicates.
ZSTDCOMMON_FILES := $(sort $(wildcard common/*.c))
ZSTDCOMP_FILES := $(sort $(wildcard compress/*.c))
ZSTDDECOMP_FILES := $(sort $(wildcard decompress/*.c))
ZDICT_FILES := $(sort $(wildcard dictBuilder/*.c))
ZDEPR_FILES := $(sort $(wildcard deprecated/*.c))
# ZSTD_FILES starts with the common sources; the optional modules are appended
# further down depending on the ZSTD_LIB_* switches.
ZSTD_FILES := $(ZSTDCOMMON_FILES)
perf improvements for zstd decode (#1668)
* perf improvements for zstd decode
tldr: 7.5% average decode speedup on silesia corpus at compression levels 1-3 (sandy bridge)
Background: while investigating zstd perf differences between clang and gcc I noticed that even though gcc is vectorizing the loop in in wildcopy, it was not being done as well as could be done by hand. The sites where wildcopy is invoked have an interesting distribution of lengths to be copied. The loop trip count is rarely above 1, yet long copies are common enough to make their performance important.The code in zstd_decompress.c to invoke wildcopy handles the latter well but the gcc autovectorizer introduces a needlessly expensive startup check for vectorization.
See how GCC autovectorizes the loop here:
https://godbolt.org/z/apr0x0
Here is the code after this diff has been applied: (left hand side is the good one, right is with vectorizer on)
After: https://godbolt.org/z/OwO4F8
Note that autovectorization still does not do a good job on the optimized version, so it's turned off\
via attribute and flag. I found that neither attribute nor command-line flag were entirely successful in turning off vectorization, which is why there were both.
silesia benchmark data - second triad of each file is with the original code:
file orig compressedratio encode decode change
1#dickens 10192446-> 4268865(2.388), 198.9MB/s 709.6MB/s
2#dickens 10192446-> 3876126(2.630), 128.7MB/s 552.5MB/s
3#dickens 10192446-> 3682956(2.767), 104.6MB/s 537MB/s
1#dickens 10192446-> 4268865(2.388), 195.4MB/s 659.5MB/s 7.60%
2#dickens 10192446-> 3876126(2.630), 127MB/s 516.3MB/s 7.01%
3#dickens 10192446-> 3682956(2.767), 105MB/s 479.5MB/s 11.99%
1#mozilla 51220480-> 20117517(2.546), 285.4MB/s 734.9MB/s
2#mozilla 51220480-> 19067018(2.686), 220.8MB/s 686.3MB/s
3#mozilla 51220480-> 18508283(2.767), 152.2MB/s 669.4MB/s
1#mozilla 51220480-> 20117517(2.546), 283.4MB/s 697.9MB/s 5.30%
2#mozilla 51220480-> 19067018(2.686), 225.9MB/s 665MB/s 3.20%
3#mozilla 51220480-> 18508283(2.767), 154.5MB/s 640.6MB/s 4.50%
1#mr 9970564-> 3840242(2.596), 262.4MB/s 899.8MB/s
2#mr 9970564-> 3600976(2.769), 181.2MB/s 717.9MB/s
3#mr 9970564-> 3563987(2.798), 116.3MB/s 620MB/s
1#mr 9970564-> 3840242(2.596), 253.2MB/s 827.3MB/s 8.76%
2#mr 9970564-> 3600976(2.769), 177.4MB/s 655.4MB/s 9.54%
3#mr 9970564-> 3563987(2.798), 111.2MB/s 564.2MB/s 9.89%
1#nci 33553445-> 2849306(11.78), 575.2MB/s , 1335.8MB/s
2#nci 33553445-> 2890166(11.61), 509.3MB/s , 1238.1MB/s
3#nci 33553445-> 2857408(11.74), 431MB/s , 1210.7MB/s
1#nci 33553445-> 2849306(11.78), 565.4MB/s , 1220.2MB/s 9.47%
2#nci 33553445-> 2890166(11.61), 508.2MB/s , 1128.4MB/s 9.72%
3#nci 33553445-> 2857408(11.74), 429.1MB/s , 1097.7MB/s 10.29%
1#ooffice 6152192-> 3590954(1.713), 231.4MB/s , 662.6MB/s
2#ooffice 6152192-> 3323931(1.851), 162.8MB/s , 592.6MB/s
3#ooffice 6152192-> 3145625(1.956), 99.9MB/s , 549.6MB/s
1#ooffice 6152192-> 3590954(1.713), 224.7MB/s , 624.2MB/s 6.15%
2#ooffice 6152192-> 3323931 (1.851), 155MB/s , 564.5MB/s 4.98%
3#ooffice 6152192-> 3145625(1.956), 101.1MB/s , 521.2MB/s 5.45%
1#osdb 10085684-> 3739042(2.697), 271.9MB/s 876.4MB/s
2#osdb 10085684-> 3493875(2.887), 208.2MB/s 857MB/s
3#osdb 10085684-> 3515831(2.869), 135.3MB/s 805.4MB/s
1#osdb 10085684-> 3739042(2.697), 257.4MB/s 793.8MB/s 10.41%
2#osdb 10085684-> 3493875(2.887), 209.7MB/s 776.1MB/s 10.42%
3#osdb 10085684-> 3515831(2.869), 130.6MB/s 727.7MB/s 10.68%
1#reymont 6627202-> 2152771(3.078), 198.9MB/s 696.2MB/s
2#reymont 6627202-> 2071140(3.200), 170MB/s 595.2MB/s
3#reymont 6627202-> 1953597(3.392), 128.5MB/s 609.7MB/s
1#reymont 6627202-> 2152771(3.078), 199.6MB/s 655.2MB/s 6.26%
2#reymont 6627202-> 2071140(3.200), 168.2MB/s 554.4MB/s 7.36%
3#reymont 6627202-> 1953597(3.392), 128.7MB/s 557.4MB/s 9.38%
1#samba 21606400-> 5510994(3.921), 338.1MB/s 1066MB/s
2#samba 21606400-> 5240208(4.123), 258.7MB/s 992.3MB/s
3#samba 21606400-> 5003358(4.318), 200.2MB/s 991.1MB/s
1#samba 21606400-> 5510994(3.921), 330.8MB/s 974MB/s 9.45%
2#samba 21606400-> 5240208(4.123), 257.9MB/s 919.4MB/s 7.93%
3#samba 21606400-> 5003358(4.318), 198.5MB/s 908.9MB/s 9.04%
1#sao 7251944-> 6256401(1.159), 194.6MB/s 602.2MB/s
2#sao 7251944-> 5808761(1.248), 128.2MB/s 532.1MB/s
3#sao 7251944-> 5556318(1.305), 73MB/s 509.4MB/s
1#sao 7251944-> 6256401(1.159), 198.7MB/s 580.7MB/s 3.70%
2#sao 7251944-> 5808761(1.248), 129.1MB/s 502.7MB/s 5.85%
3#sao 7251944-> 5556318(1.305), 74.6MB/s 493.1MB/s 3.31%
1#webster 41458703-> 13692222(3.028), 222.3MB/s 752MB/s
2#webster 41458703-> 12842646(3.228), 157.6MB/s 532.2MB/s
3#webster 41458703-> 12191964(3.400), 124MB/s 468.5MB/s
1#webster 41458703-> 13692222(3.028), 219.7MB/s 697MB/s 7.89%
2#webster 41458703-> 12842646(3.228), 153.9MB/s 495.4MB/s 7.43%
3#webster 41458703-> 12191964(3.400), 124.8MB/s 444.8MB/s 5.33%
1#xml 5345280-> 696652(7.673), 485MB/s , 1333.9MB/s
2#xml 5345280-> 681492(7.843), 405.2MB/s , 1237.5MB/s
3#xml 5345280-> 639057(8.364), 328.5MB/s , 1281.3MB/s
1#xml 5345280-> 696652(7.673), 473.1MB/s , 1232.4MB/s 8.24%
2#xml 5345280-> 681492(7.843), 398.6MB/s , 1145.9MB/s 7.99%
3#xml 5345280-> 639057(8.364), 327.1MB/s , 1175MB/s 9.05%
1#x-ray 8474240-> 6772557(1.251), 521.3MB/s 762.6MB/s
2#x-ray 8474240-> 6684531(1.268), 230.5MB/s 688.5MB/s
3#x-ray 8474240-> 6166679(1.374), 68.7MB/s 478.8MB/s
1#x-ray 8474240-> 6772557(1.251), 502.8MB/s 736.7MB/s 3.52%
2#x-ray 8474240-> 6684531(1.268), 224.4MB/s 662MB/s 4.00%
3#x-ray 8474240-> 6166679(1.374), 67.3MB/s 437.8MB/s 9.37%
7.51%
* makefile changed to only pass -fno-tree-vectorize to gcc
* <Replace this line with a title. Use 1 line only, 67 chars or less>
Don't add "no-tree-vectorize" attribute on clang (which defines __GNUC__)
* fix for warning/error with subtraction of void* pointers
* fix c90 conformance issue - ISO C90 forbids mixed declarations and code
* Fix assert for negative diff, only when there is no overlap
* fix overflow revealed in fuzzing tests
* tweak for small speed increase
2019-07-11 22:31:07 +00:00
# When "$(CC) --version" mentions GCC, compile the block decoder with
# auto-vectorization disabled (target-specific flag, this object only).
ifeq ($(findstring GCC,$(CCVER)),GCC)
decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize
endif

# This is a helper variable that configures a bunch of other variables to new,
# space-optimized defaults.
# All settings below use '?=' so a value supplied by the caller still wins.
ZSTD_LIB_MINIFY ?= 0
ifneq ($(ZSTD_LIB_MINIFY), 0)
  # Probe: 1 if $(CC) accepts -Oz when compiling an empty C input, 0 otherwise.
  HAVE_CC_OZ ?= $(shell echo "" | $(CC) -Oz -x c -c - -o /dev/null 2> /dev/null && echo 1 || echo 0)
  ZSTD_LEGACY_SUPPORT ?= 0
  ZSTD_LIB_DEPRECATED ?= 0
  HUF_FORCE_DECOMPRESS_X1 ?= 1
  ZSTD_FORCE_DECOMPRESS_SHORT ?= 1
  ZSTD_NO_INLINE ?= 1
  ZSTD_STRIP_ERROR_STRINGS ?= 1
  ifneq ($(HAVE_CC_OZ), 0)
    # Some compilers (clang) support an even more space-optimized setting.
    CFLAGS += -Oz
  else
    CFLAGS += -Os
  endif
  CFLAGS += -fno-stack-protector -fomit-frame-pointer -fno-ident \
            -DDYNAMIC_BMI2=0 -DNDEBUG
else
  # Regular (non-minified) build: optimize for speed.
  CFLAGS += -O3
endif
# Default values for the build switches. Every assignment uses '?=' so a value
# already set (by the caller, or by the ZSTD_LIB_MINIFY section above) is kept.

# Modules
ZSTD_LIB_COMPRESSION ?= 1
ZSTD_LIB_DECOMPRESSION ?= 1
ZSTD_LIB_DICTBUILDER ?= 1
ZSTD_LIB_DEPRECATED ?= 1

# Legacy support
ZSTD_LEGACY_SUPPORT ?= 5
ZSTD_LEGACY_MULTITHREADED_API ?= 0

# Build size optimizations
HUF_FORCE_DECOMPRESS_X1 ?= 0
HUF_FORCE_DECOMPRESS_X2 ?= 0
ZSTD_FORCE_DECOMPRESS_SHORT ?= 0
ZSTD_FORCE_DECOMPRESS_LONG ?= 0
ZSTD_NO_INLINE ?= 0
ZSTD_STRIP_ERROR_STRINGS ?= 0

# Module dependencies: switching off compression also switches off the
# dictionary builder and the deprecated API; switching off decompression also
# switches off legacy-format support and the deprecated API.
ifeq ($(ZSTD_LIB_COMPRESSION), 0)
  ZSTD_LIB_DICTBUILDER = 0
  ZSTD_LIB_DEPRECATED = 0
endif

ifeq ($(ZSTD_LIB_DECOMPRESSION), 0)
  ZSTD_LEGACY_SUPPORT = 0
  ZSTD_LIB_DEPRECATED = 0
endif

# Append the sources of every module that is still enabled to ZSTD_FILES.
ifneq ($(ZSTD_LIB_COMPRESSION), 0)
  ZSTD_FILES += $(ZSTDCOMP_FILES)
endif

ifneq ($(ZSTD_LIB_DECOMPRESSION), 0)
  ZSTD_FILES += $(ZSTDDECOMP_FILES)
endif

ifneq ($(ZSTD_LIB_DEPRECATED), 0)
  ZSTD_FILES += $(ZDEPR_FILES)
endif

ifneq ($(ZSTD_LIB_DICTBUILDER), 0)
  ZSTD_FILES += $(ZDICT_FILES)
endif

# Translate each build switch into a preprocessor define of the same name.
# A switch counts as enabled when its value is anything other than 0.
ifneq ($(HUF_FORCE_DECOMPRESS_X1), 0)
  CFLAGS += -DHUF_FORCE_DECOMPRESS_X1
endif

ifneq ($(HUF_FORCE_DECOMPRESS_X2), 0)
  CFLAGS += -DHUF_FORCE_DECOMPRESS_X2
endif

ifneq ($(ZSTD_FORCE_DECOMPRESS_SHORT), 0)
  CFLAGS += -DZSTD_FORCE_DECOMPRESS_SHORT
endif

ifneq ($(ZSTD_FORCE_DECOMPRESS_LONG), 0)
  CFLAGS += -DZSTD_FORCE_DECOMPRESS_LONG
endif

ifneq ($(ZSTD_NO_INLINE), 0)
  CFLAGS += -DZSTD_NO_INLINE
endif

ifneq ($(ZSTD_STRIP_ERROR_STRINGS), 0)
  CFLAGS += -DZSTD_STRIP_ERROR_STRINGS
endif

ifneq ($(ZSTD_LEGACY_MULTITHREADED_API), 0)
  CFLAGS += -DZSTD_LEGACY_MULTITHREADED_API
endif

# Legacy format support: when ZSTD_LEGACY_SUPPORT is non-zero and numerically
# below 8, add the legacy/*.c files whose names match v0[N-7], where N is the
# value of ZSTD_LEGACY_SUPPORT.
ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
# "$$?" reaches the shell as "$?": the exit status of `test` (0 means N < 8).
ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
  ZSTD_FILES += $(shell ls legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
endif
endif
# The chosen level is always passed to the C code, including when it is 0.
CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)

# Object files of the static library: one .o per selected .c source.
ZSTD_OBJ := $(patsubst %.c,%.o,$(ZSTD_FILES))

# macOS linker doesn't support -soname, and use different extension
# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
#
# SHARED_EXT       : bare shared-library extension
# SHARED_EXT_MAJOR : extension carrying the major version only
# SHARED_EXT_VER   : extension carrying the full version
# SONAME_FLAGS     : linker flags that record the library's install name/soname
# All are recursively expanded ('='), so SONAME_FLAGS may reference SHARED_EXT
# and LIBDIR even though those are assigned later in the file.
ifeq ($(shell uname), Darwin)
  SHARED_EXT = dylib
  SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
  SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
  SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
else
  SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
  SHARED_EXT = so
  SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR)
  SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
endif

.PHONY: default all clean install uninstall

# Default goal: the library built through the %-release pattern rule.
default: lib-release

all: lib

# Static library: archive every object in ZSTD_OBJ.
# ARFLAGS is overridden for this target only (r: insert/replace members,
# c: create the archive without a warning, s: write an index).
libzstd.a: ARFLAGS = rcs
libzstd.a: $(ZSTD_OBJ)
	@echo compiling static library
	$(Q)$(AR) $(ARFLAGS) $@ $^

# Dynamic library. LIBZSTD names the file to build; the rule compiles and links
# all selected sources ($^) in a single compiler invocation.
ifneq (,$(filter Windows%,$(OS)))

# Windows: produce the DLL together with its import library (dll\libzstd.dll.a).
LIBZSTD = dll\libzstd.dll
$(LIBZSTD): $(ZSTD_FILES)
	@echo compiling dynamic library $(LIBVER)
	$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll\libzstd.dll.a -shared $^ -o $@

else

# Other systems: produce the fully versioned library, then create the
# major-version and unversioned symlinks next to it.
LIBZSTD = libzstd.$(SHARED_EXT_VER)
$(LIBZSTD): LDFLAGS += -shared -fPIC -fvisibility=hidden
$(LIBZSTD): $(ZSTD_FILES)
	@echo compiling dynamic library $(LIBVER)
	$(Q)$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
	@echo creating versioned links
	$(Q)ln -sf $@ libzstd.$(SHARED_EXT_MAJOR)
	$(Q)ln -sf $@ libzstd.$(SHARED_EXT)

endif

# Convenience alias for the dynamic library file named by LIBZSTD.
.PHONY: libzstd
libzstd: $(LIBZSTD)

# Static and dynamic library together.
.PHONY: lib
lib: libzstd.a libzstd

# "<target>-mt" builds <target> with multithreading enabled: the target-specific
# variables below are inherited by the prerequisite matched by '%'.
.PHONY: lib-mt
%-mt: CPPFLAGS += -DZSTD_MULTITHREAD
%-mt: LDFLAGS += -pthread
%-mt: %
	@echo multi-threading build completed

# "<target>-release" builds <target> with DEBUGFLAGS emptied, i.e. without the
# warning flags that CFLAGS would otherwise pick up from it.
.PHONY: lib-release
%-release: DEBUGFLAGS :=
%-release: %
	@echo release build completed

# Special case : building library in single-thread mode _and_ without zstdmt_compress.c
# ZSTD_NOMT_FILES is ZSTD_FILES with the multithreading source filtered out.
ZSTDMT_FILES = compress/zstdmt_compress.c
ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(ZSTD_FILES))
libzstd-nomt: LDFLAGS += -shared -fPIC -fvisibility=hidden
libzstd-nomt: $(ZSTD_NOMT_FILES)
	@echo compiling single-thread dynamic library $(LIBVER)
	@echo files : $(ZSTD_NOMT_FILES)
	$(Q)$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@

# Remove build products: objects, archives, shared libraries and their
# versioned names, the generated pkg-config file, and the Windows DLL outputs.
# $(RM) is "rm -f", so files that do not exist are not an error.
clean:
	$(Q)$(RM) -r *.dSYM   # macOS-specific
	$(Q)$(RM) core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc
	$(Q)$(RM) dll/libzstd.dll dll/libzstd.lib libzstd-nomt*
	$(Q)$(RM) common/*.o compress/*.o decompress/*.o dictBuilder/*.o legacy/*.o deprecated/*.o
	@echo Cleaning library completed

#-----------------------------------------------------------------------------
# make install is validated only for below listed environments
#-----------------------------------------------------------------------------
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku))

# DESTDIR is prepended to every installation path (staged installs).
DESTDIR     ?=
# directory variables : GNU conventions prefer lowercase
# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
# support both lower and uppercase (BSD), use uppercase in script
prefix      ?= /usr/local
PREFIX      ?= $(prefix)
exec_prefix ?= $(PREFIX)
libdir      ?= $(exec_prefix)/lib
LIBDIR      ?= $(libdir)
includedir  ?= $(PREFIX)/include
INCLUDEDIR  ?= $(includedir)

# Location of pkg-config files: under libdata/ on FreeBSD, NetBSD and
# DragonFly, under the library directory everywhere else.
ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly))
PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig
else
PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig
endif

# Install program: ginstall on SunOS, install everywhere else.
ifneq (,$(filter $(shell uname),SunOS))
INSTALL ?= ginstall
else
INSTALL ?= install
endif

INSTALL_PROGRAM ?= $(INSTALL)
INSTALL_DATA    ?= $(INSTALL) -m 644

# The install-* helpers are commands, not files: declare them phony so that a
# file of the same name in this directory cannot suppress them.
.PHONY: install-pc install-static install-shared install-includes

# Generate the pkg-config file from its template by substituting the
# @PREFIX@ and @VERSION@ placeholders.
libzstd.pc: libzstd.pc.in
	@echo creating pkgconfig
	$(Q)sed -e 's|@PREFIX@|$(PREFIX)|' \
	        -e 's|@VERSION@|$(VERSION)|' \
	        $< >$@

install: install-pc install-static install-shared install-includes
	@echo zstd static and shared library installed

install-pc: libzstd.pc
	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/
	$(Q)$(INSTALL_DATA) libzstd.pc $(DESTDIR)$(PKGCONFIGDIR)/

install-static: libzstd.a
	@echo Installing static library
	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
	$(Q)$(INSTALL_DATA) libzstd.a $(DESTDIR)$(LIBDIR)

# Installs the versioned library, then recreates the major-version and
# unversioned symlinks inside the destination directory.
install-shared: libzstd
	@echo Installing shared library
	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
	$(Q)$(INSTALL_PROGRAM) $(LIBZSTD) $(DESTDIR)$(LIBDIR)
	$(Q)ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
	$(Q)ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)

install-includes:
	@echo Installing includes
	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR)/
	$(Q)$(INSTALL_DATA) zstd.h $(DESTDIR)$(INCLUDEDIR)
	$(Q)$(INSTALL_DATA) common/zstd_errors.h $(DESTDIR)$(INCLUDEDIR)
	$(Q)$(INSTALL_DATA) deprecated/zbuff.h $(DESTDIR)$(INCLUDEDIR)     # prototypes generate deprecation warnings
	$(Q)$(INSTALL_DATA) dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR)

# Removes exactly the files that the install-* targets put in place.
uninstall:
	$(Q)$(RM) $(DESTDIR)$(LIBDIR)/libzstd.a
	$(Q)$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
	$(Q)$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
	$(Q)$(RM) $(DESTDIR)$(LIBDIR)/$(LIBZSTD)
	$(Q)$(RM) $(DESTDIR)$(PKGCONFIGDIR)/libzstd.pc
	$(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd.h
	$(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd_errors.h
	$(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/zbuff.h   # Deprecated streaming functions
	$(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/zdict.h
	@echo zstd libraries successfully uninstalled

endif