# ################################################################
# Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ################################################################

# define silent mode as default (verbose mode with V=1 or VERBOSE=1)
# The target name below only collapses to `.SILENT` while both V and VERBOSE
# expand to nothing; any non-empty value yields a different, unused target.
$(V)$(VERBOSE).SILENT:

# When cross-compiling from linux to windows,
# you might need to specify this as "Windows."
# Fedora build fails without it.
#
# Note: mingw-w64 build from linux to windows
# does not fail on other tested distros (ubuntu, debian)
# even without manually specifying the TARGET_SYSTEM.
TARGET_SYSTEM ?= $(OS)

# Version numbers
# Each *_SCRIPT variable holds a back-quoted sed command that extracts one
# numeric component from ./zstd.h. The back-quotes are kept verbatim so the
# text can be dropped into a shell command line as-is.
LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)

# Resolve the scripts once, at parse time, into plain version strings.
LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT))
LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
LIBVER := $(shell echo $(LIBVER_SCRIPT))
VERSION ?= $(LIBVER)

# Compiler identification banner; searched for "GCC" further down.
CCVER := $(shell $(CC) --version)

# This is a helper variable that configures a bunch of other variables to new,
# space-optimized defaults.
# Every knob is set with ?= so an explicit user value still takes precedence.
ZSTD_LIB_MINIFY ?= 0
ifneq ($(ZSTD_LIB_MINIFY), 0)
  # Probe whether $(CC) accepts -Oz by compiling an empty translation unit.
  HAVE_CC_OZ ?= $(shell echo "" | $(CC) -Oz -x c -c - -o /dev/null 2> /dev/null && echo 1 || echo 0)
  ZSTD_LEGACY_SUPPORT ?= 0
  ZSTD_LIB_DEPRECATED ?= 0
  HUF_FORCE_DECOMPRESS_X1 ?= 1
  ZSTD_FORCE_DECOMPRESS_SHORT ?= 1
  ZSTD_NO_INLINE ?= 1
  ZSTD_STRIP_ERROR_STRINGS ?= 1
  ifneq ($(HAVE_CC_OZ), 0)
    # Some compilers (clang) support an even more space-optimized setting.
    CFLAGS += -Oz
  else
    CFLAGS += -Os
  endif
  CFLAGS += -fno-stack-protector -fomit-frame-pointer -fno-ident \
            -DDYNAMIC_BMI2=0 -DNDEBUG
else
  CFLAGS += -O3
endif

# Preprocessor configuration shared by every translation unit.
DEBUGLEVEL ?= 0
CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -DDEBUGLEVEL=$(DEBUGLEVEL)
ifeq ($(TARGET_SYSTEM),Windows_NT)   # MinGW assumed
  # compatibility with %zu formatting
  CPPFLAGS += -D__USE_MINGW_ANSI_STDIO
endif

# Warning set appended to CFLAGS; the %-release targets blank DEBUGFLAGS out.
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
            -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
            -Wstrict-prototypes -Wundef -Wpointer-arith \
            -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
            -Wredundant-decls -Wmissing-prototypes -Wc++-compat
CFLAGS   += $(DEBUGFLAGS) $(MOREFLAGS)
# Recursive on purpose: FLAGS must see later additions to CPPFLAGS / CFLAGS.
FLAGS     = $(CPPFLAGS) $(CFLAGS)

# Detect whether the local grep understands --color=never, and if so force it
# so that colour escape sequences never leak into file lists built from grep.
HAVE_COLORNEVER := $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
GREP_OPTIONS ?=
# The variable must be referenced as $(HAVE_COLORNEVER): a bare $HAVE_COLORNEVER
# is read by make as $(H) followed by the literal text "AVE_COLORNEVER", which
# can never equal 1, so the option was silently never enabled.
ifeq ($(HAVE_COLORNEVER),1)
  GREP_OPTIONS += --color=never
endif
GREP = grep $(GREP_OPTIONS)
# Flag selecting extended regular expressions in sed (overridable per platform).
SED_ERE_OPT ?= -E

# Source lists, one per library module. $(sort) orders and de-duplicates the
# glob results.
ZSTDCOMMON_FILES := $(sort $(wildcard common/*.c))
ZSTDCOMP_FILES := $(sort $(wildcard compress/*.c))
ZSTDDECOMP_FILES := $(sort $(wildcard decompress/*.c))
ZDICT_FILES := $(sort $(wildcard dictBuilder/*.c))
ZDEPR_FILES := $(sort $(wildcard deprecated/*.c))
# The common module is always built; the other modules are appended further
# down depending on the ZSTD_LIB_* switches.
ZSTD_FILES := $(ZSTDCOMMON_FILES)

# Only when the compiler banner contains "GCC": disable tree vectorization
# for the block decoder object.
# NOTE(review): the library objects appear to be built under
# $(BUILD_DIR)/static and $(BUILD_DIR)/dynamic, so this target name may no
# longer match any object that is actually compiled - confirm.
ifeq ($(findstring GCC,$(CCVER)),GCC)
decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize
endif

# Modules
# Set any of these to 0 to leave the corresponding module out of the library.
ZSTD_LIB_COMPRESSION ?= 1
ZSTD_LIB_DECOMPRESSION ?= 1
ZSTD_LIB_DICTBUILDER ?= 1
ZSTD_LIB_DEPRECATED ?= 1

# Legacy support
ZSTD_LEGACY_SUPPORT ?= 5
ZSTD_LEGACY_MULTITHREADED_API ?= 0

# Build size optimizations
# Each non-zero value is turned into the matching -D<NAME> define further down.
HUF_FORCE_DECOMPRESS_X1 ?= 0
HUF_FORCE_DECOMPRESS_X2 ?= 0
ZSTD_FORCE_DECOMPRESS_SHORT ?= 0
ZSTD_FORCE_DECOMPRESS_LONG ?= 0
ZSTD_NO_INLINE ?= 0
ZSTD_STRIP_ERROR_STRINGS ?= 0

# Without the compressor, the dictionary builder and the deprecated module
# are switched off as well.
ifeq ($(ZSTD_LIB_COMPRESSION), 0)
  ZSTD_LIB_DICTBUILDER = 0
  ZSTD_LIB_DEPRECATED = 0
endif

# Without the decompressor, legacy format support and the deprecated module
# are switched off as well.
ifeq ($(ZSTD_LIB_DECOMPRESSION), 0)
  ZSTD_LEGACY_SUPPORT = 0
  ZSTD_LIB_DEPRECATED = 0
endif

# Append the sources of every module that is still enabled.
ifneq ($(ZSTD_LIB_COMPRESSION), 0)
  ZSTD_FILES += $(ZSTDCOMP_FILES)
endif

ifneq ($(ZSTD_LIB_DECOMPRESSION), 0)
  ZSTD_FILES += $(ZSTDDECOMP_FILES)
endif

ifneq ($(ZSTD_LIB_DEPRECATED), 0)
  ZSTD_FILES += $(ZDEPR_FILES)
endif

ifneq ($(ZSTD_LIB_DICTBUILDER), 0)
  ZSTD_FILES += $(ZDICT_FILES)
endif

# Translate each enabled build-size / API switch into its -D<NAME> define.
ifneq ($(HUF_FORCE_DECOMPRESS_X1), 0)
  CFLAGS += -DHUF_FORCE_DECOMPRESS_X1
endif

ifneq ($(HUF_FORCE_DECOMPRESS_X2), 0)
  CFLAGS += -DHUF_FORCE_DECOMPRESS_X2
endif

ifneq ($(ZSTD_FORCE_DECOMPRESS_SHORT), 0)
  CFLAGS += -DZSTD_FORCE_DECOMPRESS_SHORT
endif

ifneq ($(ZSTD_FORCE_DECOMPRESS_LONG), 0)
  CFLAGS += -DZSTD_FORCE_DECOMPRESS_LONG
endif

ifneq ($(ZSTD_NO_INLINE), 0)
  CFLAGS += -DZSTD_NO_INLINE
endif

ifneq ($(ZSTD_STRIP_ERROR_STRINGS), 0)
  CFLAGS += -DZSTD_STRIP_ERROR_STRINGS
endif

ifneq ($(ZSTD_LEGACY_MULTITHREADED_API), 0)
  CFLAGS += -DZSTD_LEGACY_MULTITHREADED_API
endif

# Legacy decoders: when ZSTD_LEGACY_SUPPORT is N with 0 < N < 8, add the
# legacy/*.c files whose name matches v0[N-7].
# `echo $$?` passes a literal `$?` to the shell, i.e. the exit status of `test`.
ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
  ZSTD_FILES += $(shell ls legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
endif
endif
# The selected level is always handed to the sources, including 0.
CPPFLAGS  += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)

# Directory-less source and object names; objects are later prefixed with the
# per-configuration build directory.
ZSTD_LOCAL_SRC := $(notdir $(ZSTD_FILES))
ZSTD_LOCAL_OBJ := $(ZSTD_LOCAL_SRC:.c=.o)

# Let make locate a bare `foo.c` prerequisite inside any module directory.
ZSTD_SUBDIR := common compress decompress dictBuilder legacy deprecated
vpath %.c $(ZSTD_SUBDIR)

UNAME := $(shell uname)

# BUILD_DIR == 0 means "not chosen yet": pick a hashing tool so that a
# directory name can later be derived from the build flags.
BUILD_DIR ?= 0
ifeq ($(BUILD_DIR),0)

ifeq ($(UNAME), Darwin)
  HASH ?= md5
endif
ifeq ($(UNAME), FreeBSD)
  HASH ?= gmd5sum
endif
ifeq ($(UNAME), OpenBSD)
  HASH ?= md5
endif
# Default for every other system.
HASH ?= md5sum

# Check that the selected tool actually runs; otherwise fall back to one
# fixed build directory shared by all configurations.
HAVE_HASH := $(shell echo 1 | $(HASH) > /dev/null && echo 1 || echo 0)
ifeq ($(HAVE_HASH),0)
  $(info warning : could not find HASH ($(HASH)), needed to differentiate builds using different flags)
  BUILD_DIR := obj/generic_noconf
endif

endif   # $(BUILD_DIR) == 0

# macOS linker doesn't support -soname, and use different extension
# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
# All of these are recursive (=) variables, so the order of the assignments
# inside each branch does not matter.
ifeq ($(UNAME), Darwin)
  SHARED_EXT = dylib
  SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
  SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
  SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
else
  SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
  SHARED_EXT = so
  SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR)
  SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
endif

.PHONY: default lib-all all clean install uninstall

# Plain `make` builds the library through the %-release pattern rule,
# i.e. with DEBUGFLAGS emptied.
default: lib-release

# alias
lib-all: all

all: lib

ifeq ($(BUILD_DIR),0)
# determine a BUILD_DIR
# Re-invoke make for the same target with BUILD_DIR derived from a hash of the
# compiler, flags and file list, so each configuration gets its own objects.

.PHONY: libzstd.a   # not the actual recipe
libzstd.a:
	$(MAKE) --no-print-directory $@ \
	    BUILD_DIR=obj/conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(ZSTD_FILES) | $(HASH) | head -c 16) \
	    CPPFLAGS="$(CPPFLAGS)"

else
# $(BUILD_DIR) is defined

ZSTD_STATLIB_DIR := $(BUILD_DIR)/static
ZSTD_STATLIB := $(ZSTD_STATLIB_DIR)/libzstd.a
ZSTD_STATLIB_OBJ := $(addprefix $(ZSTD_STATLIB_DIR)/,$(ZSTD_LOCAL_OBJ))
# Plain `=` is required here: GNU make predefines ARFLAGS, so a `?=`
# assignment would never take effect and the built-in default would be used.
# A value given on the make command line still overrides this.
$(ZSTD_STATLIB): ARFLAGS = rcs
# The output directory is order-only: it must exist, but its timestamp must
# not trigger a rebuild of the archive.
$(ZSTD_STATLIB): | $(ZSTD_STATLIB_DIR)
$(ZSTD_STATLIB): $(ZSTD_STATLIB_OBJ)
	@echo compiling static library
	$(AR) $(ARFLAGS) $@ $^

# Always refresh the top-level symlink so it points at the archive of the
# configuration that was just requested.
.PHONY: libzstd.a  # must be run every time
libzstd.a: $(ZSTD_STATLIB)
	ln -sf $< $@

endif

ifneq (,$(filter Windows%,$(TARGET_SYSTEM)))

# Windows: the DLL and its import library are linked straight from the sources.
LIBZSTD = dll\libzstd.dll
$(LIBZSTD): $(ZSTD_FILES)
	@echo compiling dynamic library $(LIBVER)
	$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll\libzstd.dll.a -shared $^ -o $@

else

LIBZSTD = libzstd.$(SHARED_EXT_VER)
# Target-specific flags; they also apply to the prerequisites of $(LIBZSTD).
$(LIBZSTD): CFLAGS += -fPIC
$(LIBZSTD): LDFLAGS += -shared -fvisibility=hidden

ifeq ($(BUILD_DIR),0)
# determine a BUILD_DIR
# Re-invoke make for the same target with BUILD_DIR derived from a hash of the
# compiler, flags and file list; the effective flags are passed down explicitly.

.PHONY: $(LIBZSTD)   # not the actual recipe
$(LIBZSTD):
	$(MAKE) --no-print-directory $@ \
	    BUILD_DIR=obj/conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(ZSTD_FILES) | $(HASH) | head -c 16) \
	    CPPFLAGS="$(CPPFLAGS)" \
	    CFLAGS="$(CFLAGS)" \
	    LDFLAGS="$(LDFLAGS)"

else
# $(BUILD_DIR) is defined

ZSTD_DYNLIB_DIR := $(BUILD_DIR)/dynamic
ZSTD_DYNLIB := $(ZSTD_DYNLIB_DIR)/$(LIBZSTD)
ZSTD_DYNLIB_OBJ := $(addprefix $(ZSTD_DYNLIB_DIR)/,$(ZSTD_LOCAL_OBJ))

# The output directory is order-only: it must exist, but its timestamp must
# not trigger a relink.
$(ZSTD_DYNLIB): | $(ZSTD_DYNLIB_DIR)
$(ZSTD_DYNLIB): $(ZSTD_DYNLIB_OBJ)
	@echo compiling dynamic library $(LIBVER)
	$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
	@echo creating versioned links
	ln -sf $@ libzstd.$(SHARED_EXT_MAJOR)
	ln -sf $@ libzstd.$(SHARED_EXT)

# Always refresh the top-level symlink so it points at the library of the
# configuration that was just requested.
.PHONY: $(LIBZSTD)  # must be run every time
$(LIBZSTD): $(ZSTD_DYNLIB)
	ln -sf $< $@

endif  # if BUILD_DIR
endif  # if windows

# Convenience name for the dynamic library, whatever its platform file name.
.PHONY: libzstd
libzstd : $(LIBZSTD)

# Builds both the static and the dynamic library.
.PHONY: lib
lib : libzstd.a libzstd

# note : do not define lib-mt or lib-release as .PHONY
# make does not consider implicit pattern rule for .PHONY target

# <target>-mt builds <target> with multithreading enabled.
%-mt : CPPFLAGS += -DZSTD_MULTITHREAD
%-mt : LDFLAGS  += -pthread
%-mt : %
	@echo multi-threading build completed

# <target>-release builds <target> with DEBUGFLAGS emptied.
%-release : DEBUGFLAGS :=
%-release : %
	@echo release build completed

# Automatic header dependency tracking.
# DEPFLAGS is deliberately a recursive (=) variable: $@ must be expanded
# inside each recipe, not when this line is parsed. The name of the .d file
# to write is appended by the recipe, right after -MF.
DEPFLAGS = -MT $@ -MMD -MP -MF

# Objects of the dynamic library. The matching .d file is a prerequisite and
# the output directory is order-only (must exist, timestamp ignored).
$(ZSTD_DYNLIB_DIR)/%.o: %.c $(ZSTD_DYNLIB_DIR)/%.d | $(ZSTD_DYNLIB_DIR)
	@echo $@
	$(COMPILE.c) $(DEPFLAGS) $(ZSTD_DYNLIB_DIR)/$*.d $(OUTPUT_OPTION) $<

# Objects of the static library, same scheme in its own directory.
$(ZSTD_STATLIB_DIR)/%.o: %.c $(ZSTD_STATLIB_DIR)/%.d | $(ZSTD_STATLIB_DIR)
	@echo $@
	$(COMPILE.c) $(DEPFLAGS) $(ZSTD_STATLIB_DIR)/$*.d $(OUTPUT_OPTION) $<

# Create the build directories on demand.
MKDIR ?= mkdir
$(BUILD_DIR) $(ZSTD_DYNLIB_DIR) $(ZSTD_STATLIB_DIR):
	$(MKDIR) -p $@

# One .d file per object. They are declared as targets with no recipe, and
# only the ones that already exist are included.
DEPFILES := $(ZSTD_DYNLIB_OBJ:.o=.d) $(ZSTD_STATLIB_OBJ:.o=.d)
$(DEPFILES):

include $(wildcard $(DEPFILES))
2017-02-03 04:54:14 +00:00
2017-09-11 21:09:34 +00:00
# Special case: dynamic library built in single-thread mode _and_ without
# zstdmt_compress.c. All remaining sources are compiled and linked in one
# compiler invocation.
ZSTDMT_FILES = compress/zstdmt_compress.c
ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(ZSTD_FILES))

libzstd-nomt: LDFLAGS += -shared -fPIC -fvisibility=hidden
libzstd-nomt: $(ZSTD_NOMT_FILES)
	@echo compiling single-thread dynamic library $(LIBVER)
	@echo files : $(ZSTD_NOMT_FILES)
	$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
2017-09-11 21:09:34 +00:00
2015-01-24 00:58:16 +00:00
# Remove build products from the library directory: debug symbol bundles,
# objects, archives, coverage data, shared libraries and their versioned
# names, the generated pkg-config file, the Windows dll artifacts, the
# libzstd-nomt outputs and everything under obj/.
clean:
	$(RM) -r *.dSYM   # macOS-specific
	$(RM) core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc
	$(RM) dll/libzstd.dll dll/libzstd.lib libzstd-nomt*
	$(RM) -r obj/*
	@echo Cleaning library completed
2016-12-23 09:05:49 +00:00
#-----------------------------------------------------------------------------
2020-05-07 14:59:56 +00:00
# make install is validated only for the environments listed below
2016-12-23 09:05:49 +00:00
#-----------------------------------------------------------------------------
2020-10-22 06:38:33 +00:00
i f n e q ( , $( filter $ ( UNAME ) ,Linux Darwin GNU /kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku ) )
2016-12-22 19:14:37 +00:00
2020-05-08 20:01:26 +00:00
# On these platforms the default build also generates the pkg-config file.
all: libzstd.pc

# Staging root prepended to every installation path (empty by default).
DESTDIR ?=

# Directory variables: GNU conventions prefer lowercase names,
# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
# Both spellings are supported (uppercase is the BSD habit); each uppercase
# variable defaults to its lowercase twin, and the recipes use the uppercase
# form.
prefix      ?= /usr/local
PREFIX      ?= $(prefix)
exec_prefix ?= $(PREFIX)
EXEC_PREFIX ?= $(exec_prefix)
libdir      ?= $(EXEC_PREFIX)/lib
LIBDIR      ?= $(libdir)
includedir  ?= $(PREFIX)/include
INCLUDEDIR  ?= $(includedir)
2016-12-22 19:14:37 +00:00
2020-06-29 20:44:23 +00:00
# Values substituted for @LIBDIR@ and @INCLUDEDIR@ when libzstd.pc is
# generated: LIBDIR with a leading EXEC_PREFIX removed, and INCLUDEDIR with a
# leading PREFIX removed. The prefix must be followed by "/" or by the end of
# the string; sed -n ... p prints nothing when the path does not match.
PCLIBDIR ?= $(shell echo "$(LIBDIR)" | sed -n $(SED_ERE_OPT) -e "s@^$(EXEC_PREFIX)(/|$$)@@p")
PCINCDIR ?= $(shell echo "$(INCLUDEDIR)" | sed -n $(SED_ERE_OPT) -e "s@^$(PREFIX)(/|$$)@@p")

ifeq (,$(PCLIBDIR))
# Additional prefix check is required, since the empty string is technically a
# valid PCLIBDIR
ifeq (,$(shell echo "$(LIBDIR)" | sed -n $(SED_ERE_OPT) -e "\\@^$(EXEC_PREFIX)(/|$$)@ p"))
# LIBDIR is matched against EXEC_PREFIX, so that is the variable to name here.
$(error configured libdir ($(LIBDIR)) is outside of exec_prefix ($(EXEC_PREFIX)), can't generate pkg-config file)
endif
endif

ifeq (,$(PCINCDIR))
# Additional prefix check is required, since the empty string is technically a
# valid PCINCDIR
ifeq (,$(shell echo "$(INCLUDEDIR)" | sed -n $(SED_ERE_OPT) -e "\\@^$(PREFIX)(/|$$)@ p"))
# INCLUDEDIR is matched against PREFIX, so that is the variable to name here.
$(error configured includedir ($(INCLUDEDIR)) is outside of prefix ($(PREFIX)), can't generate pkg-config file)
endif
endif
2020-10-22 06:38:33 +00:00
# Where libzstd.pc gets installed: under $(PREFIX)/libdata on FreeBSD, NetBSD
# and DragonFly, next to the library everywhere else.
ifneq (,$(filter $(UNAME),FreeBSD NetBSD DragonFly))
  PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig
else
  PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig
endif

# Installer command: ginstall on SunOS, install on every other platform.
ifneq (,$(filter $(UNAME),SunOS))
  INSTALL ?= ginstall
else
  INSTALL ?= install
endif

# Programs keep the installer's default mode; data files are installed 644.
INSTALL_PROGRAM ?= $(INSTALL)
INSTALL_DATA    ?= $(INSTALL) -m 644
2015-01-24 00:58:16 +00:00
2020-02-26 18:52:49 +00:00
# Generate the pkg-config file from its template by replacing the @PREFIX@,
# @LIBDIR@, @INCLUDEDIR@ and @VERSION@ placeholders.
libzstd.pc: libzstd.pc.in
	@echo creating pkgconfig
	@sed $(SED_ERE_OPT) \
		-e 's|@PREFIX@|$(PREFIX)|' \
		-e 's|@LIBDIR@|$(PCLIBDIR)|' \
		-e 's|@INCLUDEDIR@|$(PCINCDIR)|' \
		-e 's|@VERSION@|$(VERSION)|' \
		$< >$@
2015-01-24 00:58:16 +00:00
2018-04-16 18:44:49 +00:00
# Installation targets. Every path is prefixed with $(DESTDIR) so the tree
# can be staged. None of these names is a file produced by the build, so
# they are declared phony: a stray file called e.g. "install-pc" must not
# make the target look up to date.
.PHONY: install install-pc install-static install-shared install-includes

# install : pkg-config file, static library, shared library and headers
install: install-pc install-static install-shared install-includes
	@echo zstd static and shared library installed

# install-pc : the generated pkg-config file
install-pc: libzstd.pc
	$(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/
	$(INSTALL_DATA) libzstd.pc $(DESTDIR)$(PKGCONFIGDIR)/

# install-static : the static archive
install-static: libzstd.a
	@echo Installing static library
	$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
	$(INSTALL_DATA) libzstd.a $(DESTDIR)$(LIBDIR)

# install-shared : the shared library plus its two versioned symbolic links
install-shared: libzstd
	@echo Installing shared library
	$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
	$(INSTALL_PROGRAM) $(LIBZSTD) $(DESTDIR)$(LIBDIR)
	ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
	ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)

# install-includes : the three public headers
install-includes:
	@echo Installing includes
	$(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR)/
	$(INSTALL_DATA) zstd.h $(DESTDIR)$(INCLUDEDIR)
	$(INSTALL_DATA) common/zstd_errors.h $(DESTDIR)$(INCLUDEDIR)
	$(INSTALL_DATA) dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR)
2015-01-24 00:58:16 +00:00
# Remove everything the install targets put under $(DESTDIR): the static
# archive, the shared library with its two links, the pkg-config file and
# the three public headers. The directories themselves are left in place.
uninstall:
	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.a
	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
	$(RM) $(DESTDIR)$(LIBDIR)/$(LIBZSTD)
	$(RM) $(DESTDIR)$(PKGCONFIGDIR)/libzstd.pc
	$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd.h
	$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd_errors.h
	$(RM) $(DESTDIR)$(INCLUDEDIR)/zdict.h
	@echo zstd libraries successfully uninstalled
e n d i f