zstd/lib/Makefile

455 lines
14 KiB
Makefile
Raw Normal View History

# ################################################################
# Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
2015-01-24 00:58:16 +00:00
# All rights reserved.
2016-05-28 23:06:30 +00:00
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ################################################################
2015-01-24 00:58:16 +00:00
2020-10-23 17:41:17 +00:00
.PHONY: default
default: lib-release
2020-10-23 17:22:52 +00:00
# Silent mode is the default; pass V=1 or VERBOSE=1 to echo every command.
# With both V and VERBOSE empty, the target below is the special target
# `.SILENT:`. Any non-empty value changes the target name (e.g. `1.SILENT`),
# so the special target is no longer declared and recipes are echoed.
$(V)$(VERBOSE).SILENT:
support for verbose make A commonly accepted makefile idiom is V=1 or VERBOSE=1 to request the printing of all commands. This is not "default" though, and must be manually added. Example : Before : ``` make libzstd compiling dynamic library 1.4.5 creating versioned links make libzstd V=1 compiling dynamic library 1.4.5 creating versioned links ``` After : ``` make libzstd compiling dynamic library 1.4.5 creating versioned links make libzstd V=1 compiling dynamic library 1.4.5 cc -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=5 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef -Wpointer-arith -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings -Wredundant-decls -Wmissing-prototypes -Wc++-compat -O3 common/debug.c common/entropy_common.c common/error_private.c common/fse_decompress.c common/pool.c common/threading.c common/xxhash.c common/zstd_common.c compress/fse_compress.c compress/hist.c compress/huf_compress.c compress/zstd_compress.c compress/zstd_compress_literals.c compress/zstd_compress_sequences.c compress/zstd_compress_superblock.c compress/zstd_double_fast.c compress/zstd_fast.c compress/zstd_lazy.c compress/zstd_ldm.c compress/zstd_opt.c compress/zstdmt_compress.c decompress/huf_decompress.c decompress/zstd_ddict.c decompress/zstd_decompress.c decompress/zstd_decompress_block.c deprecated/zbuff_common.c deprecated/zbuff_compress.c deprecated/zbuff_decompress.c dictBuilder/cover.c dictBuilder/divsufsort.c dictBuilder/fastcover.c dictBuilder/zdict.c legacy/zstd_v05.c legacy/zstd_v06.c legacy/zstd_v07.c -shared -fPIC -fvisibility=hidden -Wl,-soname=libzstd.so.1 -o libzstd.so.1.4.5 creating versioned links ln -sf libzstd.so.1.4.5 libzstd.so.1 ln -sf libzstd.so.1.4.5 libzstd.so ```
2020-05-07 14:59:56 +00:00
2020-10-21 11:01:01 +00:00
# When cross-compiling from Linux to Windows,
# one might need to set TARGET_SYSTEM to "Windows".
# Building from Fedora fails without it
# (Ubuntu and Debian do not need to set anything).
2020-05-08 03:07:40 +00:00
TARGET_SYSTEM ?= $(OS)
2015-01-24 00:58:16 +00:00
# Version numbers
2016-07-17 18:42:21 +00:00
# Each *_SCRIPT variable stores a backquoted sed command (the command text,
# not its output). The sed expression prints the number found on the matching
# `define ZSTD_VERSION_*` line of ./zstd.h.
LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
# The three scripts joined with dots: major.minor.patch.
LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
# Evaluate the scripts once, at parse time: `echo` makes the shell perform
# the backquote substitution and $(shell) captures the result.
LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT))
LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
LIBVER := $(shell echo $(LIBVER_SCRIPT))
2015-12-31 17:32:15 +00:00
VERSION?= $(LIBVER)
perf improvements for zstd decode (#1668) * perf improvements for zstd decode tldr: 7.5% average decode speedup on silesia corpus at compression levels 1-3 (sandy bridge) Background: while investigating zstd perf differences between clang and gcc I noticed that even though gcc is vectorizing the loop in in wildcopy, it was not being done as well as could be done by hand. The sites where wildcopy is invoked have an interesting distribution of lengths to be copied. The loop trip count is rarely above 1, yet long copies are common enough to make their performance important.The code in zstd_decompress.c to invoke wildcopy handles the latter well but the gcc autovectorizer introduces a needlessly expensive startup check for vectorization. See how GCC autovectorizes the loop here: https://godbolt.org/z/apr0x0 Here is the code after this diff has been applied: (left hand side is the good one, right is with vectorizer on) After: https://godbolt.org/z/OwO4F8 Note that autovectorization still does not do a good job on the optimized version, so it's turned off\ via attribute and flag. I found that neither attribute nor command-line flag were entirely successful in turning off vectorization, which is why there were both. 
silesia benchmark data - second triad of each file is with the original code: file orig compressedratio encode decode change 1#dickens 10192446-> 4268865(2.388), 198.9MB/s 709.6MB/s 2#dickens 10192446-> 3876126(2.630), 128.7MB/s 552.5MB/s 3#dickens 10192446-> 3682956(2.767), 104.6MB/s 537MB/s 1#dickens 10192446-> 4268865(2.388), 195.4MB/s 659.5MB/s 7.60% 2#dickens 10192446-> 3876126(2.630), 127MB/s 516.3MB/s 7.01% 3#dickens 10192446-> 3682956(2.767), 105MB/s 479.5MB/s 11.99% 1#mozilla 51220480-> 20117517(2.546), 285.4MB/s 734.9MB/s 2#mozilla 51220480-> 19067018(2.686), 220.8MB/s 686.3MB/s 3#mozilla 51220480-> 18508283(2.767), 152.2MB/s 669.4MB/s 1#mozilla 51220480-> 20117517(2.546), 283.4MB/s 697.9MB/s 5.30% 2#mozilla 51220480-> 19067018(2.686), 225.9MB/s 665MB/s 3.20% 3#mozilla 51220480-> 18508283(2.767), 154.5MB/s 640.6MB/s 4.50% 1#mr 9970564-> 3840242(2.596), 262.4MB/s 899.8MB/s 2#mr 9970564-> 3600976(2.769), 181.2MB/s 717.9MB/s 3#mr 9970564-> 3563987(2.798), 116.3MB/s 620MB/s 1#mr 9970564-> 3840242(2.596), 253.2MB/s 827.3MB/s 8.76% 2#mr 9970564-> 3600976(2.769), 177.4MB/s 655.4MB/s 9.54% 3#mr 9970564-> 3563987(2.798), 111.2MB/s 564.2MB/s 9.89% 1#nci 33553445-> 2849306(11.78), 575.2MB/s , 1335.8MB/s 2#nci 33553445-> 2890166(11.61), 509.3MB/s , 1238.1MB/s 3#nci 33553445-> 2857408(11.74), 431MB/s , 1210.7MB/s 1#nci 33553445-> 2849306(11.78), 565.4MB/s , 1220.2MB/s 9.47% 2#nci 33553445-> 2890166(11.61), 508.2MB/s , 1128.4MB/s 9.72% 3#nci 33553445-> 2857408(11.74), 429.1MB/s , 1097.7MB/s 10.29% 1#ooffice 6152192-> 3590954(1.713), 231.4MB/s , 662.6MB/s 2#ooffice 6152192-> 3323931(1.851), 162.8MB/s , 592.6MB/s 3#ooffice 6152192-> 3145625(1.956), 99.9MB/s , 549.6MB/s 1#ooffice 6152192-> 3590954(1.713), 224.7MB/s , 624.2MB/s 6.15% 2#ooffice 6152192-> 3323931 (1.851), 155MB/s , 564.5MB/s 4.98% 3#ooffice 6152192-> 3145625(1.956), 101.1MB/s , 521.2MB/s 5.45% 1#osdb 10085684-> 3739042(2.697), 271.9MB/s 876.4MB/s 2#osdb 10085684-> 3493875(2.887), 208.2MB/s 857MB/s 3#osdb 
10085684-> 3515831(2.869), 135.3MB/s 805.4MB/s 1#osdb 10085684-> 3739042(2.697), 257.4MB/s 793.8MB/s 10.41% 2#osdb 10085684-> 3493875(2.887), 209.7MB/s 776.1MB/s 10.42% 3#osdb 10085684-> 3515831(2.869), 130.6MB/s 727.7MB/s 10.68% 1#reymont 6627202-> 2152771(3.078), 198.9MB/s 696.2MB/s 2#reymont 6627202-> 2071140(3.200), 170MB/s 595.2MB/s 3#reymont 6627202-> 1953597(3.392), 128.5MB/s 609.7MB/s 1#reymont 6627202-> 2152771(3.078), 199.6MB/s 655.2MB/s 6.26% 2#reymont 6627202-> 2071140(3.200), 168.2MB/s 554.4MB/s 7.36% 3#reymont 6627202-> 1953597(3.392), 128.7MB/s 557.4MB/s 9.38% 1#samba 21606400-> 5510994(3.921), 338.1MB/s 1066MB/s 2#samba 21606400-> 5240208(4.123), 258.7MB/s 992.3MB/s 3#samba 21606400-> 5003358(4.318), 200.2MB/s 991.1MB/s 1#samba 21606400-> 5510994(3.921), 330.8MB/s 974MB/s 9.45% 2#samba 21606400-> 5240208(4.123), 257.9MB/s 919.4MB/s 7.93% 3#samba 21606400-> 5003358(4.318), 198.5MB/s 908.9MB/s 9.04% 1#sao 7251944-> 6256401(1.159), 194.6MB/s 602.2MB/s 2#sao 7251944-> 5808761(1.248), 128.2MB/s 532.1MB/s 3#sao 7251944-> 5556318(1.305), 73MB/s 509.4MB/s 1#sao 7251944-> 6256401(1.159), 198.7MB/s 580.7MB/s 3.70% 2#sao 7251944-> 5808761(1.248), 129.1MB/s 502.7MB/s 5.85% 3#sao 7251944-> 5556318(1.305), 74.6MB/s 493.1MB/s 3.31% 1#webster 41458703-> 13692222(3.028), 222.3MB/s 752MB/s 2#webster 41458703-> 12842646(3.228), 157.6MB/s 532.2MB/s 3#webster 41458703-> 12191964(3.400), 124MB/s 468.5MB/s 1#webster 41458703-> 13692222(3.028), 219.7MB/s 697MB/s 7.89% 2#webster 41458703-> 12842646(3.228), 153.9MB/s 495.4MB/s 7.43% 3#webster 41458703-> 12191964(3.400), 124.8MB/s 444.8MB/s 5.33% 1#xml 5345280-> 696652(7.673), 485MB/s , 1333.9MB/s 2#xml 5345280-> 681492(7.843), 405.2MB/s , 1237.5MB/s 3#xml 5345280-> 639057(8.364), 328.5MB/s , 1281.3MB/s 1#xml 5345280-> 696652(7.673), 473.1MB/s , 1232.4MB/s 8.24% 2#xml 5345280-> 681492(7.843), 398.6MB/s , 1145.9MB/s 7.99% 3#xml 5345280-> 639057(8.364), 327.1MB/s , 1175MB/s 9.05% 1#x-ray 8474240-> 6772557(1.251), 521.3MB/s 
762.6MB/s 2#x-ray 8474240-> 6684531(1.268), 230.5MB/s 688.5MB/s 3#x-ray 8474240-> 6166679(1.374), 68.7MB/s 478.8MB/s 1#x-ray 8474240-> 6772557(1.251), 502.8MB/s 736.7MB/s 3.52% 2#x-ray 8474240-> 6684531(1.268), 224.4MB/s 662MB/s 4.00% 3#x-ray 8474240-> 6166679(1.374), 67.3MB/s 437.8MB/s 9.37% 7.51% * makefile changed to only pass -fno-tree-vectorize to gcc * <Replace this line with a title. Use 1 line only, 67 chars or less> Don't add "no-tree-vectorize" attribute on clang (which defines __GNUC__) * fix for warning/error with subtraction of void* pointers * fix c90 conformance issue - ISO C90 forbids mixed declarations and code * Fix assert for negative diff, only when there is no overlap * fix overflow revealed in fuzzing tests * tweak for small speed increase
2019-07-11 22:31:07 +00:00
CCVER := $(shell $(CC) --version)
2015-01-24 00:58:16 +00:00
# ZSTD_LIB_MINIFY is a convenience switch: any value other than 0 selects
# space-optimized defaults for several other build variables.
# Every default below uses ?= so an explicit user setting still wins.
ZSTD_LIB_MINIFY ?= 0

ifeq ($(ZSTD_LIB_MINIFY), 0)
  # Regular build: optimize for speed.
  CFLAGS += -O3
else
  # Trimmed-down feature set.
  ZSTD_STRIP_ERROR_STRINGS ?= 1
  ZSTD_NO_INLINE ?= 1
  ZSTD_FORCE_DECOMPRESS_SHORT ?= 1
  HUF_FORCE_DECOMPRESS_X1 ?= 1
  ZSTD_LIB_DEPRECATED ?= 0
  ZSTD_LEGACY_SUPPORT ?= 0

  # Probe: does $(CC) accept -Oz? (1 = yes, 0 = no)
  HAVE_CC_OZ ?= $(shell echo "" | $(CC) -Oz -x c -c - -o /dev/null 2> /dev/null && echo 1 || echo 0)
  ifeq ($(HAVE_CC_OZ), 0)
    CFLAGS += -Os
  else
    # Some compilers (clang) support an even more space-optimized setting.
    CFLAGS += -Oz
  endif

  CFLAGS += -fno-stack-protector -fomit-frame-pointer -fno-ident -DDYNAMIC_BMI2=0 -DNDEBUG
endif
DEBUGLEVEL ?= 0
CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -DDEBUGLEVEL=$(DEBUGLEVEL)
2020-05-08 03:07:40 +00:00
ifeq ($(TARGET_SYSTEM),Windows_NT) # MinGW assumed
CPPFLAGS += -D__USE_MINGW_ANSI_STDIO # compatibility with %zu formatting
endif
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
-Wstrict-prototypes -Wundef -Wpointer-arith \
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
-Wredundant-decls -Wmissing-prototypes -Wc++-compat
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS)
2015-01-24 00:58:16 +00:00
# Probe whether the local grep accepts --color=never (1 = yes, 0 = no).
# Evaluated once, at parse time.
HAVE_COLORNEVER := $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
GREP_OPTIONS ?=
# The variable must be referenced as $(HAVE_COLORNEVER): a bare
# `$HAVE_COLORNEVER` is parsed by make as `$(H)AVE_COLORNEVER`, which never
# equals 1, so the option below would never be added.
ifeq ($(HAVE_COLORNEVER), 1)
  GREP_OPTIONS += --color=never
endif
# grep command line used by the rest of this Makefile.
GREP = grep $(GREP_OPTIONS)
SED_ERE_OPT ?= -E
2018-06-04 16:56:29 +00:00
# $(call zstd_sources_in,DIR): sorted list of the .c files located directly in DIR.
zstd_sources_in = $(sort $(wildcard $(1)/*.c))

# Per-module source lists.
ZSTDCOMMON_FILES := $(call zstd_sources_in,common)
ZSTDCOMP_FILES   := $(call zstd_sources_in,compress)
ZSTDDECOMP_FILES := $(call zstd_sources_in,decompress)
ZDICT_FILES      := $(call zstd_sources_in,dictBuilder)
ZDEPR_FILES      := $(call zstd_sources_in,deprecated)

# The library source list starts with the common module; other modules are
# appended to it further down.
ZSTD_FILES := $(ZSTDCOMMON_FILES)
perf improvements for zstd decode (#1668) * perf improvements for zstd decode tldr: 7.5% average decode speedup on silesia corpus at compression levels 1-3 (sandy bridge) Background: while investigating zstd perf differences between clang and gcc I noticed that even though gcc is vectorizing the loop in in wildcopy, it was not being done as well as could be done by hand. The sites where wildcopy is invoked have an interesting distribution of lengths to be copied. The loop trip count is rarely above 1, yet long copies are common enough to make their performance important.The code in zstd_decompress.c to invoke wildcopy handles the latter well but the gcc autovectorizer introduces a needlessly expensive startup check for vectorization. See how GCC autovectorizes the loop here: https://godbolt.org/z/apr0x0 Here is the code after this diff has been applied: (left hand side is the good one, right is with vectorizer on) After: https://godbolt.org/z/OwO4F8 Note that autovectorization still does not do a good job on the optimized version, so it's turned off\ via attribute and flag. I found that neither attribute nor command-line flag were entirely successful in turning off vectorization, which is why there were both. 
silesia benchmark data - second triad of each file is with the original code: file orig compressedratio encode decode change 1#dickens 10192446-> 4268865(2.388), 198.9MB/s 709.6MB/s 2#dickens 10192446-> 3876126(2.630), 128.7MB/s 552.5MB/s 3#dickens 10192446-> 3682956(2.767), 104.6MB/s 537MB/s 1#dickens 10192446-> 4268865(2.388), 195.4MB/s 659.5MB/s 7.60% 2#dickens 10192446-> 3876126(2.630), 127MB/s 516.3MB/s 7.01% 3#dickens 10192446-> 3682956(2.767), 105MB/s 479.5MB/s 11.99% 1#mozilla 51220480-> 20117517(2.546), 285.4MB/s 734.9MB/s 2#mozilla 51220480-> 19067018(2.686), 220.8MB/s 686.3MB/s 3#mozilla 51220480-> 18508283(2.767), 152.2MB/s 669.4MB/s 1#mozilla 51220480-> 20117517(2.546), 283.4MB/s 697.9MB/s 5.30% 2#mozilla 51220480-> 19067018(2.686), 225.9MB/s 665MB/s 3.20% 3#mozilla 51220480-> 18508283(2.767), 154.5MB/s 640.6MB/s 4.50% 1#mr 9970564-> 3840242(2.596), 262.4MB/s 899.8MB/s 2#mr 9970564-> 3600976(2.769), 181.2MB/s 717.9MB/s 3#mr 9970564-> 3563987(2.798), 116.3MB/s 620MB/s 1#mr 9970564-> 3840242(2.596), 253.2MB/s 827.3MB/s 8.76% 2#mr 9970564-> 3600976(2.769), 177.4MB/s 655.4MB/s 9.54% 3#mr 9970564-> 3563987(2.798), 111.2MB/s 564.2MB/s 9.89% 1#nci 33553445-> 2849306(11.78), 575.2MB/s , 1335.8MB/s 2#nci 33553445-> 2890166(11.61), 509.3MB/s , 1238.1MB/s 3#nci 33553445-> 2857408(11.74), 431MB/s , 1210.7MB/s 1#nci 33553445-> 2849306(11.78), 565.4MB/s , 1220.2MB/s 9.47% 2#nci 33553445-> 2890166(11.61), 508.2MB/s , 1128.4MB/s 9.72% 3#nci 33553445-> 2857408(11.74), 429.1MB/s , 1097.7MB/s 10.29% 1#ooffice 6152192-> 3590954(1.713), 231.4MB/s , 662.6MB/s 2#ooffice 6152192-> 3323931(1.851), 162.8MB/s , 592.6MB/s 3#ooffice 6152192-> 3145625(1.956), 99.9MB/s , 549.6MB/s 1#ooffice 6152192-> 3590954(1.713), 224.7MB/s , 624.2MB/s 6.15% 2#ooffice 6152192-> 3323931 (1.851), 155MB/s , 564.5MB/s 4.98% 3#ooffice 6152192-> 3145625(1.956), 101.1MB/s , 521.2MB/s 5.45% 1#osdb 10085684-> 3739042(2.697), 271.9MB/s 876.4MB/s 2#osdb 10085684-> 3493875(2.887), 208.2MB/s 857MB/s 3#osdb 
10085684-> 3515831(2.869), 135.3MB/s 805.4MB/s 1#osdb 10085684-> 3739042(2.697), 257.4MB/s 793.8MB/s 10.41% 2#osdb 10085684-> 3493875(2.887), 209.7MB/s 776.1MB/s 10.42% 3#osdb 10085684-> 3515831(2.869), 130.6MB/s 727.7MB/s 10.68% 1#reymont 6627202-> 2152771(3.078), 198.9MB/s 696.2MB/s 2#reymont 6627202-> 2071140(3.200), 170MB/s 595.2MB/s 3#reymont 6627202-> 1953597(3.392), 128.5MB/s 609.7MB/s 1#reymont 6627202-> 2152771(3.078), 199.6MB/s 655.2MB/s 6.26% 2#reymont 6627202-> 2071140(3.200), 168.2MB/s 554.4MB/s 7.36% 3#reymont 6627202-> 1953597(3.392), 128.7MB/s 557.4MB/s 9.38% 1#samba 21606400-> 5510994(3.921), 338.1MB/s 1066MB/s 2#samba 21606400-> 5240208(4.123), 258.7MB/s 992.3MB/s 3#samba 21606400-> 5003358(4.318), 200.2MB/s 991.1MB/s 1#samba 21606400-> 5510994(3.921), 330.8MB/s 974MB/s 9.45% 2#samba 21606400-> 5240208(4.123), 257.9MB/s 919.4MB/s 7.93% 3#samba 21606400-> 5003358(4.318), 198.5MB/s 908.9MB/s 9.04% 1#sao 7251944-> 6256401(1.159), 194.6MB/s 602.2MB/s 2#sao 7251944-> 5808761(1.248), 128.2MB/s 532.1MB/s 3#sao 7251944-> 5556318(1.305), 73MB/s 509.4MB/s 1#sao 7251944-> 6256401(1.159), 198.7MB/s 580.7MB/s 3.70% 2#sao 7251944-> 5808761(1.248), 129.1MB/s 502.7MB/s 5.85% 3#sao 7251944-> 5556318(1.305), 74.6MB/s 493.1MB/s 3.31% 1#webster 41458703-> 13692222(3.028), 222.3MB/s 752MB/s 2#webster 41458703-> 12842646(3.228), 157.6MB/s 532.2MB/s 3#webster 41458703-> 12191964(3.400), 124MB/s 468.5MB/s 1#webster 41458703-> 13692222(3.028), 219.7MB/s 697MB/s 7.89% 2#webster 41458703-> 12842646(3.228), 153.9MB/s 495.4MB/s 7.43% 3#webster 41458703-> 12191964(3.400), 124.8MB/s 444.8MB/s 5.33% 1#xml 5345280-> 696652(7.673), 485MB/s , 1333.9MB/s 2#xml 5345280-> 681492(7.843), 405.2MB/s , 1237.5MB/s 3#xml 5345280-> 639057(8.364), 328.5MB/s , 1281.3MB/s 1#xml 5345280-> 696652(7.673), 473.1MB/s , 1232.4MB/s 8.24% 2#xml 5345280-> 681492(7.843), 398.6MB/s , 1145.9MB/s 7.99% 3#xml 5345280-> 639057(8.364), 327.1MB/s , 1175MB/s 9.05% 1#x-ray 8474240-> 6772557(1.251), 521.3MB/s 
762.6MB/s 2#x-ray 8474240-> 6684531(1.268), 230.5MB/s 688.5MB/s 3#x-ray 8474240-> 6166679(1.374), 68.7MB/s 478.8MB/s 1#x-ray 8474240-> 6772557(1.251), 502.8MB/s 736.7MB/s 3.52% 2#x-ray 8474240-> 6684531(1.268), 224.4MB/s 662MB/s 4.00% 3#x-ray 8474240-> 6166679(1.374), 67.3MB/s 437.8MB/s 9.37% 7.51% * makefile changed to only pass -fno-tree-vectorize to gcc * <Replace this line with a title. Use 1 line only, 67 chars or less> Don't add "no-tree-vectorize" attribute on clang (which defines __GNUC__) * fix for warning/error with subtraction of void* pointers * fix c90 conformance issue - ISO C90 forbids mixed declarations and code * Fix assert for negative diff, only when there is no overlap * fix overflow revealed in fuzzing tests * tweak for small speed increase
2019-07-11 22:31:07 +00:00
ifeq ($(findstring GCC,$(CCVER)),GCC)
# Only when `$(CC) --version` mentions GCC: add -fno-tree-vectorize to CFLAGS
# for this single object file.
# NOTE(review): the target named here is decompress/zstd_decompress_block.o,
# while the object rules in this file build into $(ZSTD_STATLIB_DIR)/ and
# $(ZSTD_DYNLIB_DIR)/ - confirm this target-specific flag still reaches the
# object that is actually compiled.
decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize
perf improvements for zstd decode (#1668) * perf improvements for zstd decode tldr: 7.5% average decode speedup on silesia corpus at compression levels 1-3 (sandy bridge) Background: while investigating zstd perf differences between clang and gcc I noticed that even though gcc is vectorizing the loop in in wildcopy, it was not being done as well as could be done by hand. The sites where wildcopy is invoked have an interesting distribution of lengths to be copied. The loop trip count is rarely above 1, yet long copies are common enough to make their performance important.The code in zstd_decompress.c to invoke wildcopy handles the latter well but the gcc autovectorizer introduces a needlessly expensive startup check for vectorization. See how GCC autovectorizes the loop here: https://godbolt.org/z/apr0x0 Here is the code after this diff has been applied: (left hand side is the good one, right is with vectorizer on) After: https://godbolt.org/z/OwO4F8 Note that autovectorization still does not do a good job on the optimized version, so it's turned off\ via attribute and flag. I found that neither attribute nor command-line flag were entirely successful in turning off vectorization, which is why there were both. 
silesia benchmark data - second triad of each file is with the original code: file orig compressedratio encode decode change 1#dickens 10192446-> 4268865(2.388), 198.9MB/s 709.6MB/s 2#dickens 10192446-> 3876126(2.630), 128.7MB/s 552.5MB/s 3#dickens 10192446-> 3682956(2.767), 104.6MB/s 537MB/s 1#dickens 10192446-> 4268865(2.388), 195.4MB/s 659.5MB/s 7.60% 2#dickens 10192446-> 3876126(2.630), 127MB/s 516.3MB/s 7.01% 3#dickens 10192446-> 3682956(2.767), 105MB/s 479.5MB/s 11.99% 1#mozilla 51220480-> 20117517(2.546), 285.4MB/s 734.9MB/s 2#mozilla 51220480-> 19067018(2.686), 220.8MB/s 686.3MB/s 3#mozilla 51220480-> 18508283(2.767), 152.2MB/s 669.4MB/s 1#mozilla 51220480-> 20117517(2.546), 283.4MB/s 697.9MB/s 5.30% 2#mozilla 51220480-> 19067018(2.686), 225.9MB/s 665MB/s 3.20% 3#mozilla 51220480-> 18508283(2.767), 154.5MB/s 640.6MB/s 4.50% 1#mr 9970564-> 3840242(2.596), 262.4MB/s 899.8MB/s 2#mr 9970564-> 3600976(2.769), 181.2MB/s 717.9MB/s 3#mr 9970564-> 3563987(2.798), 116.3MB/s 620MB/s 1#mr 9970564-> 3840242(2.596), 253.2MB/s 827.3MB/s 8.76% 2#mr 9970564-> 3600976(2.769), 177.4MB/s 655.4MB/s 9.54% 3#mr 9970564-> 3563987(2.798), 111.2MB/s 564.2MB/s 9.89% 1#nci 33553445-> 2849306(11.78), 575.2MB/s , 1335.8MB/s 2#nci 33553445-> 2890166(11.61), 509.3MB/s , 1238.1MB/s 3#nci 33553445-> 2857408(11.74), 431MB/s , 1210.7MB/s 1#nci 33553445-> 2849306(11.78), 565.4MB/s , 1220.2MB/s 9.47% 2#nci 33553445-> 2890166(11.61), 508.2MB/s , 1128.4MB/s 9.72% 3#nci 33553445-> 2857408(11.74), 429.1MB/s , 1097.7MB/s 10.29% 1#ooffice 6152192-> 3590954(1.713), 231.4MB/s , 662.6MB/s 2#ooffice 6152192-> 3323931(1.851), 162.8MB/s , 592.6MB/s 3#ooffice 6152192-> 3145625(1.956), 99.9MB/s , 549.6MB/s 1#ooffice 6152192-> 3590954(1.713), 224.7MB/s , 624.2MB/s 6.15% 2#ooffice 6152192-> 3323931 (1.851), 155MB/s , 564.5MB/s 4.98% 3#ooffice 6152192-> 3145625(1.956), 101.1MB/s , 521.2MB/s 5.45% 1#osdb 10085684-> 3739042(2.697), 271.9MB/s 876.4MB/s 2#osdb 10085684-> 3493875(2.887), 208.2MB/s 857MB/s 3#osdb 
10085684-> 3515831(2.869), 135.3MB/s 805.4MB/s 1#osdb 10085684-> 3739042(2.697), 257.4MB/s 793.8MB/s 10.41% 2#osdb 10085684-> 3493875(2.887), 209.7MB/s 776.1MB/s 10.42% 3#osdb 10085684-> 3515831(2.869), 130.6MB/s 727.7MB/s 10.68% 1#reymont 6627202-> 2152771(3.078), 198.9MB/s 696.2MB/s 2#reymont 6627202-> 2071140(3.200), 170MB/s 595.2MB/s 3#reymont 6627202-> 1953597(3.392), 128.5MB/s 609.7MB/s 1#reymont 6627202-> 2152771(3.078), 199.6MB/s 655.2MB/s 6.26% 2#reymont 6627202-> 2071140(3.200), 168.2MB/s 554.4MB/s 7.36% 3#reymont 6627202-> 1953597(3.392), 128.7MB/s 557.4MB/s 9.38% 1#samba 21606400-> 5510994(3.921), 338.1MB/s 1066MB/s 2#samba 21606400-> 5240208(4.123), 258.7MB/s 992.3MB/s 3#samba 21606400-> 5003358(4.318), 200.2MB/s 991.1MB/s 1#samba 21606400-> 5510994(3.921), 330.8MB/s 974MB/s 9.45% 2#samba 21606400-> 5240208(4.123), 257.9MB/s 919.4MB/s 7.93% 3#samba 21606400-> 5003358(4.318), 198.5MB/s 908.9MB/s 9.04% 1#sao 7251944-> 6256401(1.159), 194.6MB/s 602.2MB/s 2#sao 7251944-> 5808761(1.248), 128.2MB/s 532.1MB/s 3#sao 7251944-> 5556318(1.305), 73MB/s 509.4MB/s 1#sao 7251944-> 6256401(1.159), 198.7MB/s 580.7MB/s 3.70% 2#sao 7251944-> 5808761(1.248), 129.1MB/s 502.7MB/s 5.85% 3#sao 7251944-> 5556318(1.305), 74.6MB/s 493.1MB/s 3.31% 1#webster 41458703-> 13692222(3.028), 222.3MB/s 752MB/s 2#webster 41458703-> 12842646(3.228), 157.6MB/s 532.2MB/s 3#webster 41458703-> 12191964(3.400), 124MB/s 468.5MB/s 1#webster 41458703-> 13692222(3.028), 219.7MB/s 697MB/s 7.89% 2#webster 41458703-> 12842646(3.228), 153.9MB/s 495.4MB/s 7.43% 3#webster 41458703-> 12191964(3.400), 124.8MB/s 444.8MB/s 5.33% 1#xml 5345280-> 696652(7.673), 485MB/s , 1333.9MB/s 2#xml 5345280-> 681492(7.843), 405.2MB/s , 1237.5MB/s 3#xml 5345280-> 639057(8.364), 328.5MB/s , 1281.3MB/s 1#xml 5345280-> 696652(7.673), 473.1MB/s , 1232.4MB/s 8.24% 2#xml 5345280-> 681492(7.843), 398.6MB/s , 1145.9MB/s 7.99% 3#xml 5345280-> 639057(8.364), 327.1MB/s , 1175MB/s 9.05% 1#x-ray 8474240-> 6772557(1.251), 521.3MB/s 
762.6MB/s 2#x-ray 8474240-> 6684531(1.268), 230.5MB/s 688.5MB/s 3#x-ray 8474240-> 6166679(1.374), 68.7MB/s 478.8MB/s 1#x-ray 8474240-> 6772557(1.251), 502.8MB/s 736.7MB/s 3.52% 2#x-ray 8474240-> 6684531(1.268), 224.4MB/s 662MB/s 4.00% 3#x-ray 8474240-> 6166679(1.374), 67.3MB/s 437.8MB/s 9.37% 7.51% * makefile changed to only pass -fno-tree-vectorize to gcc * <Replace this line with a title. Use 1 line only, 67 chars or less> Don't add "no-tree-vectorize" attribute on clang (which defines __GNUC__) * fix for warning/error with subtraction of void* pointers * fix c90 conformance issue - ISO C90 forbids mixed declarations and code * Fix assert for negative diff, only when there is no overlap * fix overflow revealed in fuzzing tests * tweak for small speed increase
2019-07-11 22:31:07 +00:00
endif
# Modules
2018-06-04 16:56:29 +00:00
# Module selection: 0 leaves the module's sources out of ZSTD_FILES.
# All of these are ?= defaults, so values given by the user take precedence.
ZSTD_LIB_COMPRESSION ?= 1
ZSTD_LIB_DECOMPRESSION ?= 1
ZSTD_LIB_DICTBUILDER ?= 1
ZSTD_LIB_DEPRECATED ?= 1
# Legacy support
# ZSTD_LEGACY_SUPPORT is passed to the compiler as -DZSTD_LEGACY_SUPPORT=<value>;
# a non-zero value below 8 also adds the matching legacy/*.c sources.
ZSTD_LEGACY_SUPPORT ?= 5
ZSTD_LEGACY_MULTITHREADED_API ?= 0
# Build size optimizations
# Each switch below, when not 0, adds a -D<NAME> macro of the same name to CFLAGS.
HUF_FORCE_DECOMPRESS_X1 ?= 0
HUF_FORCE_DECOMPRESS_X2 ?= 0
ZSTD_FORCE_DECOMPRESS_SHORT ?= 0
ZSTD_FORCE_DECOMPRESS_LONG ?= 0
ZSTD_NO_INLINE ?= 0
ZSTD_STRIP_ERROR_STRINGS ?= 0
2018-06-05 00:19:06 +00:00
2018-06-04 16:56:29 +00:00
# Compression module. Without it, the dictionary builder and the deprecated
# API are switched off as well.
ifneq ($(ZSTD_LIB_COMPRESSION), 0)
  ZSTD_FILES += $(ZSTDCOMP_FILES)
else
  ZSTD_LIB_DICTBUILDER = 0
  ZSTD_LIB_DEPRECATED = 0
endif

# Decompression module. Without it, legacy format support and the deprecated
# API are switched off as well.
ifneq ($(ZSTD_LIB_DECOMPRESSION), 0)
  ZSTD_FILES += $(ZSTDDECOMP_FILES)
else
  ZSTD_LEGACY_SUPPORT = 0
  ZSTD_LIB_DEPRECATED = 0
endif

# Optional modules, evaluated after the forced settings above.
ifneq ($(ZSTD_LIB_DEPRECATED), 0)
  ZSTD_FILES += $(ZDEPR_FILES)
endif

ifneq ($(ZSTD_LIB_DICTBUILDER), 0)
  ZSTD_FILES += $(ZDICT_FILES)
endif
# Template: append -D<NAME> to CFLAGS when the make variable <NAME> is not 0.
# $(1) is the name shared by the make variable and the preprocessor macro.
define zstd_cflag_if_enabled
ifneq ($$($(1)), 0)
CFLAGS += -D$(1)
endif
endef

# Switches handled by the template, in the order their macros are appended.
ZSTD_FEATURE_SWITCHES := \
  HUF_FORCE_DECOMPRESS_X1 \
  HUF_FORCE_DECOMPRESS_X2 \
  ZSTD_FORCE_DECOMPRESS_SHORT \
  ZSTD_FORCE_DECOMPRESS_LONG \
  ZSTD_NO_INLINE \
  ZSTD_STRIP_ERROR_STRINGS \
  ZSTD_LEGACY_MULTITHREADED_API

$(foreach macro,$(ZSTD_FEATURE_SWITCHES),$(eval $(call zstd_cflag_if_enabled,$(macro))))
# Legacy format support.
# ZSTD_LEGACY_SUPPORT=N with 0 < N < 8 adds the legacy decoders for format
# versions v0N through v07. The value is always forwarded to the compiler as
# -DZSTD_LEGACY_SUPPORT=N.
ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
# `test` prints exit status 0 when the requested version is below 8.
ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
  # Glob inside make instead of piping `ls` through grep: no extra processes
  # at parse time and no dependency on external tool options. $(sort) keeps
  # the file order stable across make versions.
  ZSTD_FILES += $(sort $(wildcard legacy/*v0[$(ZSTD_LEGACY_SUPPORT)-7]*.c))
endif
endif
CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
ZSTD_LOCAL_SRC := $(notdir $(ZSTD_FILES))
ZSTD_LOCAL_OBJ := $(ZSTD_LOCAL_SRC:.c=.o)
ZSTD_SUBDIR := common compress decompress dictBuilder legacy deprecated
vpath %.c $(ZSTD_SUBDIR)
# Name of the host system as reported by `uname`.
UNAME := $(shell uname)
# Everything below only applies when the caller did not provide BUILD_DIR.
ifndef BUILD_DIR
# Default hashing command per host system; md5sum is the fallback.
ifeq ($(UNAME), Darwin)
HASH ?= md5
else ifeq ($(UNAME), FreeBSD)
HASH ?= gmd5sum
else ifeq ($(UNAME), OpenBSD)
HASH ?= md5
endif
HASH ?= md5sum
# Directory name derived from a hash of the compiler, flags and source list.
# Recursively expanded (=), so the hash is computed where HASH_DIR is used,
# with the flag values in effect at that point.
HASH_DIR = conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(ZSTD_FILES) | $(HASH) | cut -f 1 -d " " )
# 1 when the hashing command runs successfully, 0 otherwise.
HAVE_HASH :=$(shell echo 1 | $(HASH) > /dev/null && echo 1 || echo 0)
ifeq ($(HAVE_HASH),0)
# No usable hashing command: warn and use one fixed build directory.
$(info warning : could not find HASH ($(HASH)), needed to differentiate builds using different flags)
BUILD_DIR := obj/generic_noconf
endif
endif # BUILD_DIR
2018-06-09 19:31:17 +00:00
# The macOS linker doesn't support -soname, and uses a different file extension
2015-01-24 00:58:16 +00:00
# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
# Shared-library naming and the linker flags that record the library name.
# All four variables are recursively expanded, so they may refer to values
# (such as LIBDIR) that are assigned elsewhere.
ifneq ($(UNAME), Darwin)
  # Default layout: libzstd.so, libzstd.so.<major>, libzstd.so.<version>
  SHARED_EXT = so
  SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR)
  SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
  SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
else
  # Darwin layout: libzstd.dylib, libzstd.<major>.dylib, libzstd.<version>.dylib
  SHARED_EXT = dylib
  SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
  SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
  SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
endif
# Recipe fragment: re-run make for the current target ($@) with BUILD_DIR set
# to a directory derived from the active flags (obj/$(HASH_DIR)), forwarding
# the current CPPFLAGS, CFLAGS and LDFLAGS on the command line.
#
# The leading `+` marks the line as a recursive make invocation. Make only
# detects `$(MAKE)` when it appears literally in the recipe line; here it is
# hidden inside this variable, so without `+` the sub-make would be skipped
# under `make -n` and would not take part in the parent's job server.
SET_CACHE_DIRECTORY = \
	+$(MAKE) --no-print-directory $@ \
	BUILD_DIR=obj/$(HASH_DIR) \
	CPPFLAGS="$(CPPFLAGS)" \
	CFLAGS="$(CFLAGS)" \
	LDFLAGS="$(LDFLAGS)"
2015-01-24 00:58:16 +00:00
.PHONY: lib-all all clean install uninstall
2015-11-19 10:23:44 +00:00
# alias
lib-all: all
all: lib
2015-11-19 10:23:44 +00:00
# Static library. Declared phony so that it is re-evaluated on every run.
.PHONY: libzstd.a

ifdef BUILD_DIR

# BUILD_DIR is known: build the archive inside it, then link it into place.
ZSTD_STATLIB_DIR := $(BUILD_DIR)/static
ZSTD_STATLIB := $(ZSTD_STATLIB_DIR)/libzstd.a
ZSTD_STATLIB_OBJ := $(addprefix $(ZSTD_STATLIB_DIR)/,$(ZSTD_LOCAL_OBJ))

$(ZSTD_STATLIB): ARFLAGS = rcs
# The output directory is an order-only prerequisite.
$(ZSTD_STATLIB): $(ZSTD_STATLIB_OBJ) | $(ZSTD_STATLIB_DIR)
	@echo compiling static library
	$(AR) $(ARFLAGS) $@ $^

libzstd.a: $(ZSTD_STATLIB)
	ln -sf $< $@

else

# BUILD_DIR is not set: determine it from the compilation flags and re-invoke make.
libzstd.a:
	$(SET_CACHE_DIRECTORY)

endif
2020-05-08 03:07:40 +00:00
# Dynamic library.
ifneq (,$(filter Windows%,$(TARGET_SYSTEM)))

# Windows target: build the DLL and its import library in one compiler call.
LIBZSTD = dll\libzstd.dll
$(LIBZSTD): $(ZSTD_FILES)
	@echo compiling dynamic library $(LIBVER)
	$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll\libzstd.dll.a -shared $^ -o $@

else  # not Windows

LIBZSTD = libzstd.$(SHARED_EXT_VER)
.PHONY: $(LIBZSTD) # must be run every time
# -fvisibility=hidden is a compile-time option. It belongs in CFLAGS so that
# it reaches the per-object compile rules, which do not use LDFLAGS. CFLAGS is
# also part of $(FLAGS) on the link line below, so the flag is still present there.
$(LIBZSTD): CFLAGS += -fPIC -fvisibility=hidden
$(LIBZSTD): LDFLAGS += -shared

ifndef BUILD_DIR

# determine BUILD_DIR from compilation flags
$(LIBZSTD):
	$(SET_CACHE_DIRECTORY)

else

# BUILD_DIR is defined: link the library inside it, then create the links.
ZSTD_DYNLIB_DIR := $(BUILD_DIR)/dynamic
ZSTD_DYNLIB := $(ZSTD_DYNLIB_DIR)/$(LIBZSTD)
ZSTD_DYNLIB_OBJ := $(addprefix $(ZSTD_DYNLIB_DIR)/,$(ZSTD_LOCAL_OBJ))

# The output directory is an order-only prerequisite.
$(ZSTD_DYNLIB): | $(ZSTD_DYNLIB_DIR)
$(ZSTD_DYNLIB): $(ZSTD_DYNLIB_OBJ)
	@echo compiling dynamic library $(LIBVER)
	$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
	@echo creating versioned links
	ln -sf $@ libzstd.$(SHARED_EXT_MAJOR)
	ln -sf $@ libzstd.$(SHARED_EXT)

$(LIBZSTD): $(ZSTD_DYNLIB)
	ln -sf $< $@

endif # ifndef BUILD_DIR
endif # if windows
support for verbose make A commonly accepted makefile idiom is V=1 or VERBOSE=1 to request the printing of all commands. This is not "default" though, and must be manually added. Example : Before : ``` make libzstd compiling dynamic library 1.4.5 creating versioned links make libzstd V=1 compiling dynamic library 1.4.5 creating versioned links ``` After : ``` make libzstd compiling dynamic library 1.4.5 creating versioned links make libzstd V=1 compiling dynamic library 1.4.5 cc -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=5 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef -Wpointer-arith -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings -Wredundant-decls -Wmissing-prototypes -Wc++-compat -O3 common/debug.c common/entropy_common.c common/error_private.c common/fse_decompress.c common/pool.c common/threading.c common/xxhash.c common/zstd_common.c compress/fse_compress.c compress/hist.c compress/huf_compress.c compress/zstd_compress.c compress/zstd_compress_literals.c compress/zstd_compress_sequences.c compress/zstd_compress_superblock.c compress/zstd_double_fast.c compress/zstd_fast.c compress/zstd_lazy.c compress/zstd_ldm.c compress/zstd_opt.c compress/zstdmt_compress.c decompress/huf_decompress.c decompress/zstd_ddict.c decompress/zstd_decompress.c decompress/zstd_decompress_block.c deprecated/zbuff_common.c deprecated/zbuff_compress.c deprecated/zbuff_decompress.c dictBuilder/cover.c dictBuilder/divsufsort.c dictBuilder/fastcover.c dictBuilder/zdict.c legacy/zstd_v05.c legacy/zstd_v06.c legacy/zstd_v07.c -shared -fPIC -fvisibility=hidden -Wl,-soname=libzstd.so.1 -o libzstd.so.1.4.5 creating versioned links ln -sf libzstd.so.1.4.5 libzstd.so.1 ln -sf libzstd.so.1.4.5 libzstd.so ```
2020-05-07 14:59:56 +00:00
.PHONY: libzstd
libzstd : $(LIBZSTD)
support for verbose make A commonly accepted makefile idiom is V=1 or VERBOSE=1 to request the printing of all commands. This is not "default" though, and must be manually added. Example : Before : ``` make libzstd compiling dynamic library 1.4.5 creating versioned links make libzstd V=1 compiling dynamic library 1.4.5 creating versioned links ``` After : ``` make libzstd compiling dynamic library 1.4.5 creating versioned links make libzstd V=1 compiling dynamic library 1.4.5 cc -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=5 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef -Wpointer-arith -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings -Wredundant-decls -Wmissing-prototypes -Wc++-compat -O3 common/debug.c common/entropy_common.c common/error_private.c common/fse_decompress.c common/pool.c common/threading.c common/xxhash.c common/zstd_common.c compress/fse_compress.c compress/hist.c compress/huf_compress.c compress/zstd_compress.c compress/zstd_compress_literals.c compress/zstd_compress_sequences.c compress/zstd_compress_superblock.c compress/zstd_double_fast.c compress/zstd_fast.c compress/zstd_lazy.c compress/zstd_ldm.c compress/zstd_opt.c compress/zstdmt_compress.c decompress/huf_decompress.c decompress/zstd_ddict.c decompress/zstd_decompress.c decompress/zstd_decompress_block.c deprecated/zbuff_common.c deprecated/zbuff_compress.c deprecated/zbuff_decompress.c dictBuilder/cover.c dictBuilder/divsufsort.c dictBuilder/fastcover.c dictBuilder/zdict.c legacy/zstd_v05.c legacy/zstd_v06.c legacy/zstd_v07.c -shared -fPIC -fvisibility=hidden -Wl,-soname=libzstd.so.1 -o libzstd.so.1.4.5 creating versioned links ln -sf libzstd.so.1.4.5 libzstd.so.1 ln -sf libzstd.so.1.4.5 libzstd.so ```
2020-05-07 14:59:56 +00:00
.PHONY: lib
lib : libzstd.a libzstd
# Build variants selected by a target suffix: <target>-mt and <target>-release.
# note : lib-mt and lib-release must not be declared .PHONY,
#        because make does not consider implicit pattern rules for .PHONY targets.

# <target>-mt : build <target> with multi-threading enabled.
%-mt: CPPFLAGS += -DZSTD_MULTITHREAD
%-mt: LDFLAGS += -pthread
%-mt: %
	@echo multi-threading build completed

# <target>-release : build <target> with DEBUGFLAGS emptied.
%-release: DEBUGFLAGS :=
%-release: %
	@echo release build completed
# Automatic header dependencies :
# every compilation also emits a .d file next to its object;
# the file name is the argument placed right after $(DEPFLAGS) (it ends with -MF).
# DEPFLAGS stays recursively expanded (=) so that $@ is resolved per target.
DEPFLAGS = -MT $@ -MMD -MP -MF

# objects of the dynamic library
$(ZSTD_DYNLIB_DIR)/%.o: %.c $(ZSTD_DYNLIB_DIR)/%.d | $(ZSTD_DYNLIB_DIR)
	@echo CC $@
	$(COMPILE.c) $(DEPFLAGS) $(@:.o=.d) $(OUTPUT_OPTION) $<

# objects of the static library
$(ZSTD_STATLIB_DIR)/%.o: %.c $(ZSTD_STATLIB_DIR)/%.d | $(ZSTD_STATLIB_DIR)
	@echo CC $@
	$(COMPILE.c) $(DEPFLAGS) $(@:.o=.d) $(OUTPUT_OPTION) $<
# Output directories are created on demand
# (the object rules list ZSTD_DYNLIB_DIR / ZSTD_STATLIB_DIR as order-only prerequisites).
MKDIR ?= mkdir
$(BUILD_DIR) $(ZSTD_DYNLIB_DIR) $(ZSTD_STATLIB_DIR):
	$(MKDIR) -p $@

# One .d file per object.
# The recipe-less rule lets make carry on when a .d file does not exist yet;
# only the .d files already present on disk get included.
DEPFILES := $(patsubst %.o,%.d,$(ZSTD_DYNLIB_OBJ) $(ZSTD_STATLIB_OBJ))
$(DEPFILES):
include $(wildcard $(DEPFILES))
# Special case : single-thread dynamic library,
# built directly from the source list with zstdmt_compress.c filtered out
ZSTDMT_FILES    = compress/zstdmt_compress.c
ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(ZSTD_FILES))

libzstd-nomt: LDFLAGS += -shared -fPIC -fvisibility=hidden
libzstd-nomt: $(ZSTD_NOMT_FILES)
	@echo compiling single-thread dynamic library $(LIBVER)
	@echo files : $(ZSTD_NOMT_FILES)
	$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
2015-01-24 00:58:16 +00:00
# Remove build products from the library directory.
# Declared .PHONY so that a stray file named `clean` can never make
# this target look up to date and silently skip the cleanup.
.PHONY: clean
clean:
	$(RM) -r *.dSYM   # macOS-specific
	$(RM) core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc
	$(RM) dll/libzstd.dll dll/libzstd.lib libzstd-nomt*
	$(RM) -r obj/*
	@echo Cleaning library completed
2016-12-23 09:05:49 +00:00
#-----------------------------------------------------------------------------
support for verbose make A commonly accepted makefile idiom is V=1 or VERBOSE=1 to request the printing of all commands. This is not "default" though, and must be manually added. Example : Before : ``` make libzstd compiling dynamic library 1.4.5 creating versioned links make libzstd V=1 compiling dynamic library 1.4.5 creating versioned links ``` After : ``` make libzstd compiling dynamic library 1.4.5 creating versioned links make libzstd V=1 compiling dynamic library 1.4.5 cc -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=5 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef -Wpointer-arith -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings -Wredundant-decls -Wmissing-prototypes -Wc++-compat -O3 common/debug.c common/entropy_common.c common/error_private.c common/fse_decompress.c common/pool.c common/threading.c common/xxhash.c common/zstd_common.c compress/fse_compress.c compress/hist.c compress/huf_compress.c compress/zstd_compress.c compress/zstd_compress_literals.c compress/zstd_compress_sequences.c compress/zstd_compress_superblock.c compress/zstd_double_fast.c compress/zstd_fast.c compress/zstd_lazy.c compress/zstd_ldm.c compress/zstd_opt.c compress/zstdmt_compress.c decompress/huf_decompress.c decompress/zstd_ddict.c decompress/zstd_decompress.c decompress/zstd_decompress_block.c deprecated/zbuff_common.c deprecated/zbuff_compress.c deprecated/zbuff_decompress.c dictBuilder/cover.c dictBuilder/divsufsort.c dictBuilder/fastcover.c dictBuilder/zdict.c legacy/zstd_v05.c legacy/zstd_v06.c legacy/zstd_v07.c -shared -fPIC -fvisibility=hidden -Wl,-soname=libzstd.so.1 -o libzstd.so.1.4.5 creating versioned links ln -sf libzstd.so.1.4.5 libzstd.so.1 ln -sf libzstd.so.1.4.5 libzstd.so ```
2020-05-07 14:59:56 +00:00
# make install is validated only for the environments listed below
2016-12-23 09:05:49 +00:00
#-----------------------------------------------------------------------------
ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku))
2016-12-22 19:14:37 +00:00
# `all` additionally depends on the pkg-config file
all: libzstd.pc
# DESTDIR is prepended to every install / uninstall destination; empty by default
DESTDIR ?=
# directory variables : GNU conventions prefer lowercase
# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
# support both lower and uppercase (BSD), use uppercase in script
# Each uppercase variable defaults to its lowercase twin,
# so setting either spelling takes effect.
prefix ?= /usr/local
PREFIX ?= $(prefix)
exec_prefix ?= $(PREFIX)
EXEC_PREFIX ?= $(exec_prefix)
libdir ?= $(EXEC_PREFIX)/lib
LIBDIR ?= $(libdir)
includedir ?= $(PREFIX)/include
INCLUDEDIR ?= $(includedir)
2016-12-22 19:14:37 +00:00
# Paths written into the pkg-config file, expressed relative to their prefix :
# sed strips the leading $(EXEC_PREFIX)/ (resp. $(PREFIX)/) and prints the
# remainder only when the prefix actually matched (-n ... p).
PCLIBDIR ?= $(shell echo "$(LIBDIR)" | sed -n $(SED_ERE_OPT) -e "s@^$(EXEC_PREFIX)(/|$$)@@p")
PCINCDIR ?= $(shell echo "$(INCLUDEDIR)" | sed -n $(SED_ERE_OPT) -e "s@^$(PREFIX)(/|$$)@@p")

ifeq (,$(PCLIBDIR))
  # Additional prefix check is required, since the empty string is technically a
  # valid PCLIBDIR
  ifeq (,$(shell echo "$(LIBDIR)" | sed -n $(SED_ERE_OPT) -e "\\@^$(EXEC_PREFIX)(/|$$)@ p"))
    # libdir is compared against EXEC_PREFIX, so report it as exec_prefix
    $(error configured libdir ($(LIBDIR)) is outside of exec_prefix ($(EXEC_PREFIX)), can't generate pkg-config file)
  endif
endif

ifeq (,$(PCINCDIR))
  # Additional prefix check is required, since the empty string is technically a
  # valid PCINCDIR
  ifeq (,$(shell echo "$(INCLUDEDIR)" | sed -n $(SED_ERE_OPT) -e "\\@^$(PREFIX)(/|$$)@ p"))
    # includedir is compared against PREFIX, so report it as prefix
    $(error configured includedir ($(INCLUDEDIR)) is outside of prefix ($(PREFIX)), can't generate pkg-config file)
  endif
endif
# pkg-config directory : under $(PREFIX)/libdata on FreeBSD, NetBSD and DragonFly,
# under $(LIBDIR) everywhere else
ifneq (,$(filter $(UNAME),FreeBSD NetBSD DragonFly))
  PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig
else
  PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig
endif

# install program : ginstall on SunOS, install everywhere else
ifneq (,$(filter $(UNAME),SunOS))
  INSTALL ?= ginstall
else
  INSTALL ?= install
endif

# both commands derive from $(INSTALL); data files get mode 644
INSTALL_PROGRAM ?= $(INSTALL)
INSTALL_DATA    ?= $(INSTALL) -m 644
2015-01-24 00:58:16 +00:00
# Generate the pkg-config file from its template,
# replacing the @PREFIX@, @LIBDIR@, @INCLUDEDIR@ and @VERSION@ placeholders.
libzstd.pc: libzstd.pc.in
	@echo creating pkgconfig
	@sed $(SED_ERE_OPT) \
	     -e 's|@PREFIX@|$(PREFIX)|' \
	     -e 's|@LIBDIR@|$(PCLIBDIR)|' \
	     -e 's|@INCLUDEDIR@|$(PCINCDIR)|' \
	     -e 's|@VERSION@|$(VERSION)|' \
	     $< >$@
2015-01-24 00:58:16 +00:00
# Install everything : pkg-config file, static library, shared library and headers.
# Declared .PHONY : `install` names an action, not a file, and must run even if
# a file called `install` (or `INSTALL` on a case-insensitive filesystem) exists.
.PHONY: install
install: install-pc install-static install-shared install-includes
	@echo zstd static and shared library installed
# Install the pkg-config file into $(DESTDIR)$(PKGCONFIGDIR),
# creating that directory first when it does not exist.
# Declared .PHONY so a file of the same name cannot suppress the installation.
.PHONY: install-pc
install-pc: libzstd.pc
	[ -e $(DESTDIR)$(PKGCONFIGDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/
	$(INSTALL_DATA) libzstd.pc $(DESTDIR)$(PKGCONFIGDIR)/
# Install libzstd.a into $(DESTDIR)$(LIBDIR),
# creating that directory first when it does not exist.
# Declared .PHONY so a file of the same name cannot suppress the installation.
.PHONY: install-static
install-static:
	# only generate libzstd.a if it's not already present
	[ -e libzstd.a ] || $(MAKE) libzstd.a-release
	[ -e $(DESTDIR)$(LIBDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
	@echo Installing static library
	$(INSTALL_DATA) libzstd.a $(DESTDIR)$(LIBDIR)
# Install $(LIBZSTD) into $(DESTDIR)$(LIBDIR), then create the
# libzstd.$(SHARED_EXT_MAJOR) and libzstd.$(SHARED_EXT) symlinks pointing at it.
# Declared .PHONY so a file of the same name cannot suppress the installation.
.PHONY: install-shared
install-shared:
	# only generate libzstd.so if it's not already present
	[ -e $(LIBZSTD) ] || $(MAKE) libzstd-release
	[ -e $(DESTDIR)$(LIBDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
	@echo Installing shared library
	$(INSTALL_PROGRAM) $(LIBZSTD) $(DESTDIR)$(LIBDIR)
	ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
	ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
# Install the headers (zstd.h, zstd_errors.h, zdict.h) into $(DESTDIR)$(INCLUDEDIR),
# creating that directory first when it does not exist.
# Declared .PHONY so a file of the same name cannot suppress the installation.
.PHONY: install-includes
install-includes:
	[ -e $(DESTDIR)$(INCLUDEDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR)/
	@echo Installing includes
	$(INSTALL_DATA) zstd.h $(DESTDIR)$(INCLUDEDIR)
	$(INSTALL_DATA) common/zstd_errors.h $(DESTDIR)$(INCLUDEDIR)
	$(INSTALL_DATA) dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR)
2015-01-24 00:58:16 +00:00
# Remove every file placed by the install-* targets :
# static library, shared library and its two symlinks, pkg-config file, headers.
# Declared .PHONY so a file named `uninstall` cannot turn this into a no-op.
.PHONY: uninstall
uninstall:
	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.a
	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
	$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
	$(RM) $(DESTDIR)$(LIBDIR)/$(LIBZSTD)
	$(RM) $(DESTDIR)$(PKGCONFIGDIR)/libzstd.pc
	$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd.h
	$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd_errors.h
	$(RM) $(DESTDIR)$(INCLUDEDIR)/zdict.h
	@echo zstd libraries successfully uninstalled
endif