From 5e45a5fbb3a98f5649f60167dc5379147204e54e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 21 Jul 2016 18:36:50 -0700 Subject: [PATCH] force loop-align to 32 for zstd_decompress --- lib/Makefile | 1 + lib/decompress/zstd_decompress.c | 10 ++++++---- programs/Makefile | 13 +++++++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index 1f4f7eb8..6df2b1a0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -95,6 +95,7 @@ libzstd: $(ZSTD_FILES) clean: @rm -f core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc + @rm -f decompress/*.o @echo Cleaning library completed #------------------------------------------------------------------------ diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 16c85cf3..7299b6b5 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -467,14 +467,16 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, switch((litBlockType_t)(istart[0] & 3)) { case lbt_huffman: - { size_t lhSize, litSize, litCSize, singleStream=0; + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_read32(istart); if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */ switch(lhlCode) { case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ /* 2 - 2 - 10 - 10 */ - { U32 const lhc = MEM_readLE32(istart); + { //U32 const lhc = MEM_readLE32(istart); singleStream = lhlCode; lhSize = 3; litSize = (lhc >> 4) & 0x3FF; @@ -483,7 +485,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } case 2: /* 2 - 2 - 14 - 14 */ - { U32 const lhc = MEM_readLE32(istart); + { //U32 const lhc = MEM_readLE32(istart); lhSize = 4; litSize = (lhc >> 4) & 0x3FFF; litCSize = lhc >> 18; @@ -491,7 +493,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } case 3: /* 2 - 2 - 18 - 18 */ - { U32 const lhc = MEM_readLE32(istart); + { //U32 const lhc = MEM_readLE32(istart); lhSize = 5; litSize = (lhc >> 4) & 0x3FFFF; litCSize = (lhc >> 22) + (istart[4] << 10); diff --git a/programs/Makefile b/programs/Makefile index 75d3154d..effa9440 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -38,15 +38,21 @@ MANDIR = $(PREFIX)/share/man/man1 ZSTDDIR = ../lib +ifeq ($(shell $(CC) -v 2>&1 | grep -c "gcc version "), 1) +ALIGN_LOOP = -falign-loops=32 +else +ALIGN_LOOP = +endif + CPPFLAGS= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/dictBuilder -DXXH_NAMESPACE=ZSTD_ -CFLAGS ?= -O3 # -falign-loops=32 # not always beneficial +CFLAGS ?= -O3 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS) ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c ZSTDCOMP_FILES := $(ZSTDDIR)/compress/zstd_compress.c $(ZSTDDIR)/compress/fse_compress.c $(ZSTDDIR)/compress/huf_compress.c -ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/zstd_decompress.c $(ZSTDDIR)/decompress/huf_decompress.c +ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/zstd_decompress.o $(ZSTDDIR)/decompress/huf_decompress.c ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) ZBUFF_FILES := $(ZSTDDIR)/compress/zbuff_compress.c $(ZSTDDIR)/decompress/zbuff_decompress.c ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c @@ -80,6 +86,8 @@ default: zstd all: zstd fullbench fuzzer zbufftest paramgrill datagen zstd32 fullbench32 fuzzer32 zbufftest32 +$(ZSTDDIR)/decompress/zstd_decompress.o: CFLAGS += $(ALIGN_LOOP) + zstd : $(ZSTD_FILES) $(ZSTDLEGACY_FILES) $(ZBUFF_FILES) $(ZDICT_FILES) \ zstdcli.c fileio.c bench.c datagen.c dibio.c $(CC) $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT) @@ -146,6 +154,7 @@ roundTripCrash : $(ZSTD_FILES) roundTripCrash.c $(CC) $(FLAGS) $^ -o $@$(EXT) clean: + $(MAKE) -C ../lib clean @rm -f core *.o tmp* result* *.gcda dictionary *.zst \ zstd$(EXT) zstd32$(EXT) zstd-compress$(EXT) zstd-decompress$(EXT) \ fullbench$(EXT) fullbench32$(EXT) \