diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index dfb8f191..c7d221f3 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -7,28 +7,28 @@ # in the COPYING file in the root directory of this source tree). # ################################################################ +# Optionally user defined flags CFLAGS ?= -O3 CXXFLAGS ?= -O3 +CPPFLAGS ?= +LDFLAGS ?= +ARFLAGS ?= +LIB_FUZZING_ENGINE ?= libregression.a ZSTDDIR = ../../lib PRGDIR = ../../programs FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \ - -DZSTD_DEBUG=1 -DMEM_FORCE_MEMORY_ACCESS=0 \ - -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION $(CPPFLAGS) -FUZZ_CFLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ + $(CPPFLAGS) +FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ -Wstrict-prototypes -Wundef -Wformat-security \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ -Wredundant-decls \ - -g -fno-omit-frame-pointer $(CFLAGS) -FUZZ_CXXFLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ - -Wstrict-aliasing=1 -Wswitch-enum \ - -Wdeclaration-after-statement -Wstrict-prototypes -Wundef \ - -Wformat-security -Wvla -Wformat=2 -Winit-self -Wfloat-equal \ - -Wwrite-strings -Wredundant-decls \ - -g -fno-omit-frame-pointer -std=c++11 $(CXXFLAGS) + -g -fno-omit-frame-pointer +FUZZ_CFLAGS := $(FUZZ_EXTRA_FLAGS) $(CFLAGS) +FUZZ_CXXFLAGS := $(FUZZ_EXTRA_FLAGS) -std=c++11 $(CXXFLAGS) FUZZ_LDFLAGS := $(LDFLAGS) FUZZ_ARFLAGS := $(ARFLAGS) FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS) @@ -40,9 +40,8 @@ ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) -ZSTD_OBJ := $(patsubst %.c,%.o, $(wildcard $(ZSTD_FILES))) +ZSTD_OBJ := $(patsubst %.c,%.o, $(wildcard $(ZSTD_FILES))) -LIBFUZZER ?= -lFuzzer .PHONY: default all clean @@ -51,50 +50,35 @@ default: all all: \ simple_round_trip \ stream_round_trip \ + block_round_trip \ simple_decompress \ - stream_decompress + stream_decompress \ + block_decompress %.o: %.c $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $^ -c -o $@ simple_round_trip: $(FUZZ_HEADERS) $(ZSTD_OBJ) simple_round_trip.o - $(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) simple_round_trip.o $(LIBFUZZER) -o $@ + $(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) simple_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ stream_round_trip: $(FUZZ_HEADERS) $(ZSTD_OBJ) stream_round_trip.o - $(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) stream_round_trip.o $(LIBFUZZER) -o $@ + $(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) stream_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +block_round_trip: $(FUZZ_HEADERS) $(ZSTD_OBJ) block_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) block_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ simple_decompress: $(FUZZ_HEADERS) $(ZSTD_OBJ) simple_decompress.o - $(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) simple_decompress.o $(LIBFUZZER) -o $@ + $(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) simple_decompress.o $(LIB_FUZZING_ENGINE) -o $@ stream_decompress: $(FUZZ_HEADERS) $(ZSTD_OBJ) stream_decompress.o - $(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) stream_decompress.o $(LIBFUZZER) -o $@ + $(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) stream_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + +block_decompress: $(FUZZ_HEADERS) $(ZSTD_OBJ) block_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(ZSTD_OBJ) block_decompress.o $(LIB_FUZZING_ENGINE) -o $@ libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h regression_driver.o $(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o -%-regression: libregression.a - $(RM) $* - $(MAKE) $* LDFLAGS="$(FUZZ_LDFLAGS) -L." LIBFUZZER=-lregression - -%-regression-test: %-regression - ./$* corpora/$* - -regression-test: \ - simple_round_trip-regression-test \ - stream_round_trip-regression-test \ - simple_decompress-regression-test \ - stream_decompress-regression-test - -%-msan: clean - $(MAKE) $* CFLAGS="-fsanitize=memory $(FUZZ_CFLAGS)" \ - CXXFLAGS="-fsanitize=memory $(FUZZ_CXXFLAGS)" - -UASAN_FLAGS := -fsanitize=address,undefined -fno-sanitize-recover=undefined \ - -fno-sanitize=pointer-overflow -%-uasan: clean - $(MAKE) $* CFLAGS="$(FUZZ_CFLAGS) $(UASAN_FLAGS)" \ - CXXFLAGS="$(FUZZ_CXXFLAGS) $(UASAN_FLAGS)" - # Install libfuzzer (not usable for MSAN testing) # Provided for convienence. To use this library run make libFuzzer and # set LDFLAGS=-L. @@ -102,7 +86,7 @@ UASAN_FLAGS := -fsanitize=address,undefined -fno-sanitize-recover=undefined \ libFuzzer: @$(RM) -rf Fuzzer @git clone https://chromium.googlesource.com/chromium/llvm-project/llvm/lib/Fuzzer - @./Fuzzer/build.sh + @cd Fuzzer && ./build.sh clean: @$(MAKE) -C $(ZSTDDIR) clean diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md index 38a4f3d1..6d0fab55 100644 --- a/tests/fuzz/README.md +++ b/tests/fuzz/README.md @@ -2,33 +2,87 @@ Each fuzzing target can be built with multiple engines. +## fuzz.py + +`fuzz.py` is a helper script for building and running fuzzers. +Run `./fuzz.py -h` for the commands and run `./fuzz.py COMMAND -h` for +command specific help. + +### Generating Data + +`fuzz.py` provides a utility to generate seed data for each fuzzer. + +``` +make -C ../tests decodecorpus +./fuzz.py gen TARGET +``` + +By default it outputs 100 samples, each at most 8KB into `corpora/TARGET-seed`, +but that can be configured with the `--number`, `--max-size-log` and `--seed` +flags. + +### Build +It respects the usual build environment variables `CC`, `CFLAGS`, etc. +The environment variables can be overridden with the corresponding flags +`--cc`, `--cflags`, etc. +The specific fuzzing engine is selected with `LIB_FUZZING_ENGINE` or +`--lib-fuzzing-engine`, the default is `libregression.a`. +It has flags that can easily set up sanitizers `--enable-{a,ub,m}san`, and +coverage instrumentation `--enable-coverage`. +It sets sane defaults which can be overriden with flags `--debug`, +`--enable-ubsan-pointer-overlow`, etc. +Run `./fuzz.py build -h` for help. + +### Running Fuzzers + +`./fuzz.py` can run `libfuzzer`, `afl`, and `regression` tests. +See the help of the relevant command for options. +Flags not parsed by `fuzz.py` are passed to the fuzzing engine. +The command used to run the fuzzer is printed for debugging. + ## LibFuzzer -You can install `libFuzzer` with `make libFuzzer`. Then you can make each target -with `make target LDFLAGS=-L. CC=clang CXX=clang++`. +``` +# Build libfuzzer if necessary +make libFuzzer +# Build the fuzz targets +./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan --lib-fuzzing-engine Fuzzer/libFuzzer.a --cc clang --cxx clang++ +# OR equivalently +CC=clang CXX=clang++ LIB_FUZZING_ENGINE=Fuzzer/libFuzzer.a ./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan +# Run the fuzzer +./fuzz.py libfuzzer TARGET -max_len=8192 -jobs=4 +``` + +where `TARGET` could be `simple_decompress`, `stream_round_trip`, etc. + +### MSAN + +Fuzzing with `libFuzzer` and `MSAN` will require building a C++ standard library +and libFuzzer with MSAN. +`fuzz.py` respects the environment variables / flags `MSAN_EXTRA_CPPFLAGS`, +`MSAN_EXTRA_CFLAGS`, `MSAN_EXTRA_CXXFLAGS`, `MSAN_EXTRA_LDFLAGS` to easily pass +the extra parameters only for MSAN. ## AFL -The regression driver also serves as a binary for `afl-fuzz`. You can make each -target with one of these commands: +The default `LIB_FUZZING_ENGINE` is `libregression.a`, which produces a binary +that AFL can use. ``` -make target-regression CC=afl-clang CXX=afl-clang++ -AFL_MSAN=1 make target-regression-msan CC=afl-clang CXX=afl-clang++ -AFL_ASAN=1 make target-regression-uasan CC=afl-clang CXX=afl-clang++ +# Build the fuzz targets +CC=afl-clang CXX=afl-clang++ ./fuzz.py build all --enable-asan --enable-ubsan +# Run the fuzzer without a memory limit because of ASAN +./fuzz.py afl TARGET -m none ``` -Then run as `./target @@`. - ## Regression Testing -Each fuzz target has a corpus checked into the repo under `fuzz/corpora/`. -You can run regression tests on the corpora to ensure that inputs which -previously exposed bugs still pass. You can make these targets to run the -regression tests with different sanitizers. +The regression rest supports the `all` target to run all the fuzzers in one +command. ``` -make regression-test -make regression-test-msan -make regression-test-uasan +CC=clang CXX=clang++ ./fuzz.py build all --enable-asan --enable-ubsan +./fuzz.py regression all +CC=clang CXX=clang++ ./fuzz.py build all --enable-msan +./fuzz.py regression all ``` diff --git a/tests/fuzz/block_decompress.c b/tests/fuzz/block_decompress.c new file mode 100644 index 00000000..3cccc32f --- /dev/null +++ b/tests/fuzz/block_decompress.c @@ -0,0 +1,51 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the simple + * decompression function to ensure the decompressor never crashes. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include +#include +#include +#include "fuzz_helpers.h" +#include "zstd.h" + +static ZSTD_DCtx *dctx = NULL; +static void* rBuf = NULL; +static size_t bufSize = 0; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t const neededBufSize = ZSTD_BLOCKSIZE_MAX; + + FUZZ_seed(&src, &size); + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(rBuf); + rBuf = malloc(neededBufSize); + bufSize = neededBufSize; + FUZZ_ASSERT(rBuf); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + ZSTD_decompressBegin(dctx); + ZSTD_decompressBlock(dctx, rBuf, neededBufSize, src, size); + +#ifndef STATEFUL_FUZZING + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/tests/fuzz/block_round_trip.c b/tests/fuzz/block_round_trip.c new file mode 100644 index 00000000..3b3f2ff6 --- /dev/null +++ b/tests/fuzz/block_round_trip.c @@ -0,0 +1,99 @@ +/** + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include +#include +#include +#include +#include "fuzz_helpers.h" +#include "zstd.h" + +static const int kMaxClevel = 19; + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; +static void* cBuf = NULL; +static void* rBuf = NULL; +static size_t bufSize = 0; +static uint32_t seed; + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize) +{ + int const cLevel = FUZZ_rand(&seed) % kMaxClevel; + size_t ret = ZSTD_compressBegin(cctx, cLevel); + + if (ZSTD_isError(ret)) { + fprintf(stderr, "ZSTD_compressBegin() error: %s\n", + ZSTD_getErrorName(ret)); + return ret; + } + + ret = ZSTD_compressBlock(cctx, compressed, compressedCapacity, src, srcSize); + if (ZSTD_isError(ret)) { + fprintf(stderr, "ZSTD_compressBlock() error: %s\n", ZSTD_getErrorName(ret)); + return ret; + } + if (ret == 0) { + FUZZ_ASSERT(resultCapacity >= srcSize); + memcpy(result, src, srcSize); + return srcSize; + } + ZSTD_decompressBegin(dctx); + return ZSTD_decompressBlock(dctx, result, resultCapacity, compressed, ret); +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t neededBufSize; + + seed = FUZZ_seed(&src, &size); + neededBufSize = size; + if (size > ZSTD_BLOCKSIZE_MAX) + return 0; + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize || !cBuf || !rBuf) { + free(cBuf); + free(rBuf); + cBuf = malloc(neededBufSize); + rBuf = malloc(neededBufSize); + bufSize = neededBufSize; + FUZZ_ASSERT(cBuf && rBuf); + } + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const result = + roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size); + FUZZ_ASSERT_MSG(!ZSTD_isError(result), ZSTD_getErrorName(result)); + FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); + } +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/tests/fuzz/fuzz.h b/tests/fuzz/fuzz.h index 3017db3a..a6484547 100644 --- a/tests/fuzz/fuzz.h +++ b/tests/fuzz/fuzz.h @@ -12,15 +12,17 @@ * Fuzz targets have some common parameters passed as macros during compilation. * Check the documentation for each individual fuzzer for more parameters. * - * @param STATEFULL_FUZZING: + * @param STATEFUL_FUZZING: * Define this to reuse state between fuzzer runs. This can be useful to * test code paths which are only executed when contexts are reused. * WARNING: Makes reproducing crashes much harder. * Default: Not defined. * @param FUZZ_RNG_SEED_SIZE: * The number of bytes of the source to look at when constructing a seed - * for the deterministic RNG. - * Default: 128. + * for the deterministic RNG. These bytes are discarded before passing + * the data to zstd functions. Every fuzzer initializes the RNG exactly + * once before doing anything else, even if it is unused. + * Default: 4. * @param ZSTD_DEBUG: * This is a parameter for the zstd library. Defining `ZSTD_DEBUG=1` * enables assert() statements in the zstd library. Higher levels enable @@ -41,12 +43,20 @@ #define FUZZ_H #ifndef FUZZ_RNG_SEED_SIZE -# define FUZZ_RNG_SEED_SIZE 128 +# define FUZZ_RNG_SEED_SIZE 4 #endif #include #include +#ifdef __cplusplus +extern "C" { +#endif + int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size); +#ifdef __cplusplus +} +#endif + #endif diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py new file mode 100755 index 00000000..0ce201cd --- /dev/null +++ b/tests/fuzz/fuzz.py @@ -0,0 +1,721 @@ +#! /usr/bin/env python + +# ################################################################ +# Copyright (c) 2016-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ########################################################################## + +import argparse +import contextlib +import os +import re +import shutil +import subprocess +import sys +import tempfile + + +def abs_join(a, *p): + return os.path.abspath(os.path.join(a, *p)) + + +# Constants +FUZZ_DIR = os.path.abspath(os.path.dirname(__file__)) +CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora') +TARGETS = [ + 'simple_round_trip', + 'stream_round_trip', + 'block_round_trip', + 'simple_decompress', + 'stream_decompress', + 'block_decompress', +] +ALL_TARGETS = TARGETS + ['all'] +FUZZ_RNG_SEED_SIZE = 4 + +# Standard environment variables +CC = os.environ.get('CC', 'cc') +CXX = os.environ.get('CXX', 'c++') +CPPFLAGS = os.environ.get('CPPFLAGS', '') +CFLAGS = os.environ.get('CFLAGS', '-O3') +CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS) +LDFLAGS = os.environ.get('LDFLAGS', '') +MFLAGS = os.environ.get('MFLAGS', '-j') + +# Fuzzing environment variables +LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a') +AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz') +DECODECORPUS = os.environ.get('DECODECORPUS', + abs_join(FUZZ_DIR, '..', 'decodecorpus')) + +# Sanitizer environment variables +MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '') +MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '') +MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '') +MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '') + + +def create(r): + d = os.path.abspath(r) + if not os.path.isdir(d): + os.mkdir(d) + return d + + +def check(r): + d = os.path.abspath(r) + if not os.path.isdir(d): + return None + return d + + +@contextlib.contextmanager +def tmpdir(): + dirpath = tempfile.mkdtemp() + try: + yield dirpath + finally: + shutil.rmtree(dirpath, ignore_errors=True) + + +def parse_env_flags(args, flags): + """ + Look for flags set by environment variables. + """ + flags = ' '.join(flags) + san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags)) + nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags)) + + def set_sanitizer(sanitizer, default, san, nosan): + if sanitizer in san and sanitizer in nosan: + raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'. + format(s=sanitizer)) + if sanitizer in san: + return True + if sanitizer in nosan: + return False + return default + + san = set(san_flags.split(',')) + nosan = set(nosan_flags.split(',')) + + args.asan = set_sanitizer('address', args.asan, san, nosan) + args.msan = set_sanitizer('memory', args.msan, san, nosan) + args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan) + + args.sanitize = args.asan or args.msan or args.ubsan + + return args + + +def build_parser(args): + description = """ + Cleans the repository and builds a fuzz target (or all). + Many flags default to environment variables (default says $X='y'). + Options that aren't enabling features default to the correct values for + zstd. + Enable sanitizers with --enable-*san. + For regression testing just build. + For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage. + For AFL set CC and CXX to AFL's compilers and set + LIB_FUZZING_ENGINE='libregression.a'. + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--lib-fuzzing-engine', + dest='lib_fuzzing_engine', + type=str, + default=LIB_FUZZING_ENGINE, + help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a ' + "(default: $LIB_FUZZING_ENGINE='{})".format(LIB_FUZZING_ENGINE))) + parser.add_argument( + '--enable-coverage', + dest='coverage', + action='store_true', + help='Enable coverage instrumentation (-fsanitize-coverage)') + parser.add_argument( + '--enable-asan', dest='asan', action='store_true', help='Enable UBSAN') + parser.add_argument( + '--enable-ubsan', + dest='ubsan', + action='store_true', + help='Enable UBSAN') + parser.add_argument( + '--enable-ubsan-pointer-overflow', + dest='ubsan_pointer_overflow', + action='store_true', + help='Enable UBSAN pointer overflow check (known failure)') + parser.add_argument( + '--enable-msan', dest='msan', action='store_true', help='Enable MSAN') + parser.add_argument( + '--enable-msan-track-origins', dest='msan_track_origins', + action='store_true', help='Enable MSAN origin tracking') + parser.add_argument( + '--msan-extra-cppflags', + dest='msan_extra_cppflags', + type=str, + default=MSAN_EXTRA_CPPFLAGS, + help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')". + format(MSAN_EXTRA_CPPFLAGS)) + parser.add_argument( + '--msan-extra-cflags', + dest='msan_extra_cflags', + type=str, + default=MSAN_EXTRA_CFLAGS, + help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format( + MSAN_EXTRA_CFLAGS)) + parser.add_argument( + '--msan-extra-cxxflags', + dest='msan_extra_cxxflags', + type=str, + default=MSAN_EXTRA_CXXFLAGS, + help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')". + format(MSAN_EXTRA_CXXFLAGS)) + parser.add_argument( + '--msan-extra-ldflags', + dest='msan_extra_ldflags', + type=str, + default=MSAN_EXTRA_LDFLAGS, + help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')". + format(MSAN_EXTRA_LDFLAGS)) + parser.add_argument( + '--enable-sanitize-recover', + dest='sanitize_recover', + action='store_true', + help='Non-fatal sanitizer errors where possible') + parser.add_argument( + '--debug', + dest='debug', + type=int, + default=1, + help='Set ZSTD_DEBUG (default: 1)') + parser.add_argument( + '--force-memory-access', + dest='memory_access', + type=int, + default=0, + help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)') + parser.add_argument( + '--fuzz-rng-seed-size', + dest='fuzz_rng_seed_size', + type=int, + default=4, + help='Set FUZZ_RNG_SEED_SIZE (default: 4)') + parser.add_argument( + '--disable-fuzzing-mode', + dest='fuzzing_mode', + action='store_false', + help='Do not define FUZZING_BUILD_MORE_UNSAFE_FOR_PRODUCTION') + parser.add_argument( + '--enable-stateful-fuzzing', + dest='stateful_fuzzing', + action='store_true', + help='Reuse contexts between runs (makes reproduction impossible)') + parser.add_argument( + '--cc', + dest='cc', + type=str, + default=CC, + help="CC (default: $CC='{}')".format(CC)) + parser.add_argument( + '--cxx', + dest='cxx', + type=str, + default=CXX, + help="CXX (default: $CXX='{}')".format(CXX)) + parser.add_argument( + '--cppflags', + dest='cppflags', + type=str, + default=CPPFLAGS, + help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS)) + parser.add_argument( + '--cflags', + dest='cflags', + type=str, + default=CFLAGS, + help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS)) + parser.add_argument( + '--cxxflags', + dest='cxxflags', + type=str, + default=CXXFLAGS, + help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS)) + parser.add_argument( + '--ldflags', + dest='ldflags', + type=str, + default=LDFLAGS, + help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS)) + parser.add_argument( + '--mflags', + dest='mflags', + type=str, + default=MFLAGS, + help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS)) + parser.add_argument( + 'TARGET', + nargs='*', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)) + ) + args = parser.parse_args(args) + args = parse_env_flags(args, ' '.join( + [args.cppflags, args.cflags, args.cxxflags, args.ldflags])) + + # Check option sanitiy + if args.msan and (args.asan or args.ubsan): + raise RuntimeError('MSAN may not be used with any other sanitizers') + if args.msan_track_origins and not args.msan: + raise RuntimeError('--enable-msan-track-origins requires MSAN') + if args.ubsan_pointer_overflow and not args.ubsan: + raise RuntimeError('--enable-ubsan-pointer-overlow requires UBSAN') + if args.sanitize_recover and not args.sanitize: + raise RuntimeError('--enable-sanitize-recover but no sanitizers used') + + return args + + +def build(args): + try: + args = build_parser(args) + except Exception as e: + print(e) + return 1 + # The compilation flags we are setting + targets = args.TARGET + cc = args.cc + cxx = args.cxx + cppflags = [args.cppflags] + cflags = [args.cflags] + ldflags = [args.ldflags] + cxxflags = [args.cxxflags] + mflags = [args.mflags] if args.mflags else [] + # Flags to be added to both cflags and cxxflags + common_flags = [] + + cppflags += [ + '-DZSTD_DEBUG={}'.format(args.debug), + '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access), + '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size), + ] + + mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)] + + # Set flags for options + if args.coverage: + common_flags += [ + '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp' + ] + + if args.sanitize_recover: + recover_flags = ['-fsanitize-recover=all'] + else: + recover_flags = ['-fno-sanitize-recover=all'] + if args.sanitize: + common_flags += recover_flags + + if args.msan: + msan_flags = ['-fsanitize=memory'] + if args.msan_track_origins: + msan_flags += ['-fsanitize-memory-track-origins'] + common_flags += msan_flags + # Append extra MSAN flags (it might require special setup) + cppflags += [args.msan_extra_cppflags] + cflags += [args.msan_extra_cflags] + cxxflags += [args.msan_extra_cxxflags] + ldflags += [args.msan_extra_ldflags] + + if args.asan: + common_flags += ['-fsanitize=address'] + + if args.ubsan: + ubsan_flags = ['-fsanitize=undefined'] + if not args.ubsan_pointer_overflow: + ubsan_flags += ['-fno-sanitize=pointer-overflow'] + common_flags += ubsan_flags + + if args.stateful_fuzzing: + cppflags += ['-DSTATEFUL_FUZZING'] + + if args.fuzzing_mode: + cppflags += ['-DFUZZING_BUILD_MORE_UNSAFE_FOR_PRODUCTION'] + + if args.lib_fuzzing_engine == 'libregression.a': + targets = ['libregression.a'] + targets + + # Append the common flags + cflags += common_flags + cxxflags += common_flags + + # Prepare the flags for Make + cc_str = "CC={}".format(cc) + cxx_str = "CXX={}".format(cxx) + cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags)) + cflags_str = "CFLAGS={}".format(' '.join(cflags)) + cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags)) + ldflags_str = "LDFLAGS={}".format(' '.join(ldflags)) + + # Print the flags + print('MFLAGS={}'.format(' '.join(mflags))) + print(cc_str) + print(cxx_str) + print(cppflags_str) + print(cflags_str) + print(cxxflags_str) + print(ldflags_str) + + # Clean and build + clean_cmd = ['make', 'clean'] + mflags + print(' '.join(clean_cmd)) + subprocess.check_call(clean_cmd) + build_cmd = [ + 'make', + cc_str, + cxx_str, + cppflags_str, + cflags_str, + cxxflags_str, + ldflags_str, + ] + mflags + targets + print(' '.join(build_cmd)) + subprocess.check_call(build_cmd) + return 0 + + +def libfuzzer_parser(args): + description = """ + Runs a libfuzzer binary. + Passes all extra arguments to libfuzzer. + The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to + libFuzzer.a. + Generates output in the CORPORA directory, puts crashes in the ARTIFACT + directory, and takes extra input from the SEED directory. + To merge AFL's output pass the SEED as AFL's output directory and pass + '-merge=1'. + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--corpora', + type=str, + help='Override the default corpora dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET'))) + parser.add_argument( + '--artifact', + type=str, + help='Override the default artifact dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-crash'))) + parser.add_argument( + '--seed', + type=str, + help='Override the default seed dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-seed'))) + parser.add_argument( + 'TARGET', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + if args.TARGET and args.TARGET not in TARGETS: + raise RuntimeError('{} is not a valid target'.format(args.TARGET)) + + if not args.corpora: + args.corpora = abs_join(CORPORA_DIR, args.TARGET) + if not args.artifact: + args.artifact = abs_join(CORPORA_DIR, '{}-crash'.format(args.TARGET)) + if not args.seed: + args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET)) + + return args + + +def libfuzzer(args): + try: + args = libfuzzer_parser(args) + except Exception as e: + print(e) + return 1 + target = abs_join(FUZZ_DIR, args.TARGET) + + corpora = [create(args.corpora)] + artifact = create(args.artifact) + seed = check(args.seed) + + corpora += [artifact] + if seed is not None: + corpora += [seed] + + cmd = [target, '-artifact_prefix={}/'.format(artifact)] + cmd += corpora + args.extra + print(' '.join(cmd)) + subprocess.call(cmd) + return 0 + + +def afl_parser(args): + description = """ + Runs an afl-fuzz job. + Passes all extra arguments to afl-fuzz. + The fuzzer should have been built with CC/CXX set to the AFL compilers, + and with LIB_FUZZING_ENGINE='libregression.a'. + Takes input from CORPORA and writes output to OUTPUT. + Uses AFL_FUZZ as the binary (set from flag or environment variable). + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--corpora', + type=str, + help='Override the default corpora dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET'))) + parser.add_argument( + '--output', + type=str, + help='Override the default AFL output dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-afl'))) + parser.add_argument( + '--afl-fuzz', + type=str, + default=AFL_FUZZ, + help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ)) + parser.add_argument( + 'TARGET', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + if args.TARGET and args.TARGET not in TARGETS: + raise RuntimeError('{} is not a valid target'.format(args.TARGET)) + + if not args.corpora: + args.corpora = abs_join(CORPORA_DIR, args.TARGET) + if not args.output: + args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET)) + + return args + + +def afl(args): + try: + args = afl_parser(args) + except Exception as e: + print(e) + return 1 + target = abs_join(FUZZ_DIR, args.TARGET) + + corpora = create(args.corpora) + output = create(args.output) + + cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra + cmd += [target, '@@'] + print(' '.join(cmd)) + subprocess.call(cmd) + return 0 + + +def regression_parser(args): + description = """ + Runs one or more regression tests. + The fuzzer should have been built with with + LIB_FUZZING_ENGINE='libregression.a'. + Takes input from CORPORA. + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + 'TARGET', + nargs='*', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))) + args = parser.parse_args(args) + + targets = set() + for target in args.TARGET: + if not target: + continue + if target == 'all': + targets = targets.union(TARGETS) + elif target in TARGETS: + targets.add(target) + else: + raise RuntimeError('{} is not a valid target'.format(target)) + args.TARGET = list(targets) + + return args + + +def regression(args): + try: + args = regression_parser(args) + except Exception as e: + print(e) + return 1 + for target in args.TARGET: + corpora = create(abs_join(CORPORA_DIR, target)) + target = abs_join(FUZZ_DIR, target) + cmd = [target, corpora] + print(' '.join(cmd)) + subprocess.check_call(cmd) + return 0 + + +def gen_parser(args): + description = """ + Generate a seed corpus appropiate for TARGET with data generated with + decodecorpus. + The fuzz inputs are prepended with a seed before the zstd data, so the + output of decodecorpus shouldn't be used directly. + Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and + puts the output in SEED. + DECODECORPUS is the decodecorpus binary, and must already be built. + """ + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + '--number', + '-n', + type=int, + default=100, + help='Number of samples to generate') + parser.add_argument( + '--max-size-log', + type=int, + default=13, + help='Maximum sample size to generate') + parser.add_argument( + '--seed', + type=str, + help='Override the default seed dir (default: {})'.format( + abs_join(CORPORA_DIR, 'TARGET-seed'))) + parser.add_argument( + '--decodecorpus', + type=str, + default=DECODECORPUS, + help="decodecorpus binary (default: $DECODECORPUS='{}')".format( + DECODECORPUS)) + parser.add_argument( + '--fuzz-rng-seed-size', + type=int, + default=4, + help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)" + ) + parser.add_argument( + 'TARGET', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + if args.TARGET and args.TARGET not in TARGETS: + raise RuntimeError('{} is not a valid target'.format(args.TARGET)) + + if not args.seed: + args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET)) + + if not os.path.isfile(args.decodecorpus): + raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'". + format(args.decodecorpus, abs_join(FUZZ_DIR, '..'))) + + return args + + +def gen(args): + try: + args = gen_parser(args) + except Exception as e: + print(e) + return 1 + + seed = create(args.seed) + with tmpdir() as compressed: + with tmpdir() as decompressed: + cmd = [ + args.decodecorpus, + '-n{}'.format(args.number), + '-p{}/'.format(compressed), + '-o{}'.format(decompressed), + ] + + if 'block_' in args.TARGET: + cmd += [ + '--gen-blocks', + '--max-block-size-log={}'.format(args.max_size_log) + ] + else: + cmd += ['--max-content-size-log={}'.format(args.max_size_log)] + + print(' '.join(cmd)) + subprocess.check_call(cmd) + + if '_round_trip' in args.TARGET: + print('using decompressed data in {}'.format(decompressed)) + samples = decompressed + elif '_decompress' in args.TARGET: + print('using compressed data in {}'.format(compressed)) + samples = compressed + + # Copy the samples over and prepend the RNG seeds + for name in os.listdir(samples): + samplename = abs_join(samples, name) + outname = abs_join(seed, name) + rng_seed = os.urandom(args.fuzz_rng_seed_size) + with open(samplename, 'rb') as sample: + with open(outname, 'wb') as out: + out.write(rng_seed) + CHUNK_SIZE = 131072 + chunk = sample.read(CHUNK_SIZE) + while len(chunk) > 0: + out.write(chunk) + chunk = sample.read(CHUNK_SIZE) + return 0 + + +def short_help(args): + name = args[0] + print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name)) + + +def help(args): + short_help(args) + print("\tfuzzing helpers (select a command and pass -h for help)\n") + print("Options:") + print("\t-h, --help\tPrint this message") + print("") + print("Commands:") + print("\tbuild\t\tBuild a fuzzer") + print("\tlibfuzzer\tRun a libFuzzer fuzzer") + print("\tafl\t\tRun an AFL fuzzer") + print("\tregression\tRun a regression test") + print("\tgen\t\tGenerate a seed corpus for a fuzzer") + + +def main(): + args = sys.argv + if len(args) < 2: + help(args) + return 1 + if args[1] == '-h' or args[1] == '--help' or args[1] == '-H': + help(args) + return 1 + command = args.pop(1) + args[0] = "{} {}".format(args[0], command) + if command == "build": + return build(args) + if command == "libfuzzer": + return libfuzzer(args) + if command == "regression": + return regression(args) + if command == "afl": + return afl(args) + if command == "gen": + return gen(args) + short_help(args) + print("Error: No such command {} (pass -h for help)".format(command)) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/fuzz/fuzz_helpers.h b/tests/fuzz/fuzz_helpers.h index f7cc3d5b..cb3421bb 100644 --- a/tests/fuzz/fuzz_helpers.h +++ b/tests/fuzz/fuzz_helpers.h @@ -19,6 +19,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) @@ -48,11 +52,14 @@ /** * Determininistically constructs a seed based on the fuzz input. - * Only looks at the first FUZZ_RNG_SEED_SIZE bytes of the input. + * Consumes up to the first FUZZ_RNG_SEED_SIZE bytes of the input. */ -FUZZ_STATIC uint32_t FUZZ_seed(const uint8_t *src, size_t size) { - size_t const toHash = MIN(FUZZ_RNG_SEED_SIZE, size); - return XXH32(src, toHash, 0); +FUZZ_STATIC uint32_t FUZZ_seed(uint8_t const **src, size_t* size) { + uint8_t const *data = *src; + size_t const toHash = MIN(FUZZ_RNG_SEED_SIZE, *size); + *size -= toHash; + *src += toHash; + return XXH32(data, toHash, 0); } #define FUZZ_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r)))) @@ -67,4 +74,8 @@ FUZZ_STATIC uint32_t FUZZ_rand(uint32_t *state) { return rand32 >> 5; } +#ifdef __cplusplus +} +#endif + #endif diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c index 36ae3884..2b714d29 100644 --- a/tests/fuzz/regression_driver.c +++ b/tests/fuzz/regression_driver.c @@ -29,9 +29,11 @@ int main(int argc, char const **argv) { #ifdef UTIL_HAS_CREATEFILELIST files = UTIL_createFileList(files, numFiles, &fileNamesBuf, &numFiles, kFollowLinks); - FUZZ_ASSERT(files); + if (!files) + numFiles = 0; #endif - + if (numFiles == 0) + fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]); for (i = 0; i < numFiles; ++i) { char const *fileName = files[i]; size_t const fileSize = UTIL_getFileSize(fileName); diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c index a225b9dc..bba272c6 100644 --- a/tests/fuzz/simple_decompress.c +++ b/tests/fuzz/simple_decompress.c @@ -24,7 +24,10 @@ static size_t bufSize = 0; int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { - size_t const neededBufSize = MAX(20 * size, (size_t)256 << 10); + size_t neededBufSize; + + FUZZ_seed(&src, &size); + neededBufSize = MAX(20 * size, (size_t)256 << 10); /* Allocate all buffers and contexts if not already allocated */ if (neededBufSize > bufSize) { @@ -39,7 +42,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) } ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size); -#ifndef STATEFULL_FUZZING +#ifndef STATEFUL_FUZZING ZSTD_freeDCtx(dctx); dctx = NULL; #endif return 0; diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c index 63472bb4..9df02528 100644 --- a/tests/fuzz/simple_round_trip.c +++ b/tests/fuzz/simple_round_trip.c @@ -44,9 +44,10 @@ static size_t roundTripTest(void *result, size_t resultCapacity, int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { - size_t const neededBufSize = ZSTD_compressBound(size); + size_t neededBufSize; - seed = FUZZ_seed(src, size); + seed = FUZZ_seed(&src, &size); + neededBufSize = ZSTD_compressBound(size); /* Allocate all buffers and contexts if not already allocated */ if (neededBufSize > bufSize) { @@ -73,7 +74,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); } -#ifndef STATEFULL_FUZZING +#ifndef STATEFUL_FUZZING ZSTD_freeCCtx(cctx); cctx = NULL; ZSTD_freeDCtx(dctx); dctx = NULL; #endif diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c index dfd74697..7ad57122 100644 --- a/tests/fuzz/stream_decompress.c +++ b/tests/fuzz/stream_decompress.c @@ -51,7 +51,7 @@ static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size) int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { - seed = FUZZ_seed(src, size); + seed = FUZZ_seed(&src, &size); /* Allocate all buffers and contexts if not already allocated */ if (!buf) { @@ -78,7 +78,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) } error: -#ifndef STATEFULL_FUZZING +#ifndef STATEFUL_FUZZING ZSTD_freeDStream(dstream); dstream = NULL; #endif return 0; diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c index 74adbeab..e796c1e7 100644 --- a/tests/fuzz/stream_round_trip.c +++ b/tests/fuzz/stream_round_trip.c @@ -114,9 +114,10 @@ static size_t compress(uint8_t *dst, size_t capacity, int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { - size_t const neededBufSize = ZSTD_compressBound(size) * 2; + size_t neededBufSize; - seed = FUZZ_seed(src, size); + seed = FUZZ_seed(&src, &size); + neededBufSize = ZSTD_compressBound(size) * 2; /* Allocate all buffers and contexts if not already allocated */ if (neededBufSize > bufSize) { @@ -145,7 +146,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); } -#ifndef STATEFULL_FUZZING +#ifndef STATEFUL_FUZZING ZSTD_freeCStream(cstream); cstream = NULL; ZSTD_freeDCtx(dctx); dctx = NULL; #endif