Merge pull request #325 from terrelln/dev
Add Parallel Zstandard tool to contrib/
This commit is contained in:
commit
be38198ef0
1
.gitignore
vendored
1
.gitignore
vendored
@ -31,3 +31,4 @@ _zstdbench/
|
|||||||
*.idea
|
*.idea
|
||||||
*.swp
|
*.swp
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
googletest/
|
||||||
|
@ -9,7 +9,7 @@ matrix:
|
|||||||
env: PLATFORM="Ubuntu 12.04 container" CMD="make test && make clean && make travis-install"
|
env: PLATFORM="Ubuntu 12.04 container" CMD="make test && make clean && make travis-install"
|
||||||
- os: linux
|
- os: linux
|
||||||
sudo: false
|
sudo: false
|
||||||
env: PLATFORM="Ubuntu 12.04 container" CMD="make -C tests test-zstd_nolegacy && make clean && make zlibwrapper && make clean && make cmaketest"
|
env: PLATFORM="Ubuntu 12.04 container" CMD="make -C tests test-zstd_nolegacy && make clean && make zlibwrapper && make clean && make cmaketest && make -C contrib/pzstd googletest && make -C contrib/pzstd test && make -C contrib/pzstd clean"
|
||||||
- os: linux
|
- os: linux
|
||||||
sudo: false
|
sudo: false
|
||||||
env: PLATFORM="Ubuntu 12.04 container" CMD="make usan"
|
env: PLATFORM="Ubuntu 12.04 container" CMD="make usan"
|
||||||
|
55
contrib/pzstd/ErrorHolder.h
Normal file
55
contrib/pzstd/ErrorHolder.h
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
|
||||||
|
// Coordinates graceful shutdown of the pzstd pipeline
|
||||||
|
class ErrorHolder {
|
||||||
|
std::atomic<bool> error_;
|
||||||
|
std::string message_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
ErrorHolder() : error_(false) {}
|
||||||
|
|
||||||
|
bool hasError() noexcept {
|
||||||
|
return error_.load();
|
||||||
|
}
|
||||||
|
|
||||||
|
void setError(std::string message) noexcept {
|
||||||
|
// Given multiple possibly concurrent calls, exactly one will ever succeed.
|
||||||
|
bool expected = false;
|
||||||
|
if (error_.compare_exchange_strong(expected, true)) {
|
||||||
|
message_ = std::move(message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool check(bool predicate, std::string message) noexcept {
|
||||||
|
if (!predicate) {
|
||||||
|
setError(std::move(message));
|
||||||
|
}
|
||||||
|
return !hasError();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string getError() noexcept {
|
||||||
|
error_.store(false);
|
||||||
|
return std::move(message_);
|
||||||
|
}
|
||||||
|
|
||||||
|
~ErrorHolder() {
|
||||||
|
if (hasError()) {
|
||||||
|
throw std::logic_error(message_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
76
contrib/pzstd/Makefile
Normal file
76
contrib/pzstd/Makefile
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
# ##########################################################################
|
||||||
|
# Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the BSD-style license found in the
|
||||||
|
# LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
# of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
# ##########################################################################
|
||||||
|
|
||||||
|
ZSTDDIR = ../../lib
|
||||||
|
PROGDIR = ../../programs
|
||||||
|
|
||||||
|
CPPFLAGS = -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/dictBuilder -I$(PROGDIR) -I.
|
||||||
|
CFLAGS ?= -O3
|
||||||
|
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wstrict-aliasing=1 \
|
||||||
|
-Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef \
|
||||||
|
-std=c++11
|
||||||
|
CFLAGS += $(MOREFLAGS)
|
||||||
|
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
|
||||||
|
|
||||||
|
|
||||||
|
ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c
|
||||||
|
ZSTDCOMP_FILES := $(ZSTDDIR)/compress/zstd_compress.c $(ZSTDDIR)/compress/fse_compress.c $(ZSTDDIR)/compress/huf_compress.c
|
||||||
|
ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/huf_decompress.c
|
||||||
|
ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES)
|
||||||
|
|
||||||
|
|
||||||
|
# Define *.exe as extension for Windows systems
|
||||||
|
ifneq (,$(filter Windows%,$(OS)))
|
||||||
|
EXT =.exe
|
||||||
|
else
|
||||||
|
EXT =
|
||||||
|
endif
|
||||||
|
|
||||||
|
.PHONY: default all test clean
|
||||||
|
|
||||||
|
default: pzstd
|
||||||
|
|
||||||
|
all: pzstd
|
||||||
|
|
||||||
|
|
||||||
|
libzstd.a: $(ZSTD_FILES)
|
||||||
|
$(MAKE) -C $(ZSTDDIR) libzstd
|
||||||
|
@cp $(ZSTDDIR)/libzstd.a .
|
||||||
|
|
||||||
|
|
||||||
|
Pzstd.o: Pzstd.h Pzstd.cpp ErrorHolder.h utils/*.h
|
||||||
|
$(CXX) $(FLAGS) -c Pzstd.cpp -o $@
|
||||||
|
|
||||||
|
SkippableFrame.o: SkippableFrame.h SkippableFrame.cpp utils/*.h
|
||||||
|
$(CXX) $(FLAGS) -c SkippableFrame.cpp -o $@
|
||||||
|
|
||||||
|
Options.o: Options.h Options.cpp
|
||||||
|
$(CXX) $(FLAGS) -c Options.cpp -o $@
|
||||||
|
|
||||||
|
main.o: main.cpp *.h utils/*.h
|
||||||
|
$(CXX) $(FLAGS) -c main.cpp -o $@
|
||||||
|
|
||||||
|
pzstd: libzstd.a Pzstd.o SkippableFrame.o Options.o main.o
|
||||||
|
$(CXX) $(FLAGS) $^ -o $@$(EXT)
|
||||||
|
|
||||||
|
googletest:
|
||||||
|
@git clone https://github.com/google/googletest
|
||||||
|
@mkdir -p googletest/build
|
||||||
|
@cd googletest/build && cmake .. && make
|
||||||
|
|
||||||
|
test: libzstd.a Pzstd.o Options.o SkippableFrame.o
|
||||||
|
$(MAKE) -C utils/test test
|
||||||
|
$(MAKE) -C test test
|
||||||
|
|
||||||
|
clean:
|
||||||
|
$(MAKE) -C $(ZSTDDIR) clean
|
||||||
|
$(MAKE) -C utils/test clean
|
||||||
|
$(MAKE) -C test clean
|
||||||
|
@$(RM) -rf googletest/ libzstd.a *.o pzstd$(EXT)
|
||||||
|
@echo Cleaning completed
|
182
contrib/pzstd/Options.cpp
Normal file
182
contrib/pzstd/Options.cpp
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#include "Options.h"
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
unsigned parseUnsigned(const char* arg) {
|
||||||
|
unsigned result = 0;
|
||||||
|
while (*arg >= '0' && *arg <= '9') {
|
||||||
|
result *= 10;
|
||||||
|
result += *arg - '0';
|
||||||
|
++arg;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string zstdExtension = ".zst";
|
||||||
|
constexpr unsigned defaultCompressionLevel = 3;
|
||||||
|
constexpr unsigned maxNonUltraCompressionLevel = 19;
|
||||||
|
|
||||||
|
void usage() {
|
||||||
|
std::fprintf(stderr, "Usage:\n");
|
||||||
|
std::fprintf(stderr, "\tpzstd [args] FILE\n");
|
||||||
|
std::fprintf(stderr, "Parallel ZSTD options:\n");
|
||||||
|
std::fprintf(stderr, "\t-n/--num-threads #: Number of threads to spawn\n");
|
||||||
|
std::fprintf(stderr, "\t-p/--pzstd-headers: Write pzstd headers to enable parallel decompression\n");
|
||||||
|
|
||||||
|
std::fprintf(stderr, "ZSTD options:\n");
|
||||||
|
std::fprintf(stderr, "\t-u/--ultra : enable levels beyond %i, up to %i (requires more memory)\n", maxNonUltraCompressionLevel, ZSTD_maxCLevel());
|
||||||
|
std::fprintf(stderr, "\t-h/--help : display help and exit\n");
|
||||||
|
std::fprintf(stderr, "\t-V/--version : display version number and exit\n");
|
||||||
|
std::fprintf(stderr, "\t-d/--decompress : decompression\n");
|
||||||
|
std::fprintf(stderr, "\t-f/--force : overwrite output\n");
|
||||||
|
std::fprintf(stderr, "\t-o/--output file : result stored into `file`\n");
|
||||||
|
std::fprintf(stderr, "\t-c/--stdout : write output to standard output\n");
|
||||||
|
std::fprintf(stderr, "\t-# : # compression level (1-%d, default:%d)\n", maxNonUltraCompressionLevel, defaultCompressionLevel);
|
||||||
|
}
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
Options::Options()
|
||||||
|
: numThreads(0),
|
||||||
|
maxWindowLog(23),
|
||||||
|
compressionLevel(defaultCompressionLevel),
|
||||||
|
decompress(false),
|
||||||
|
overwrite(false),
|
||||||
|
pzstdHeaders(false) {}
|
||||||
|
|
||||||
|
bool Options::parse(int argc, const char** argv) {
|
||||||
|
bool ultra = false;
|
||||||
|
for (int i = 1; i < argc; ++i) {
|
||||||
|
const char* arg = argv[i];
|
||||||
|
// Arguments with a short option
|
||||||
|
char option = 0;
|
||||||
|
if (!std::strcmp(arg, "--num-threads")) {
|
||||||
|
option = 'n';
|
||||||
|
} else if (!std::strcmp(arg, "--pzstd-headers")) {
|
||||||
|
option = 'p';
|
||||||
|
} else if (!std::strcmp(arg, "--ultra")) {
|
||||||
|
option = 'u';
|
||||||
|
} else if (!std::strcmp(arg, "--version")) {
|
||||||
|
option = 'V';
|
||||||
|
} else if (!std::strcmp(arg, "--help")) {
|
||||||
|
option = 'h';
|
||||||
|
} else if (!std::strcmp(arg, "--decompress")) {
|
||||||
|
option = 'd';
|
||||||
|
} else if (!std::strcmp(arg, "--force")) {
|
||||||
|
option = 'f';
|
||||||
|
} else if (!std::strcmp(arg, "--output")) {
|
||||||
|
option = 'o';
|
||||||
|
} else if (!std::strcmp(arg, "--stdout")) {
|
||||||
|
option = 'c';
|
||||||
|
}else if (arg[0] == '-' && arg[1] != 0) {
|
||||||
|
// Parse the compression level or short option
|
||||||
|
if (arg[1] >= '0' && arg[1] <= '9') {
|
||||||
|
compressionLevel = parseUnsigned(arg + 1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
option = arg[1];
|
||||||
|
} else if (inputFile.empty()) {
|
||||||
|
inputFile = arg;
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
std::fprintf(stderr, "Invalid argument: %s.\n", arg);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (option) {
|
||||||
|
case 'n':
|
||||||
|
if (++i == argc) {
|
||||||
|
std::fprintf(stderr, "Invalid argument: -n requires an argument.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
numThreads = parseUnsigned(argv[i]);
|
||||||
|
if (numThreads == 0) {
|
||||||
|
std::fprintf(stderr, "Invalid argument: # of threads must be > 0.\n");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'p':
|
||||||
|
pzstdHeaders = true;
|
||||||
|
break;
|
||||||
|
case 'u':
|
||||||
|
ultra = true;
|
||||||
|
maxWindowLog = 0;
|
||||||
|
break;
|
||||||
|
case 'V':
|
||||||
|
std::fprintf(stderr, "ZSTD version: %s.\n", ZSTD_VERSION_STRING);
|
||||||
|
return false;
|
||||||
|
case 'h':
|
||||||
|
usage();
|
||||||
|
return false;
|
||||||
|
case 'd':
|
||||||
|
decompress = true;
|
||||||
|
break;
|
||||||
|
case 'f':
|
||||||
|
overwrite = true;
|
||||||
|
break;
|
||||||
|
case 'o':
|
||||||
|
if (++i == argc) {
|
||||||
|
std::fprintf(stderr, "Invalid argument: -o requires an argument.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
outputFile = argv[i];
|
||||||
|
break;
|
||||||
|
case 'c':
|
||||||
|
outputFile = '-';
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
std::fprintf(stderr, "Invalid argument: %s.\n", arg);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Determine input file if not specified
|
||||||
|
if (inputFile.empty()) {
|
||||||
|
inputFile = "-";
|
||||||
|
}
|
||||||
|
// Determine output file if not specified
|
||||||
|
if (outputFile.empty()) {
|
||||||
|
if (inputFile == "-") {
|
||||||
|
std::fprintf(
|
||||||
|
stderr,
|
||||||
|
"Invalid arguments: Reading from stdin, but -o not provided.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Attempt to add/remove zstd extension from the input file
|
||||||
|
if (decompress) {
|
||||||
|
int stemSize = inputFile.size() - zstdExtension.size();
|
||||||
|
if (stemSize > 0 && inputFile.substr(stemSize) == zstdExtension) {
|
||||||
|
outputFile = inputFile.substr(0, stemSize);
|
||||||
|
} else {
|
||||||
|
std::fprintf(
|
||||||
|
stderr, "Invalid argument: Unable to determine output file.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
outputFile = inputFile + zstdExtension;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Check compression level
|
||||||
|
{
|
||||||
|
unsigned maxCLevel = ultra ? ZSTD_maxCLevel() : maxNonUltraCompressionLevel;
|
||||||
|
if (compressionLevel > maxCLevel) {
|
||||||
|
std::fprintf(
|
||||||
|
stderr, "Invalid compression level %u.\n", compressionLevel);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Check that numThreads is set
|
||||||
|
if (numThreads == 0) {
|
||||||
|
std::fprintf(stderr, "Invalid arguments: # of threads not specified.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
60
contrib/pzstd/Options.h
Normal file
60
contrib/pzstd/Options.h
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#define ZSTD_STATIC_LINKING_ONLY
|
||||||
|
#include "zstd.h"
|
||||||
|
#undef ZSTD_STATIC_LINKING_ONLY
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
|
||||||
|
struct Options {
|
||||||
|
unsigned numThreads;
|
||||||
|
unsigned maxWindowLog;
|
||||||
|
unsigned compressionLevel;
|
||||||
|
bool decompress;
|
||||||
|
std::string inputFile;
|
||||||
|
std::string outputFile;
|
||||||
|
bool overwrite;
|
||||||
|
bool pzstdHeaders;
|
||||||
|
|
||||||
|
Options();
|
||||||
|
Options(
|
||||||
|
unsigned numThreads,
|
||||||
|
unsigned maxWindowLog,
|
||||||
|
unsigned compressionLevel,
|
||||||
|
bool decompress,
|
||||||
|
const std::string& inputFile,
|
||||||
|
const std::string& outputFile,
|
||||||
|
bool overwrite,
|
||||||
|
bool pzstdHeaders)
|
||||||
|
: numThreads(numThreads),
|
||||||
|
maxWindowLog(maxWindowLog),
|
||||||
|
compressionLevel(compressionLevel),
|
||||||
|
decompress(decompress),
|
||||||
|
inputFile(inputFile),
|
||||||
|
outputFile(outputFile),
|
||||||
|
overwrite(overwrite),
|
||||||
|
pzstdHeaders(pzstdHeaders) {}
|
||||||
|
|
||||||
|
bool parse(int argc, const char** argv);
|
||||||
|
|
||||||
|
ZSTD_parameters determineParameters() const {
|
||||||
|
ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, 0);
|
||||||
|
if (maxWindowLog != 0 && params.cParams.windowLog > maxWindowLog) {
|
||||||
|
params.cParams.windowLog = maxWindowLog;
|
||||||
|
params.cParams = ZSTD_adjustCParams(params.cParams, 0, 0);
|
||||||
|
}
|
||||||
|
return params;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
462
contrib/pzstd/Pzstd.cpp
Normal file
462
contrib/pzstd/Pzstd.cpp
Normal file
@ -0,0 +1,462 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#include "Pzstd.h"
|
||||||
|
#include "SkippableFrame.h"
|
||||||
|
#include "utils/FileSystem.h"
|
||||||
|
#include "utils/Range.h"
|
||||||
|
#include "utils/ScopeGuard.h"
|
||||||
|
#include "utils/ThreadPool.h"
|
||||||
|
#include "utils/WorkQueue.h"
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
#ifdef _WIN32
|
||||||
|
const std::string nullOutput = "nul";
|
||||||
|
#else
|
||||||
|
const std::string nullOutput = "/dev/null";
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
using std::size_t;
|
||||||
|
|
||||||
|
size_t pzstdMain(const Options& options, ErrorHolder& errorHolder) {
|
||||||
|
// Open the input file and attempt to determine its size
|
||||||
|
FILE* inputFd = stdin;
|
||||||
|
size_t inputSize = 0;
|
||||||
|
if (options.inputFile != "-") {
|
||||||
|
inputFd = std::fopen(options.inputFile.c_str(), "rb");
|
||||||
|
if (!errorHolder.check(inputFd != nullptr, "Failed to open input file")) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
std::error_code ec;
|
||||||
|
inputSize = file_size(options.inputFile, ec);
|
||||||
|
if (ec) {
|
||||||
|
inputSize = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto closeInputGuard = makeScopeGuard([&] { std::fclose(inputFd); });
|
||||||
|
|
||||||
|
// Check if the output file exists and then open it
|
||||||
|
FILE* outputFd = stdout;
|
||||||
|
if (options.outputFile != "-") {
|
||||||
|
if (!options.overwrite && options.outputFile != nullOutput) {
|
||||||
|
outputFd = std::fopen(options.outputFile.c_str(), "rb");
|
||||||
|
if (!errorHolder.check(outputFd == nullptr, "Output file exists")) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
outputFd = std::fopen(options.outputFile.c_str(), "wb");
|
||||||
|
if (!errorHolder.check(
|
||||||
|
outputFd != nullptr, "Failed to open output file")) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto closeOutputGuard = makeScopeGuard([&] { std::fclose(outputFd); });
|
||||||
|
|
||||||
|
// WorkQueue outlives ThreadPool so in the case of error we are certain
|
||||||
|
// we don't accidently try to call push() on it after it is destroyed.
|
||||||
|
WorkQueue<std::shared_ptr<BufferWorkQueue>> outs;
|
||||||
|
size_t bytesWritten;
|
||||||
|
{
|
||||||
|
// Initialize the thread pool with numThreads
|
||||||
|
ThreadPool executor(options.numThreads);
|
||||||
|
if (!options.decompress) {
|
||||||
|
// Add a job that reads the input and starts all the compression jobs
|
||||||
|
executor.add(
|
||||||
|
[&errorHolder, &outs, &executor, inputFd, inputSize, &options] {
|
||||||
|
asyncCompressChunks(
|
||||||
|
errorHolder,
|
||||||
|
outs,
|
||||||
|
executor,
|
||||||
|
inputFd,
|
||||||
|
inputSize,
|
||||||
|
options.numThreads,
|
||||||
|
options.determineParameters());
|
||||||
|
});
|
||||||
|
// Start writing
|
||||||
|
bytesWritten =
|
||||||
|
writeFile(errorHolder, outs, outputFd, options.pzstdHeaders);
|
||||||
|
} else {
|
||||||
|
// Add a job that reads the input and starts all the decompression jobs
|
||||||
|
executor.add([&errorHolder, &outs, &executor, inputFd] {
|
||||||
|
asyncDecompressFrames(errorHolder, outs, executor, inputFd);
|
||||||
|
});
|
||||||
|
// Start writing
|
||||||
|
bytesWritten = writeFile(
|
||||||
|
errorHolder, outs, outputFd, /* writeSkippableFrames */ false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return bytesWritten;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Construct a `ZSTD_inBuffer` that points to the data in `buffer`.
|
||||||
|
static ZSTD_inBuffer makeZstdInBuffer(const Buffer& buffer) {
|
||||||
|
return ZSTD_inBuffer{buffer.data(), buffer.size(), 0};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Advance `buffer` and `inBuffer` by the amount of data read, as indicated by
|
||||||
|
* `inBuffer.pos`.
|
||||||
|
*/
|
||||||
|
void advance(Buffer& buffer, ZSTD_inBuffer& inBuffer) {
|
||||||
|
auto pos = inBuffer.pos;
|
||||||
|
inBuffer.src = static_cast<const unsigned char*>(inBuffer.src) + pos;
|
||||||
|
inBuffer.size -= pos;
|
||||||
|
inBuffer.pos = 0;
|
||||||
|
return buffer.advance(pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Construct a `ZSTD_outBuffer` that points to the data in `buffer`.
|
||||||
|
static ZSTD_outBuffer makeZstdOutBuffer(Buffer& buffer) {
|
||||||
|
return ZSTD_outBuffer{buffer.data(), buffer.size(), 0};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Split `buffer` and advance `outBuffer` by the amount of data written, as
|
||||||
|
* indicated by `outBuffer.pos`.
|
||||||
|
*/
|
||||||
|
Buffer split(Buffer& buffer, ZSTD_outBuffer& outBuffer) {
|
||||||
|
auto pos = outBuffer.pos;
|
||||||
|
outBuffer.dst = static_cast<unsigned char*>(outBuffer.dst) + pos;
|
||||||
|
outBuffer.size -= pos;
|
||||||
|
outBuffer.pos = 0;
|
||||||
|
return buffer.splitAt(pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stream chunks of input from `in`, compress it, and stream it out to `out`.
|
||||||
|
*
|
||||||
|
* @param errorHolder Used to report errors and check if an error occured
|
||||||
|
* @param in Queue that we `pop()` input buffers from
|
||||||
|
* @param out Queue that we `push()` compressed output buffers to
|
||||||
|
* @param maxInputSize An upper bound on the size of the input
|
||||||
|
* @param parameters The zstd parameters to use for compression
|
||||||
|
*/
|
||||||
|
static void compress(
|
||||||
|
ErrorHolder& errorHolder,
|
||||||
|
std::shared_ptr<BufferWorkQueue> in,
|
||||||
|
std::shared_ptr<BufferWorkQueue> out,
|
||||||
|
size_t maxInputSize,
|
||||||
|
ZSTD_parameters parameters) {
|
||||||
|
auto guard = makeScopeGuard([&] { out->finish(); });
|
||||||
|
// Initialize the CCtx
|
||||||
|
std::unique_ptr<ZSTD_CStream, size_t (&)(ZSTD_CStream*)> ctx(
|
||||||
|
ZSTD_createCStream(), ZSTD_freeCStream);
|
||||||
|
if (!errorHolder.check(ctx != nullptr, "Failed to allocate ZSTD_CStream")) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
auto err = ZSTD_initCStream_advanced(ctx.get(), nullptr, 0, parameters, 0);
|
||||||
|
if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate space for the result
|
||||||
|
auto outBuffer = Buffer(ZSTD_compressBound(maxInputSize));
|
||||||
|
auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
|
||||||
|
{
|
||||||
|
Buffer inBuffer;
|
||||||
|
// Read a buffer in from the input queue
|
||||||
|
while (in->pop(inBuffer) && !errorHolder.hasError()) {
|
||||||
|
auto zstdInBuffer = makeZstdInBuffer(inBuffer);
|
||||||
|
// Compress the whole buffer and send it to the output queue
|
||||||
|
while (!inBuffer.empty() && !errorHolder.hasError()) {
|
||||||
|
if (!errorHolder.check(
|
||||||
|
!outBuffer.empty(), "ZSTD_compressBound() was too small")) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Compress
|
||||||
|
auto err =
|
||||||
|
ZSTD_compressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
|
||||||
|
if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Split the compressed data off outBuffer and pass to the output queue
|
||||||
|
out->push(split(outBuffer, zstdOutBuffer));
|
||||||
|
// Forget about the data we already compressed
|
||||||
|
advance(inBuffer, zstdInBuffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Write the epilog
|
||||||
|
size_t bytesLeft;
|
||||||
|
do {
|
||||||
|
if (!errorHolder.check(
|
||||||
|
!outBuffer.empty(), "ZSTD_compressBound() was too small")) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
bytesLeft = ZSTD_endStream(ctx.get(), &zstdOutBuffer);
|
||||||
|
if (!errorHolder.check(
|
||||||
|
!ZSTD_isError(bytesLeft), ZSTD_getErrorName(bytesLeft))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
out->push(split(outBuffer, zstdOutBuffer));
|
||||||
|
} while (bytesLeft != 0 && !errorHolder.hasError());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates how large each independently compressed frame should be.
|
||||||
|
*
|
||||||
|
* @param size The size of the source if known, 0 otherwise
|
||||||
|
* @param numThreads The number of threads available to run compression jobs on
|
||||||
|
* @param params The zstd parameters to be used for compression
|
||||||
|
*/
|
||||||
|
static size_t
|
||||||
|
calculateStep(size_t size, size_t numThreads, const ZSTD_parameters& params) {
|
||||||
|
size_t step = 1ul << (params.cParams.windowLog + 2);
|
||||||
|
// If file size is known, see if a smaller step will spread work more evenly
|
||||||
|
if (size != 0) {
|
||||||
|
size_t newStep = size / numThreads;
|
||||||
|
if (newStep != 0) {
|
||||||
|
step = std::min(step, newStep);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return step;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
enum class FileStatus { Continue, Done, Error };
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads `size` data in chunks of `chunkSize` and puts it into `queue`.
|
||||||
|
* Will read less if an error or EOF occurs.
|
||||||
|
* Returns the status of the file after all of the reads have occurred.
|
||||||
|
*/
|
||||||
|
static FileStatus
|
||||||
|
readData(BufferWorkQueue& queue, size_t chunkSize, size_t size, FILE* fd) {
|
||||||
|
Buffer buffer(size);
|
||||||
|
while (!buffer.empty()) {
|
||||||
|
auto bytesRead =
|
||||||
|
std::fread(buffer.data(), 1, std::min(chunkSize, buffer.size()), fd);
|
||||||
|
queue.push(buffer.splitAt(bytesRead));
|
||||||
|
if (std::feof(fd)) {
|
||||||
|
return FileStatus::Done;
|
||||||
|
} else if (std::ferror(fd) || bytesRead == 0) {
|
||||||
|
return FileStatus::Error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return FileStatus::Continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
void asyncCompressChunks(
|
||||||
|
ErrorHolder& errorHolder,
|
||||||
|
WorkQueue<std::shared_ptr<BufferWorkQueue>>& chunks,
|
||||||
|
ThreadPool& executor,
|
||||||
|
FILE* fd,
|
||||||
|
size_t size,
|
||||||
|
size_t numThreads,
|
||||||
|
ZSTD_parameters params) {
|
||||||
|
auto chunksGuard = makeScopeGuard([&] { chunks.finish(); });
|
||||||
|
|
||||||
|
// Break the input up into chunks of size `step` and compress each chunk
|
||||||
|
// independently.
|
||||||
|
size_t step = calculateStep(size, numThreads, params);
|
||||||
|
auto status = FileStatus::Continue;
|
||||||
|
while (status == FileStatus::Continue && !errorHolder.hasError()) {
|
||||||
|
// Make a new input queue that we will put the chunk's input data into.
|
||||||
|
auto in = std::make_shared<BufferWorkQueue>();
|
||||||
|
auto inGuard = makeScopeGuard([&] { in->finish(); });
|
||||||
|
// Make a new output queue that compress will put the compressed data into.
|
||||||
|
auto out = std::make_shared<BufferWorkQueue>();
|
||||||
|
// Start compression in the thread pool
|
||||||
|
executor.add([&errorHolder, in, out, step, params] {
|
||||||
|
return compress(
|
||||||
|
errorHolder, std::move(in), std::move(out), step, params);
|
||||||
|
});
|
||||||
|
// Pass the output queue to the writer thread.
|
||||||
|
chunks.push(std::move(out));
|
||||||
|
// Fill the input queue for the compression job we just started
|
||||||
|
status = readData(*in, ZSTD_CStreamInSize(), step, fd);
|
||||||
|
}
|
||||||
|
errorHolder.check(status != FileStatus::Error, "Error reading input");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decompress a frame, whose data is streamed into `in`, and stream the output
|
||||||
|
* to `out`.
|
||||||
|
*
|
||||||
|
* @param errorHolder Used to report errors and check if an error occured
|
||||||
|
* @param in Queue that we `pop()` input buffers from. It contains
|
||||||
|
* exactly one compressed frame.
|
||||||
|
* @param out Queue that we `push()` decompressed output buffers to
|
||||||
|
*/
|
||||||
|
static void decompress(
|
||||||
|
ErrorHolder& errorHolder,
|
||||||
|
std::shared_ptr<BufferWorkQueue> in,
|
||||||
|
std::shared_ptr<BufferWorkQueue> out) {
|
||||||
|
auto guard = makeScopeGuard([&] { out->finish(); });
|
||||||
|
// Initialize the DCtx
|
||||||
|
std::unique_ptr<ZSTD_DStream, size_t (&)(ZSTD_DStream*)> ctx(
|
||||||
|
ZSTD_createDStream(), ZSTD_freeDStream);
|
||||||
|
if (!errorHolder.check(ctx != nullptr, "Failed to allocate ZSTD_DStream")) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
auto err = ZSTD_initDStream(ctx.get());
|
||||||
|
if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t outSize = ZSTD_DStreamOutSize();
|
||||||
|
Buffer inBuffer;
|
||||||
|
size_t returnCode = 0;
|
||||||
|
// Read a buffer in from the input queue
|
||||||
|
while (in->pop(inBuffer) && !errorHolder.hasError()) {
|
||||||
|
auto zstdInBuffer = makeZstdInBuffer(inBuffer);
|
||||||
|
// Decompress the whole buffer and send it to the output queue
|
||||||
|
while (!inBuffer.empty() && !errorHolder.hasError()) {
|
||||||
|
// Allocate a buffer with at least outSize bytes.
|
||||||
|
Buffer outBuffer(outSize);
|
||||||
|
auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
|
||||||
|
// Decompress
|
||||||
|
returnCode =
|
||||||
|
ZSTD_decompressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
|
||||||
|
if (!errorHolder.check(
|
||||||
|
!ZSTD_isError(returnCode), ZSTD_getErrorName(returnCode))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Pass the buffer with the decompressed data to the output queue
|
||||||
|
out->push(split(outBuffer, zstdOutBuffer));
|
||||||
|
// Advance past the input we already read
|
||||||
|
advance(inBuffer, zstdInBuffer);
|
||||||
|
if (returnCode == 0) {
|
||||||
|
// The frame is over, prepare to (maybe) start a new frame
|
||||||
|
ZSTD_initDStream(ctx.get());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!errorHolder.check(returnCode <= 1, "Incomplete block")) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// We've given ZSTD_decompressStream all of our data, but there may still
|
||||||
|
// be data to read.
|
||||||
|
while (returnCode == 1) {
|
||||||
|
// Allocate a buffer with at least outSize bytes.
|
||||||
|
Buffer outBuffer(outSize);
|
||||||
|
auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
|
||||||
|
// Pass in no input.
|
||||||
|
ZSTD_inBuffer zstdInBuffer{nullptr, 0, 0};
|
||||||
|
// Decompress
|
||||||
|
returnCode =
|
||||||
|
ZSTD_decompressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
|
||||||
|
if (!errorHolder.check(
|
||||||
|
!ZSTD_isError(returnCode), ZSTD_getErrorName(returnCode))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Pass the buffer with the decompressed data to the output queue
|
||||||
|
out->push(split(outBuffer, zstdOutBuffer));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void asyncDecompressFrames(
|
||||||
|
ErrorHolder& errorHolder,
|
||||||
|
WorkQueue<std::shared_ptr<BufferWorkQueue>>& frames,
|
||||||
|
ThreadPool& executor,
|
||||||
|
FILE* fd) {
|
||||||
|
auto framesGuard = makeScopeGuard([&] { frames.finish(); });
|
||||||
|
// Split the source up into its component frames.
|
||||||
|
// If we find our recognized skippable frame we know the next frames size
|
||||||
|
// which means that we can decompress each standard frame in independently.
|
||||||
|
// Otherwise, we will decompress using only one decompression task.
|
||||||
|
const size_t chunkSize = ZSTD_DStreamInSize();
|
||||||
|
auto status = FileStatus::Continue;
|
||||||
|
while (status == FileStatus::Continue && !errorHolder.hasError()) {
|
||||||
|
// Make a new input queue that we will put the frames's bytes into.
|
||||||
|
auto in = std::make_shared<BufferWorkQueue>();
|
||||||
|
auto inGuard = makeScopeGuard([&] { in->finish(); });
|
||||||
|
// Make a output queue that decompress will put the decompressed data into
|
||||||
|
auto out = std::make_shared<BufferWorkQueue>();
|
||||||
|
|
||||||
|
size_t frameSize;
|
||||||
|
{
|
||||||
|
// Calculate the size of the next frame.
|
||||||
|
// frameSize is 0 if the frame info can't be decoded.
|
||||||
|
Buffer buffer(SkippableFrame::kSize);
|
||||||
|
auto bytesRead = std::fread(buffer.data(), 1, buffer.size(), fd);
|
||||||
|
if (bytesRead == 0 && status != FileStatus::Continue) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
buffer.subtract(buffer.size() - bytesRead);
|
||||||
|
frameSize = SkippableFrame::tryRead(buffer.range());
|
||||||
|
in->push(std::move(buffer));
|
||||||
|
}
|
||||||
|
// Start decompression in the thread pool
|
||||||
|
executor.add([&errorHolder, in, out] {
|
||||||
|
return decompress(errorHolder, std::move(in), std::move(out));
|
||||||
|
});
|
||||||
|
// Pass the output queue to the writer thread
|
||||||
|
frames.push(std::move(out));
|
||||||
|
if (frameSize == 0) {
|
||||||
|
// We hit a non SkippableFrame ==> not compressed by pzstd or corrupted
|
||||||
|
// Pass the rest of the source to this decompression task
|
||||||
|
while (status == FileStatus::Continue && !errorHolder.hasError()) {
|
||||||
|
status = readData(*in, chunkSize, chunkSize, fd);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Fill the input queue for the decompression job we just started
|
||||||
|
status = readData(*in, chunkSize, frameSize, fd);
|
||||||
|
}
|
||||||
|
errorHolder.check(status != FileStatus::Error, "Error reading input");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write `data` to `fd`, returns true iff success.
|
||||||
|
static bool writeData(ByteRange data, FILE* fd) {
|
||||||
|
while (!data.empty()) {
|
||||||
|
data.advance(std::fwrite(data.begin(), 1, data.size(), fd));
|
||||||
|
if (std::ferror(fd)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t writeFile(
|
||||||
|
ErrorHolder& errorHolder,
|
||||||
|
WorkQueue<std::shared_ptr<BufferWorkQueue>>& outs,
|
||||||
|
FILE* outputFd,
|
||||||
|
bool writeSkippableFrames) {
|
||||||
|
size_t bytesWritten = 0;
|
||||||
|
std::shared_ptr<BufferWorkQueue> out;
|
||||||
|
// Grab the output queue for each decompression job (in order).
|
||||||
|
while (outs.pop(out) && !errorHolder.hasError()) {
|
||||||
|
if (writeSkippableFrames) {
|
||||||
|
// If we are compressing and want to write skippable frames we can't
|
||||||
|
// start writing before compression is done because we need to know the
|
||||||
|
// compressed size.
|
||||||
|
// Wait for the compressed size to be available and write skippable frame
|
||||||
|
SkippableFrame frame(out->size());
|
||||||
|
if (!writeData(frame.data(), outputFd)) {
|
||||||
|
errorHolder.setError("Failed to write output");
|
||||||
|
return bytesWritten;
|
||||||
|
}
|
||||||
|
bytesWritten += frame.kSize;
|
||||||
|
}
|
||||||
|
// For each chunk of the frame: Pop it from the queue and write it
|
||||||
|
Buffer buffer;
|
||||||
|
while (out->pop(buffer) && !errorHolder.hasError()) {
|
||||||
|
if (!writeData(buffer.range(), outputFd)) {
|
||||||
|
errorHolder.setError("Failed to write output");
|
||||||
|
return bytesWritten;
|
||||||
|
}
|
||||||
|
bytesWritten += buffer.size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return bytesWritten;
|
||||||
|
}
|
||||||
|
}
|
93
contrib/pzstd/Pzstd.h
Normal file
93
contrib/pzstd/Pzstd.h
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "ErrorHolder.h"
|
||||||
|
#include "Options.h"
|
||||||
|
#include "utils/Buffer.h"
|
||||||
|
#include "utils/Range.h"
|
||||||
|
#include "utils/ThreadPool.h"
|
||||||
|
#include "utils/WorkQueue.h"
|
||||||
|
#define ZSTD_STATIC_LINKING_ONLY
|
||||||
|
#include "zstd.h"
|
||||||
|
#undef ZSTD_STATIC_LINKING_ONLY
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
/**
|
||||||
|
* Runs pzstd with `options` and returns the number of bytes written.
|
||||||
|
* An error occurred if `errorHandler.hasError()`.
|
||||||
|
*
|
||||||
|
* @param options The pzstd options to use for (de)compression
|
||||||
|
* @param errorHolder Used to report errors and coordinate early shutdown
|
||||||
|
* if an error occured
|
||||||
|
* @returns The number of bytes written.
|
||||||
|
*/
|
||||||
|
std::size_t pzstdMain(const Options& options, ErrorHolder& errorHolder);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Streams input from `fd`, breaks input up into chunks, and compresses each
|
||||||
|
* chunk independently. Output of each chunk gets streamed to a queue, and
|
||||||
|
* the output queues get put into `chunks` in order.
|
||||||
|
*
|
||||||
|
* @param errorHolder Used to report errors and coordinate early shutdown
|
||||||
|
* @param chunks Each compression jobs output queue gets `pushed()` here
|
||||||
|
* as soon as it is available
|
||||||
|
* @param executor The thread pool to run compression jobs in
|
||||||
|
* @param fd The input file descriptor
|
||||||
|
* @param size The size of the input file if known, 0 otherwise
|
||||||
|
* @param numThreads The number of threads in the thread pool
|
||||||
|
* @param parameters The zstd parameters to use for compression
|
||||||
|
*/
|
||||||
|
void asyncCompressChunks(
|
||||||
|
ErrorHolder& errorHolder,
|
||||||
|
WorkQueue<std::shared_ptr<BufferWorkQueue>>& chunks,
|
||||||
|
ThreadPool& executor,
|
||||||
|
FILE* fd,
|
||||||
|
std::size_t size,
|
||||||
|
std::size_t numThreads,
|
||||||
|
ZSTD_parameters parameters);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Streams input from `fd`. If pzstd headers are available it breaks the input
|
||||||
|
* up into independent frames. It sends each frame to an independent
|
||||||
|
* decompression job. Output of each frame gets streamed to a queue, and
|
||||||
|
* the output queues get put into `frames` in order.
|
||||||
|
*
|
||||||
|
* @param errorHolder Used to report errors and coordinate early shutdown
|
||||||
|
* @param frames Each decompression jobs output queue gets `pushed()` here
|
||||||
|
* as soon as it is available
|
||||||
|
* @param executor The thread pool to run compression jobs in
|
||||||
|
* @param fd The input file descriptor
|
||||||
|
*/
|
||||||
|
void asyncDecompressFrames(
|
||||||
|
ErrorHolder& errorHolder,
|
||||||
|
WorkQueue<std::shared_ptr<BufferWorkQueue>>& frames,
|
||||||
|
ThreadPool& executor,
|
||||||
|
FILE* fd);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Streams input in from each queue in `outs` in order, and writes the data to
|
||||||
|
* `outputFd`.
|
||||||
|
*
|
||||||
|
* @param errorHolder Used to report errors and coordinate early exit
|
||||||
|
* @param outs A queue of output queues, one for each
|
||||||
|
* (de)compression job.
|
||||||
|
* @param outputFd The file descriptor to write to
|
||||||
|
* @param writeSkippableFrames Should we write pzstd headers?
|
||||||
|
* @returns The number of bytes written
|
||||||
|
*/
|
||||||
|
std::size_t writeFile(
|
||||||
|
ErrorHolder& errorHolder,
|
||||||
|
WorkQueue<std::shared_ptr<BufferWorkQueue>>& outs,
|
||||||
|
FILE* outputFd,
|
||||||
|
bool writeSkippableFrames);
|
||||||
|
}
|
48
contrib/pzstd/README.md
Normal file
48
contrib/pzstd/README.md
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
# Parallel Zstandard (PZstandard)
|
||||||
|
|
||||||
|
Parallel Zstandard is a Pigz-like tool for Zstandard.
|
||||||
|
It provides Zstandard format compatible compression and decompression that is able to utilize multiple cores.
|
||||||
|
It breaks the input up into equal sized chunks and compresses each chunk independently into a Zstandard frame.
|
||||||
|
It then concatenates the frames together to produce the final compressed output.
|
||||||
|
Optionally, with the `-p` option, PZstandard will write a 12 byte header for each frame that is a skippable frame in the Zstandard format, which tells PZstandard the size of the next compressed frame.
|
||||||
|
When `-p` is specified for compression, PZstandard can decompress the output in parallel.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
Basic usage
|
||||||
|
|
||||||
|
pzstd input-file -o output-file -n num-threads [ -p ] -# # Compression
|
||||||
|
pzstd -d input-file -o output-file -n num-threads # Decompression
|
||||||
|
|
||||||
|
PZstandard also supports piping and fifo pipes
|
||||||
|
|
||||||
|
cat input-file | pzstd -n num-threads [ -p ] -# -c > /dev/null
|
||||||
|
|
||||||
|
For more options
|
||||||
|
|
||||||
|
pzstd --help
|
||||||
|
|
||||||
|
## Benchmarks
|
||||||
|
|
||||||
|
As a reference, PZstandard and Pigz were compared on an Intel Core i7 @ 3.1 GHz, each using 4 threads, with the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia).
|
||||||
|
|
||||||
|
Compression Speed vs Ratio with 4 Threads | Decompression Speed with 4 Threads
|
||||||
|
------------------------------------------|-----------------------------------
|
||||||
|
![Compression Speed vs Ratio](images/Cspeed.png "Compression Speed vs Ratio") | ![Decompression Speed](images/Dspeed.png "Decompression Speed")
|
||||||
|
|
||||||
|
The test procedure was to run each of the following commands 2 times for each compression level, and take the minimum time.
|
||||||
|
|
||||||
|
time pzstd -# -n 4 -p -c silesia.tar > silesia.tar.zst
|
||||||
|
time pzstd -d -n 4 -c silesia.tar.zst > /dev/null
|
||||||
|
|
||||||
|
time pigz -# -p 4 -k -c silesia.tar > silesia.tar.gz
|
||||||
|
time pigz -d -p 4 -k -c silesia.tar.gz > /dev/null
|
||||||
|
|
||||||
|
PZstandard was tested using compression levels 1-19, and Pigz was tested using compression levels 1-9.
|
||||||
|
Pigz cannot do parallel decompression, it simply does each of reading, decompression, and writing on separate threads.
|
||||||
|
|
||||||
|
## Tests
|
||||||
|
|
||||||
|
Tests require that you have [gtest](https://github.com/google/googletest) installed.
|
||||||
|
Modify `GTEST_INC` and `GTEST_LIB` in `test/Makefile` and `utils/test/Makefile` to work for your install of gtest.
|
||||||
|
Then run `make test` in the `contrib/pzstd` directory.
|
30
contrib/pzstd/SkippableFrame.cpp
Normal file
30
contrib/pzstd/SkippableFrame.cpp
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#include "SkippableFrame.h"
|
||||||
|
#include "common/mem.h"
|
||||||
|
#include "utils/Range.h"
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
using namespace pzstd;
|
||||||
|
|
||||||
|
SkippableFrame::SkippableFrame(std::uint32_t size) : frameSize_(size) {
|
||||||
|
MEM_writeLE32(data_.data(), kSkippableFrameMagicNumber);
|
||||||
|
MEM_writeLE32(data_.data() + 4, kFrameContentsSize);
|
||||||
|
MEM_writeLE32(data_.data() + 8, frameSize_);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ std::size_t SkippableFrame::tryRead(ByteRange bytes) {
|
||||||
|
if (bytes.size() < SkippableFrame::kSize ||
|
||||||
|
MEM_readLE32(bytes.begin()) != kSkippableFrameMagicNumber ||
|
||||||
|
MEM_readLE32(bytes.begin() + 4) != kFrameContentsSize) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return MEM_readLE32(bytes.begin() + 8);
|
||||||
|
}
|
64
contrib/pzstd/SkippableFrame.h
Normal file
64
contrib/pzstd/SkippableFrame.h
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "utils/Range.h"
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
/**
|
||||||
|
* We put a skippable frame before each frame.
|
||||||
|
* It contains a skippable frame magic number, the size of the skippable frame,
|
||||||
|
* and the size of the next frame.
|
||||||
|
* Each skippable frame is exactly 12 bytes in little endian format.
|
||||||
|
* The first 8 bytes are for compatibility with the ZSTD format.
|
||||||
|
* If we have N threads, the output will look like
|
||||||
|
*
|
||||||
|
* [0x184D2A50|4|size1] [frame1 of size size1]
|
||||||
|
* [0x184D2A50|4|size2] [frame2 of size size2]
|
||||||
|
* ...
|
||||||
|
* [0x184D2A50|4|sizeN] [frameN of size sizeN]
|
||||||
|
*
|
||||||
|
* Each sizeX is 4 bytes.
|
||||||
|
*
|
||||||
|
* These skippable frames should allow us to skip through the compressed file
|
||||||
|
* and only load at most N pages.
|
||||||
|
*/
|
||||||
|
class SkippableFrame {
|
||||||
|
public:
|
||||||
|
static constexpr std::size_t kSize = 12;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::uint32_t frameSize_;
|
||||||
|
std::array<std::uint8_t, kSize> data_;
|
||||||
|
static constexpr std::uint32_t kSkippableFrameMagicNumber = 0x184D2A50;
|
||||||
|
// Could be improved if the size fits in less bytes
|
||||||
|
static constexpr std::uint32_t kFrameContentsSize = kSize - 8;
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Write the skippable frame to data_ in LE format.
|
||||||
|
explicit SkippableFrame(std::uint32_t size);
|
||||||
|
|
||||||
|
// Read the skippable frame from bytes in LE format.
|
||||||
|
static std::size_t tryRead(ByteRange bytes);
|
||||||
|
|
||||||
|
ByteRange data() const {
|
||||||
|
return {data_.data(), data_.size()};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Size of the next frame.
|
||||||
|
std::size_t frameSize() const {
|
||||||
|
return frameSize_;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
BIN
contrib/pzstd/images/Cspeed.png
Normal file
BIN
contrib/pzstd/images/Cspeed.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 57 KiB |
BIN
contrib/pzstd/images/Dspeed.png
Normal file
BIN
contrib/pzstd/images/Dspeed.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 26 KiB |
34
contrib/pzstd/main.cpp
Normal file
34
contrib/pzstd/main.cpp
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#include "ErrorHolder.h"
|
||||||
|
#include "Options.h"
|
||||||
|
#include "Pzstd.h"
|
||||||
|
#include "utils/FileSystem.h"
|
||||||
|
#include "utils/Range.h"
|
||||||
|
#include "utils/ScopeGuard.h"
|
||||||
|
#include "utils/ThreadPool.h"
|
||||||
|
#include "utils/WorkQueue.h"
|
||||||
|
|
||||||
|
using namespace pzstd;
|
||||||
|
|
||||||
|
int main(int argc, const char** argv) {
|
||||||
|
Options options;
|
||||||
|
if (!options.parse(argc, argv)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ErrorHolder errorHolder;
|
||||||
|
pzstdMain(options, errorHolder);
|
||||||
|
|
||||||
|
if (errorHolder.hasError()) {
|
||||||
|
std::fprintf(stderr, "Error: %s.\n", errorHolder.getError().c_str());
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
48
contrib/pzstd/test/Makefile
Normal file
48
contrib/pzstd/test/Makefile
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
# ##########################################################################
|
||||||
|
# Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the BSD-style license found in the
|
||||||
|
# LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
# of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
# ##########################################################################
|
||||||
|
|
||||||
|
# Define *.exe as extension for Windows systems
|
||||||
|
ifneq (,$(filter Windows%,$(OS)))
|
||||||
|
EXT =.exe
|
||||||
|
else
|
||||||
|
EXT =
|
||||||
|
endif
|
||||||
|
|
||||||
|
PZSTDDIR = ..
|
||||||
|
PROGDIR = ../../../programs
|
||||||
|
ZSTDDIR = ../../../lib
|
||||||
|
|
||||||
|
# Set GTEST_INC and GTEST_LIB to work with your install of gtest
|
||||||
|
GTEST_INC ?= -isystem $(PZSTDDIR)/googletest/googletest/include
|
||||||
|
GTEST_LIB ?= -L $(PZSTDDIR)/googletest/build/googlemock/gtest
|
||||||
|
|
||||||
|
CPPFLAGS = -I$(PZSTDDIR) $(GTEST_INC) $(GTEST_LIB) -I$(ZSTDDIR)/common -I$(PROGDIR)
|
||||||
|
|
||||||
|
CFLAGS ?= -O3
|
||||||
|
CFLAGS += -std=c++11
|
||||||
|
CFLAGS += $(MOREFLAGS)
|
||||||
|
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
|
||||||
|
|
||||||
|
datagen.o: $(PROGDIR)/datagen.*
|
||||||
|
$(CXX) $(FLAGS) $(PROGDIR)/datagen.c -c -o $@
|
||||||
|
|
||||||
|
%: %.cpp *.h datagen.o
|
||||||
|
$(CXX) $(FLAGS) -lgtest -lgtest_main $@.cpp datagen.o $(PZSTDDIR)/libzstd.a $(PZSTDDIR)/Pzstd.o $(PZSTDDIR)/SkippableFrame.o $(PZSTDDIR)/Options.o -o $@$(EXT)
|
||||||
|
|
||||||
|
.PHONY: test clean
|
||||||
|
|
||||||
|
test: OptionsTest PzstdTest
|
||||||
|
@./OptionsTest$(EXT)
|
||||||
|
@./PzstdTest$(EXT)
|
||||||
|
|
||||||
|
roundtrip: RoundTripTest
|
||||||
|
@./RoundTripTest$(EXT)
|
||||||
|
|
||||||
|
clean:
|
||||||
|
@rm -f datagen.o OptionsTest PzstdTest RoundTripTest
|
179
contrib/pzstd/test/OptionsTest.cpp
Normal file
179
contrib/pzstd/test/OptionsTest.cpp
Normal file
@ -0,0 +1,179 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#include "Options.h"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <array>
|
||||||
|
|
||||||
|
using namespace pzstd;
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
bool operator==(const Options& lhs, const Options& rhs) {
|
||||||
|
return lhs.numThreads == rhs.numThreads &&
|
||||||
|
lhs.maxWindowLog == rhs.maxWindowLog &&
|
||||||
|
lhs.compressionLevel == rhs.compressionLevel &&
|
||||||
|
lhs.decompress == rhs.decompress && lhs.inputFile == rhs.inputFile &&
|
||||||
|
lhs.outputFile == rhs.outputFile && lhs.overwrite == rhs.overwrite &&
|
||||||
|
lhs.pzstdHeaders == rhs.pzstdHeaders;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Options, ValidInputs) {
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 6> args = {
|
||||||
|
{nullptr, "--num-threads", "5", "-o", "-", "-f"}};
|
||||||
|
EXPECT_TRUE(options.parse(args.size(), args.data()));
|
||||||
|
Options expected = {5, 23, 3, false, "-", "-", true, false};
|
||||||
|
EXPECT_EQ(expected, options);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 6> args = {
|
||||||
|
{nullptr, "-n", "1", "input", "-19", "-p"}};
|
||||||
|
EXPECT_TRUE(options.parse(args.size(), args.data()));
|
||||||
|
Options expected = {1, 23, 19, false, "input", "input.zst", false, true};
|
||||||
|
EXPECT_EQ(expected, options);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 10> args = {{nullptr,
|
||||||
|
"--ultra",
|
||||||
|
"-22",
|
||||||
|
"-n",
|
||||||
|
"1",
|
||||||
|
"--output",
|
||||||
|
"x",
|
||||||
|
"-d",
|
||||||
|
"x.zst",
|
||||||
|
"-f"}};
|
||||||
|
EXPECT_TRUE(options.parse(args.size(), args.data()));
|
||||||
|
Options expected = {1, 0, 22, true, "x.zst", "x", true, false};
|
||||||
|
EXPECT_EQ(expected, options);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 6> args = {{nullptr,
|
||||||
|
"--num-threads",
|
||||||
|
"100",
|
||||||
|
"hello.zst",
|
||||||
|
"--decompress",
|
||||||
|
"--force"}};
|
||||||
|
EXPECT_TRUE(options.parse(args.size(), args.data()));
|
||||||
|
Options expected = {100, 23, 3, true, "hello.zst", "hello", true, false};
|
||||||
|
EXPECT_EQ(expected, options);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 5> args = {{nullptr, "-", "-n", "1", "-c"}};
|
||||||
|
EXPECT_TRUE(options.parse(args.size(), args.data()));
|
||||||
|
Options expected = {1, 23, 3, false, "-", "-", false, false};
|
||||||
|
EXPECT_EQ(expected, options);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 5> args = {{nullptr, "-", "-n", "1", "--stdout"}};
|
||||||
|
EXPECT_TRUE(options.parse(args.size(), args.data()));
|
||||||
|
Options expected = {1, 23, 3, false, "-", "-", false, false};
|
||||||
|
EXPECT_EQ(expected, options);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 10> args = {{nullptr,
|
||||||
|
"-n",
|
||||||
|
"1",
|
||||||
|
"-",
|
||||||
|
"-5",
|
||||||
|
"-o",
|
||||||
|
"-",
|
||||||
|
"-u",
|
||||||
|
"-d",
|
||||||
|
"--pzstd-headers"}};
|
||||||
|
EXPECT_TRUE(options.parse(args.size(), args.data()));
|
||||||
|
Options expected = {1, 0, 5, true, "-", "-", false, true};
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 6> args = {
|
||||||
|
{nullptr, "silesia.tar", "-o", "silesia.tar.pzstd", "-n", "2"}};
|
||||||
|
EXPECT_TRUE(options.parse(args.size(), args.data()));
|
||||||
|
Options expected = {
|
||||||
|
2, 23, 3, false, "silesia.tar", "silesia.tar.pzstd", false, false};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Options, BadNumThreads) {
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 3> args = {{nullptr, "-o", "-"}};
|
||||||
|
EXPECT_FALSE(options.parse(args.size(), args.data()));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 5> args = {{nullptr, "-n", "0", "-o", "-"}};
|
||||||
|
EXPECT_FALSE(options.parse(args.size(), args.data()));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 4> args = {{nullptr, "-n", "-o", "-"}};
|
||||||
|
EXPECT_FALSE(options.parse(args.size(), args.data()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Options, BadCompressionLevel) {
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 3> args = {{nullptr, "x", "-20"}};
|
||||||
|
EXPECT_FALSE(options.parse(args.size(), args.data()));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 4> args = {{nullptr, "x", "-u", "-23"}};
|
||||||
|
EXPECT_FALSE(options.parse(args.size(), args.data()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Options, InvalidOption) {
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 3> args = {{nullptr, "x", "-x"}};
|
||||||
|
EXPECT_FALSE(options.parse(args.size(), args.data()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Options, BadOutputFile) {
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 5> args = {{nullptr, "notzst", "-d", "-n", "1"}};
|
||||||
|
EXPECT_FALSE(options.parse(args.size(), args.data()));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 3> args = {{nullptr, "-n", "1"}};
|
||||||
|
EXPECT_FALSE(options.parse(args.size(), args.data()));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 4> args = {{nullptr, "-", "-n", "1"}};
|
||||||
|
EXPECT_FALSE(options.parse(args.size(), args.data()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Options, Extras) {
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 2> args = {{nullptr, "-h"}};
|
||||||
|
EXPECT_FALSE(options.parse(args.size(), args.data()));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Options options;
|
||||||
|
std::array<const char*, 2> args = {{nullptr, "-V"}};
|
||||||
|
EXPECT_FALSE(options.parse(args.size(), args.data()));
|
||||||
|
}
|
||||||
|
}
|
121
contrib/pzstd/test/PzstdTest.cpp
Normal file
121
contrib/pzstd/test/PzstdTest.cpp
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#include "datagen.h"
|
||||||
|
#include "Pzstd.h"
|
||||||
|
#include "test/RoundTrip.h"
|
||||||
|
#include "utils/ScopeGuard.h"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <memory>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace pzstd;
|
||||||
|
|
||||||
|
TEST(Pzstd, SmallSizes) {
|
||||||
|
unsigned seed = std::random_device{}();
|
||||||
|
std::fprintf(stderr, "Pzstd.SmallSizes seed: %u\n", seed);
|
||||||
|
std::mt19937 gen(seed);
|
||||||
|
|
||||||
|
for (unsigned len = 1; len < 1028; ++len) {
|
||||||
|
std::string inputFile = std::tmpnam(nullptr);
|
||||||
|
auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
|
||||||
|
{
|
||||||
|
static uint8_t buf[1028];
|
||||||
|
RDG_genBuffer(buf, len, 0.5, 0.0, gen());
|
||||||
|
auto fd = std::fopen(inputFile.c_str(), "wb");
|
||||||
|
auto written = std::fwrite(buf, 1, len, fd);
|
||||||
|
std::fclose(fd);
|
||||||
|
ASSERT_EQ(written, len);
|
||||||
|
}
|
||||||
|
for (unsigned headers = 0; headers <= 1; ++headers) {
|
||||||
|
for (unsigned numThreads = 1; numThreads <= 4; numThreads *= 2) {
|
||||||
|
for (unsigned level = 1; level <= 8; level *= 8) {
|
||||||
|
auto errorGuard = makeScopeGuard([&] {
|
||||||
|
guard.dismiss();
|
||||||
|
std::fprintf(stderr, "file: %s\n", inputFile.c_str());
|
||||||
|
std::fprintf(stderr, "pzstd headers: %u\n", headers);
|
||||||
|
std::fprintf(stderr, "# threads: %u\n", numThreads);
|
||||||
|
std::fprintf(stderr, "compression level: %u\n", level);
|
||||||
|
});
|
||||||
|
Options options;
|
||||||
|
options.pzstdHeaders = headers;
|
||||||
|
options.overwrite = true;
|
||||||
|
options.inputFile = inputFile;
|
||||||
|
options.numThreads = numThreads;
|
||||||
|
options.compressionLevel = level;
|
||||||
|
ASSERT_TRUE(roundTrip(options));
|
||||||
|
errorGuard.dismiss();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Pzstd, LargeSizes) {
|
||||||
|
unsigned seed = std::random_device{}();
|
||||||
|
std::fprintf(stderr, "Pzstd.LargeSizes seed: %u\n", seed);
|
||||||
|
std::mt19937 gen(seed);
|
||||||
|
|
||||||
|
for (unsigned len = 1 << 20; len <= (1 << 24); len *= 2) {
|
||||||
|
std::string inputFile = std::tmpnam(nullptr);
|
||||||
|
auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
|
||||||
|
{
|
||||||
|
std::unique_ptr<uint8_t[]> buf(new uint8_t[len]);
|
||||||
|
RDG_genBuffer(buf.get(), len, 0.5, 0.0, gen());
|
||||||
|
auto fd = std::fopen(inputFile.c_str(), "wb");
|
||||||
|
auto written = std::fwrite(buf.get(), 1, len, fd);
|
||||||
|
std::fclose(fd);
|
||||||
|
ASSERT_EQ(written, len);
|
||||||
|
}
|
||||||
|
for (unsigned headers = 0; headers <= 1; ++headers) {
|
||||||
|
for (unsigned numThreads = 1; numThreads <= 16; numThreads *= 4) {
|
||||||
|
for (unsigned level = 1; level <= 4; level *= 2) {
|
||||||
|
auto errorGuard = makeScopeGuard([&] {
|
||||||
|
guard.dismiss();
|
||||||
|
std::fprintf(stderr, "file: %s\n", inputFile.c_str());
|
||||||
|
std::fprintf(stderr, "pzstd headers: %u\n", headers);
|
||||||
|
std::fprintf(stderr, "# threads: %u\n", numThreads);
|
||||||
|
std::fprintf(stderr, "compression level: %u\n", level);
|
||||||
|
});
|
||||||
|
Options options;
|
||||||
|
options.pzstdHeaders = headers;
|
||||||
|
options.overwrite = true;
|
||||||
|
options.inputFile = inputFile;
|
||||||
|
options.numThreads = numThreads;
|
||||||
|
options.compressionLevel = level;
|
||||||
|
ASSERT_TRUE(roundTrip(options));
|
||||||
|
errorGuard.dismiss();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Pzstd, ExtremelyCompressible) {
|
||||||
|
std::string inputFile = std::tmpnam(nullptr);
|
||||||
|
auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
|
||||||
|
{
|
||||||
|
std::unique_ptr<uint8_t[]> buf(new uint8_t[10000]);
|
||||||
|
std::memset(buf.get(), 'a', 10000);
|
||||||
|
auto fd = std::fopen(inputFile.c_str(), "wb");
|
||||||
|
auto written = std::fwrite(buf.get(), 1, 10000, fd);
|
||||||
|
std::fclose(fd);
|
||||||
|
ASSERT_EQ(written, 10000);
|
||||||
|
}
|
||||||
|
Options options;
|
||||||
|
options.pzstdHeaders = false;
|
||||||
|
options.overwrite = true;
|
||||||
|
options.inputFile = inputFile;
|
||||||
|
options.numThreads = 1;
|
||||||
|
options.compressionLevel = 1;
|
||||||
|
ASSERT_TRUE(roundTrip(options));
|
||||||
|
}
|
89
contrib/pzstd/test/RoundTrip.h
Normal file
89
contrib/pzstd/test/RoundTrip.h
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Options.h"
|
||||||
|
#include "Pzstd.h"
|
||||||
|
#include "utils/ScopeGuard.h"
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <string>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
|
||||||
|
inline bool check(std::string source, std::string decompressed) {
|
||||||
|
std::unique_ptr<std::uint8_t[]> sBuf(new std::uint8_t[1024]);
|
||||||
|
std::unique_ptr<std::uint8_t[]> dBuf(new std::uint8_t[1024]);
|
||||||
|
|
||||||
|
auto sFd = std::fopen(source.c_str(), "rb");
|
||||||
|
auto dFd = std::fopen(decompressed.c_str(), "rb");
|
||||||
|
auto guard = makeScopeGuard([&] {
|
||||||
|
std::fclose(sFd);
|
||||||
|
std::fclose(dFd);
|
||||||
|
});
|
||||||
|
|
||||||
|
size_t sRead, dRead;
|
||||||
|
|
||||||
|
do {
|
||||||
|
sRead = std::fread(sBuf.get(), 1, 1024, sFd);
|
||||||
|
dRead = std::fread(dBuf.get(), 1, 1024, dFd);
|
||||||
|
if (std::ferror(sFd) || std::ferror(dFd)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (sRead != dRead) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < sRead; ++i) {
|
||||||
|
if (sBuf.get()[i] != dBuf.get()[i]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (sRead == 1024);
|
||||||
|
if (!std::feof(sFd) || !std::feof(dFd)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool roundTrip(Options& options) {
|
||||||
|
std::string source = options.inputFile;
|
||||||
|
std::string compressedFile = std::tmpnam(nullptr);
|
||||||
|
std::string decompressedFile = std::tmpnam(nullptr);
|
||||||
|
auto guard = makeScopeGuard([&] {
|
||||||
|
std::remove(compressedFile.c_str());
|
||||||
|
std::remove(decompressedFile.c_str());
|
||||||
|
});
|
||||||
|
|
||||||
|
{
|
||||||
|
options.outputFile = compressedFile;
|
||||||
|
options.decompress = false;
|
||||||
|
ErrorHolder errorHolder;
|
||||||
|
pzstdMain(options, errorHolder);
|
||||||
|
if (errorHolder.hasError()) {
|
||||||
|
errorHolder.getError();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
options.decompress = true;
|
||||||
|
options.inputFile = compressedFile;
|
||||||
|
options.outputFile = decompressedFile;
|
||||||
|
ErrorHolder errorHolder;
|
||||||
|
pzstdMain(options, errorHolder);
|
||||||
|
if (errorHolder.hasError()) {
|
||||||
|
errorHolder.getError();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return check(source, decompressedFile);
|
||||||
|
}
|
||||||
|
}
|
88
contrib/pzstd/test/RoundTripTest.cpp
Normal file
88
contrib/pzstd/test/RoundTripTest.cpp
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#include "datagen.h"
|
||||||
|
#include "Options.h"
|
||||||
|
#include "test/RoundTrip.h"
|
||||||
|
#include "utils/ScopeGuard.h"
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <memory>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace pzstd;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
string
|
||||||
|
writeData(size_t size, double matchProba, double litProba, unsigned seed) {
|
||||||
|
std::unique_ptr<uint8_t[]> buf(new uint8_t[size]);
|
||||||
|
RDG_genBuffer(buf.get(), size, matchProba, litProba, seed);
|
||||||
|
string file = tmpnam(nullptr);
|
||||||
|
auto fd = std::fopen(file.c_str(), "wb");
|
||||||
|
auto guard = makeScopeGuard([&] { std::fclose(fd); });
|
||||||
|
auto bytesWritten = std::fwrite(buf.get(), 1, size, fd);
|
||||||
|
if (bytesWritten != size) {
|
||||||
|
std::abort();
|
||||||
|
}
|
||||||
|
return file;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Generator>
|
||||||
|
string generateInputFile(Generator& gen) {
|
||||||
|
// Use inputs ranging from 1 Byte to 2^16 Bytes
|
||||||
|
std::uniform_int_distribution<size_t> size{1, 1 << 16};
|
||||||
|
std::uniform_real_distribution<> prob{0, 1};
|
||||||
|
return writeData(size(gen), prob(gen), prob(gen), gen());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Generator>
|
||||||
|
Options generateOptions(Generator& gen, const string& inputFile) {
|
||||||
|
Options options;
|
||||||
|
options.inputFile = inputFile;
|
||||||
|
options.overwrite = true;
|
||||||
|
|
||||||
|
std::bernoulli_distribution pzstdHeaders{0.75};
|
||||||
|
std::uniform_int_distribution<unsigned> numThreads{1, 32};
|
||||||
|
std::uniform_int_distribution<unsigned> compressionLevel{1, 10};
|
||||||
|
|
||||||
|
options.pzstdHeaders = pzstdHeaders(gen);
|
||||||
|
options.numThreads = numThreads(gen);
|
||||||
|
options.compressionLevel = compressionLevel(gen);
|
||||||
|
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
std::mt19937 gen(std::random_device{}());
|
||||||
|
|
||||||
|
auto newlineGuard = makeScopeGuard([] { std::fprintf(stderr, "\n"); });
|
||||||
|
for (unsigned i = 0; i < 10000; ++i) {
|
||||||
|
if (i % 100 == 0) {
|
||||||
|
std::fprintf(stderr, "Progress: %u%%\r", i / 100);
|
||||||
|
}
|
||||||
|
auto inputFile = generateInputFile(gen);
|
||||||
|
auto inputGuard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
|
||||||
|
for (unsigned i = 0; i < 10; ++i) {
|
||||||
|
auto options = generateOptions(gen, inputFile);
|
||||||
|
if (!roundTrip(options)) {
|
||||||
|
std::fprintf(stderr, "numThreads: %u\n", options.numThreads);
|
||||||
|
std::fprintf(stderr, "level: %u\n", options.compressionLevel);
|
||||||
|
std::fprintf(stderr, "decompress? %u\n", (unsigned)options.decompress);
|
||||||
|
std::fprintf(
|
||||||
|
stderr, "pzstd headers? %u\n", (unsigned)options.pzstdHeaders);
|
||||||
|
std::fprintf(stderr, "file: %s\n", inputFile.c_str());
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
99
contrib/pzstd/utils/Buffer.h
Normal file
99
contrib/pzstd/utils/Buffer.h
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "utils/Range.h"
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A `Buffer` has a pointer to a shared buffer, and a range of the buffer that
|
||||||
|
* it owns.
|
||||||
|
* The idea is that you can allocate one buffer, and write chunks into it
|
||||||
|
* and break off those chunks.
|
||||||
|
* The underlying buffer is reference counted, and will be destroyed when all
|
||||||
|
* `Buffer`s that reference it are destroyed.
|
||||||
|
*/
|
||||||
|
class Buffer {
|
||||||
|
std::shared_ptr<unsigned char> buffer_;
|
||||||
|
MutableByteRange range_;
|
||||||
|
|
||||||
|
static void delete_buffer(unsigned char* buffer) {
|
||||||
|
delete[] buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Construct an empty buffer that owns no data.
|
||||||
|
explicit Buffer() {}
|
||||||
|
|
||||||
|
/// Construct a `Buffer` that owns a new underlying buffer of size `size`.
|
||||||
|
explicit Buffer(std::size_t size)
|
||||||
|
: buffer_(new unsigned char[size], delete_buffer),
|
||||||
|
range_(buffer_.get(), buffer_.get() + size) {}
|
||||||
|
|
||||||
|
explicit Buffer(std::shared_ptr<unsigned char> buffer, MutableByteRange data)
|
||||||
|
: buffer_(buffer), range_(data) {}
|
||||||
|
|
||||||
|
Buffer(Buffer&&) = default;
|
||||||
|
Buffer& operator=(Buffer&&) & = default;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Splits the data into two pieces: [begin, begin + n), [begin + n, end).
|
||||||
|
* Their data both points into the same underlying buffer.
|
||||||
|
* Modifies the original `Buffer` to point to only [begin + n, end).
|
||||||
|
*
|
||||||
|
* @param n The offset to split at.
|
||||||
|
* @returns A buffer that owns the data [begin, begin + n).
|
||||||
|
*/
|
||||||
|
Buffer splitAt(std::size_t n) {
|
||||||
|
auto firstPiece = range_.subpiece(0, n);
|
||||||
|
range_.advance(n);
|
||||||
|
return Buffer(buffer_, firstPiece);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Modifies the buffer to point to the range [begin + n, end).
|
||||||
|
void advance(std::size_t n) {
|
||||||
|
range_.advance(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Modifies the buffer to point to the range [begin, end - n).
|
||||||
|
void subtract(std::size_t n) {
|
||||||
|
range_.subtract(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a read only `Range` pointing to the `Buffer`s data.
|
||||||
|
ByteRange range() const {
|
||||||
|
return range_;
|
||||||
|
}
|
||||||
|
/// Returns a mutable `Range` pointing to the `Buffer`s data.
|
||||||
|
MutableByteRange range() {
|
||||||
|
return range_;
|
||||||
|
}
|
||||||
|
|
||||||
|
const unsigned char* data() const {
|
||||||
|
return range_.data();
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char* data() {
|
||||||
|
return range_.data();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t size() const {
|
||||||
|
return range_.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool empty() const {
|
||||||
|
return range_.empty();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
61
contrib/pzstd/utils/FileSystem.h
Normal file
61
contrib/pzstd/utils/FileSystem.h
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "utils/Range.h"
|
||||||
|
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <system_error>
|
||||||
|
|
||||||
|
// A small subset of `std::filesystem`.
|
||||||
|
// `std::filesystem` should be a drop in replacement.
|
||||||
|
// See http://en.cppreference.com/w/cpp/filesystem for documentation.
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
|
||||||
|
using file_status = struct stat;
|
||||||
|
|
||||||
|
/// http://en.cppreference.com/w/cpp/filesystem/status
|
||||||
|
inline file_status status(StringPiece path, std::error_code& ec) noexcept {
|
||||||
|
file_status status;
|
||||||
|
if (stat(path.data(), &status)) {
|
||||||
|
ec.assign(errno, std::generic_category());
|
||||||
|
} else {
|
||||||
|
ec.clear();
|
||||||
|
}
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// http://en.cppreference.com/w/cpp/filesystem/is_regular_file
|
||||||
|
inline bool is_regular_file(file_status status) noexcept {
|
||||||
|
return S_ISREG(status.st_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// http://en.cppreference.com/w/cpp/filesystem/is_regular_file
|
||||||
|
inline bool is_regular_file(StringPiece path, std::error_code& ec) noexcept {
|
||||||
|
return is_regular_file(status(path, ec));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// http://en.cppreference.com/w/cpp/filesystem/file_size
|
||||||
|
inline std::uintmax_t file_size(
|
||||||
|
StringPiece path,
|
||||||
|
std::error_code& ec) noexcept {
|
||||||
|
auto stat = status(path, ec);
|
||||||
|
if (ec) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (!is_regular_file(stat)) {
|
||||||
|
ec.assign(ENOTSUP, std::generic_category());
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
ec.clear();
|
||||||
|
return stat.st_size;
|
||||||
|
}
|
||||||
|
}
|
28
contrib/pzstd/utils/Likely.h
Normal file
28
contrib/pzstd/utils/Likely.h
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compiler hints to indicate the fast path of an "if" branch: whether
|
||||||
|
* the if condition is likely to be true or false.
|
||||||
|
*
|
||||||
|
* @author Tudor Bosman (tudorb@fb.com)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#undef LIKELY
|
||||||
|
#undef UNLIKELY
|
||||||
|
|
||||||
|
#if defined(__GNUC__) && __GNUC__ >= 4
|
||||||
|
#define LIKELY(x) (__builtin_expect((x), 1))
|
||||||
|
#define UNLIKELY(x) (__builtin_expect((x), 0))
|
||||||
|
#else
|
||||||
|
#define LIKELY(x) (x)
|
||||||
|
#define UNLIKELY(x) (x)
|
||||||
|
#endif
|
130
contrib/pzstd/utils/Range.h
Normal file
130
contrib/pzstd/utils/Range.h
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A subset of `folly/Range.h`.
|
||||||
|
* All code copied verbatiam modulo formatting
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "utils/Likely.h"
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <string>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
|
||||||
|
namespace detail {
|
||||||
|
/*
|
||||||
|
*Use IsCharPointer<T>::type to enable const char* or char*.
|
||||||
|
*Use IsCharPointer<T>::const_type to enable only const char*.
|
||||||
|
*/
|
||||||
|
template <class T>
|
||||||
|
struct IsCharPointer {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct IsCharPointer<char*> {
|
||||||
|
typedef int type;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct IsCharPointer<const char*> {
|
||||||
|
typedef int const_type;
|
||||||
|
typedef int type;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace detail
|
||||||
|
|
||||||
|
template <typename Iter>
|
||||||
|
class Range {
|
||||||
|
Iter b_;
|
||||||
|
Iter e_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
using size_type = std::size_t;
|
||||||
|
using iterator = Iter;
|
||||||
|
using const_iterator = Iter;
|
||||||
|
using value_type = typename std::remove_reference<
|
||||||
|
typename std::iterator_traits<Iter>::reference>::type;
|
||||||
|
using reference = typename std::iterator_traits<Iter>::reference;
|
||||||
|
|
||||||
|
constexpr Range() : b_(), e_() {}
|
||||||
|
constexpr Range(Iter begin, Iter end) : b_(begin), e_(end) {}
|
||||||
|
|
||||||
|
constexpr Range(Iter begin, size_type size) : b_(begin), e_(begin + size) {}
|
||||||
|
|
||||||
|
template <class T = Iter, typename detail::IsCharPointer<T>::type = 0>
|
||||||
|
/* implicit */ Range(Iter str) : b_(str), e_(str + std::strlen(str)) {}
|
||||||
|
|
||||||
|
template <class T = Iter, typename detail::IsCharPointer<T>::const_type = 0>
|
||||||
|
/* implicit */ Range(const std::string& str)
|
||||||
|
: b_(str.data()), e_(b_ + str.size()) {}
|
||||||
|
|
||||||
|
// Allow implicit conversion from Range<From> to Range<To> if From is
|
||||||
|
// implicitly convertible to To.
|
||||||
|
template <
|
||||||
|
class OtherIter,
|
||||||
|
typename std::enable_if<
|
||||||
|
(!std::is_same<Iter, OtherIter>::value &&
|
||||||
|
std::is_convertible<OtherIter, Iter>::value),
|
||||||
|
int>::type = 0>
|
||||||
|
constexpr /* implicit */ Range(const Range<OtherIter>& other)
|
||||||
|
: b_(other.begin()), e_(other.end()) {}
|
||||||
|
|
||||||
|
Range(const Range&) = default;
|
||||||
|
Range(Range&&) = default;
|
||||||
|
|
||||||
|
Range& operator=(const Range&) & = default;
|
||||||
|
Range& operator=(Range&&) & = default;
|
||||||
|
|
||||||
|
constexpr size_type size() const {
|
||||||
|
return e_ - b_;
|
||||||
|
}
|
||||||
|
bool empty() const {
|
||||||
|
return b_ == e_;
|
||||||
|
}
|
||||||
|
Iter data() const {
|
||||||
|
return b_;
|
||||||
|
}
|
||||||
|
Iter begin() const {
|
||||||
|
return b_;
|
||||||
|
}
|
||||||
|
Iter end() const {
|
||||||
|
return e_;
|
||||||
|
}
|
||||||
|
|
||||||
|
void advance(size_type n) {
|
||||||
|
if (UNLIKELY(n > size())) {
|
||||||
|
throw std::out_of_range("index out of range");
|
||||||
|
}
|
||||||
|
b_ += n;
|
||||||
|
}
|
||||||
|
|
||||||
|
void subtract(size_type n) {
|
||||||
|
if (UNLIKELY(n > size())) {
|
||||||
|
throw std::out_of_range("index out of range");
|
||||||
|
}
|
||||||
|
e_ -= n;
|
||||||
|
}
|
||||||
|
|
||||||
|
Range subpiece(size_type first, size_type length = std::string::npos) const {
|
||||||
|
if (UNLIKELY(first > size())) {
|
||||||
|
throw std::out_of_range("index out of range");
|
||||||
|
}
|
||||||
|
|
||||||
|
return Range(b_ + first, std::min(length, size() - first));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
using ByteRange = Range<const unsigned char*>;
|
||||||
|
using MutableByteRange = Range<unsigned char*>;
|
||||||
|
using StringPiece = Range<const char*>;
|
||||||
|
}
|
50
contrib/pzstd/utils/ScopeGuard.h
Normal file
50
contrib/pzstd/utils/ScopeGuard.h
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dismissable scope guard.
|
||||||
|
* `Function` must be callable and take no parameters.
|
||||||
|
* Unless `dissmiss()` is called, the callable is executed upon destruction of
|
||||||
|
* `ScopeGuard`.
|
||||||
|
*
|
||||||
|
* Example:
|
||||||
|
*
|
||||||
|
* auto guard = makeScopeGuard([&] { cleanup(); });
|
||||||
|
*/
|
||||||
|
template <typename Function>
|
||||||
|
class ScopeGuard {
|
||||||
|
Function function;
|
||||||
|
bool dismissed;
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit ScopeGuard(Function&& function)
|
||||||
|
: function(std::move(function)), dismissed(false) {}
|
||||||
|
|
||||||
|
void dismiss() {
|
||||||
|
dismissed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
~ScopeGuard() noexcept {
|
||||||
|
if (!dismissed) {
|
||||||
|
function();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Creates a scope guard from `function`.
|
||||||
|
template <typename Function>
|
||||||
|
ScopeGuard<Function> makeScopeGuard(Function&& function) {
|
||||||
|
return ScopeGuard<Function>(std::forward<Function>(function));
|
||||||
|
}
|
||||||
|
}
|
58
contrib/pzstd/utils/ThreadPool.h
Normal file
58
contrib/pzstd/utils/ThreadPool.h
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "utils/WorkQueue.h"
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <functional>
|
||||||
|
#include <thread>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
/// A simple thread pool that pulls tasks off its queue in FIFO order.
|
||||||
|
class ThreadPool {
|
||||||
|
std::vector<std::thread> threads_;
|
||||||
|
|
||||||
|
WorkQueue<std::function<void()>> tasks_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Constructs a thread pool with `numThreads` threads.
|
||||||
|
explicit ThreadPool(std::size_t numThreads) {
|
||||||
|
threads_.reserve(numThreads);
|
||||||
|
for (std::size_t i = 0; i < numThreads; ++i) {
|
||||||
|
threads_.emplace_back([&] {
|
||||||
|
std::function<void()> task;
|
||||||
|
while (tasks_.pop(task)) {
|
||||||
|
task();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Finishes all tasks currently in the queue.
|
||||||
|
~ThreadPool() {
|
||||||
|
tasks_.finish();
|
||||||
|
for (auto& thread : threads_) {
|
||||||
|
thread.join();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds `task` to the queue of tasks to execute. Since `task` is a
|
||||||
|
* `std::function<>`, it cannot be a move only type. So any lambda passed must
|
||||||
|
* not capture move only types (like `std::unique_ptr`).
|
||||||
|
*
|
||||||
|
* @param task The task to execute.
|
||||||
|
*/
|
||||||
|
void add(std::function<void()> task) {
|
||||||
|
tasks_.push(std::move(task));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
144
contrib/pzstd/utils/WorkQueue.h
Normal file
144
contrib/pzstd/utils/WorkQueue.h
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "utils/Buffer.h"
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <cassert>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <functional>
|
||||||
|
#include <mutex>
|
||||||
|
#include <queue>
|
||||||
|
|
||||||
|
namespace pzstd {
|
||||||
|
|
||||||
|
/// Unbounded thread-safe work queue.
|
||||||
|
template <typename T>
|
||||||
|
class WorkQueue {
|
||||||
|
// Protects all member variable access
|
||||||
|
std::mutex mutex_;
|
||||||
|
std::condition_variable cv_;
|
||||||
|
|
||||||
|
std::queue<T> queue_;
|
||||||
|
bool done_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Constructs an empty work queue.
|
||||||
|
WorkQueue() : done_(false) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Push an item onto the work queue. Notify a single thread that work is
|
||||||
|
* available. If `finish()` has been called, do nothing and return false.
|
||||||
|
*
|
||||||
|
* @param item Item to push onto the queue.
|
||||||
|
* @returns True upon success, false if `finish()` has been called. An
|
||||||
|
* item was pushed iff `push()` returns true.
|
||||||
|
*/
|
||||||
|
bool push(T item) {
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mutex_);
|
||||||
|
if (done_) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
queue_.push(std::move(item));
|
||||||
|
}
|
||||||
|
cv_.notify_one();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Attempts to pop an item off the work queue. It will block until data is
|
||||||
|
* available or `finish()` has been called.
|
||||||
|
*
|
||||||
|
* @param[out] item If `pop` returns `true`, it contains the popped item.
|
||||||
|
* If `pop` returns `false`, it is unmodified.
|
||||||
|
* @returns True upon success. False if the queue is empty and
|
||||||
|
* `finish()` has been called.
|
||||||
|
*/
|
||||||
|
bool pop(T& item) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_);
|
||||||
|
while (queue_.empty() && !done_) {
|
||||||
|
cv_.wait(lock);
|
||||||
|
}
|
||||||
|
if (queue_.empty()) {
|
||||||
|
assert(done_);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
item = std::move(queue_.front());
|
||||||
|
queue_.pop();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Promise that `push()` won't be called again, so once the queue is empty
|
||||||
|
* there will never any more work.
|
||||||
|
*/
|
||||||
|
void finish() {
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mutex_);
|
||||||
|
assert(!done_);
|
||||||
|
done_ = true;
|
||||||
|
}
|
||||||
|
cv_.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Blocks until `finish()` has been called (but the queue may not be empty).
|
||||||
|
void waitUntilFinished() {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_);
|
||||||
|
while (!done_) {
|
||||||
|
cv_.wait(lock);
|
||||||
|
// If we were woken by a push, we need to wake a thread waiting on pop().
|
||||||
|
if (!done_) {
|
||||||
|
lock.unlock();
|
||||||
|
cv_.notify_one();
|
||||||
|
lock.lock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Work queue for `Buffer`s that knows the total number of bytes in the queue.
|
||||||
|
class BufferWorkQueue {
|
||||||
|
WorkQueue<Buffer> queue_;
|
||||||
|
std::atomic<std::size_t> size_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
BufferWorkQueue() : size_(0) {}
|
||||||
|
|
||||||
|
void push(Buffer buffer) {
|
||||||
|
size_.fetch_add(buffer.size());
|
||||||
|
queue_.push(std::move(buffer));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool pop(Buffer& buffer) {
|
||||||
|
bool result = queue_.pop(buffer);
|
||||||
|
if (result) {
|
||||||
|
size_.fetch_sub(buffer.size());
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void finish() {
|
||||||
|
queue_.finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Blocks until `finish()` has been called.
|
||||||
|
*
|
||||||
|
* @returns The total number of bytes of all the `Buffer`s currently in the
|
||||||
|
* queue.
|
||||||
|
*/
|
||||||
|
std::size_t size() {
|
||||||
|
queue_.waitUntilFinished();
|
||||||
|
return size_.load();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
89
contrib/pzstd/utils/test/BufferTest.cpp
Normal file
89
contrib/pzstd/utils/test/BufferTest.cpp
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#include "utils/Buffer.h"
|
||||||
|
#include "utils/Range.h"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
using namespace pzstd;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
void deleter(const unsigned char* buf) {
|
||||||
|
delete[] buf;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Buffer, Constructors) {
|
||||||
|
Buffer empty;
|
||||||
|
EXPECT_TRUE(empty.empty());
|
||||||
|
EXPECT_EQ(0, empty.size());
|
||||||
|
|
||||||
|
Buffer sized(5);
|
||||||
|
EXPECT_FALSE(sized.empty());
|
||||||
|
EXPECT_EQ(5, sized.size());
|
||||||
|
|
||||||
|
Buffer moved(std::move(sized));
|
||||||
|
EXPECT_FALSE(sized.empty());
|
||||||
|
EXPECT_EQ(5, sized.size());
|
||||||
|
|
||||||
|
Buffer assigned;
|
||||||
|
assigned = std::move(moved);
|
||||||
|
EXPECT_FALSE(sized.empty());
|
||||||
|
EXPECT_EQ(5, sized.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Buffer, BufferManagement) {
|
||||||
|
std::shared_ptr<unsigned char> buf(new unsigned char[10], deleter);
|
||||||
|
{
|
||||||
|
Buffer acquired(buf, MutableByteRange(buf.get(), buf.get() + 10));
|
||||||
|
EXPECT_EQ(2, buf.use_count());
|
||||||
|
Buffer moved(std::move(acquired));
|
||||||
|
EXPECT_EQ(2, buf.use_count());
|
||||||
|
Buffer assigned;
|
||||||
|
assigned = std::move(moved);
|
||||||
|
EXPECT_EQ(2, buf.use_count());
|
||||||
|
|
||||||
|
Buffer split = assigned.splitAt(5);
|
||||||
|
EXPECT_EQ(3, buf.use_count());
|
||||||
|
|
||||||
|
split.advance(1);
|
||||||
|
assigned.subtract(1);
|
||||||
|
EXPECT_EQ(3, buf.use_count());
|
||||||
|
}
|
||||||
|
EXPECT_EQ(1, buf.use_count());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Buffer, Modifiers) {
|
||||||
|
Buffer buf(10);
|
||||||
|
{
|
||||||
|
unsigned char i = 0;
|
||||||
|
for (auto& byte : buf.range()) {
|
||||||
|
byte = i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto prefix = buf.splitAt(2);
|
||||||
|
|
||||||
|
ASSERT_EQ(2, prefix.size());
|
||||||
|
EXPECT_EQ(0, *prefix.data());
|
||||||
|
|
||||||
|
ASSERT_EQ(8, buf.size());
|
||||||
|
EXPECT_EQ(2, *buf.data());
|
||||||
|
|
||||||
|
buf.advance(2);
|
||||||
|
EXPECT_EQ(4, *buf.data());
|
||||||
|
|
||||||
|
EXPECT_EQ(9, *(buf.range().end() - 1));
|
||||||
|
|
||||||
|
buf.subtract(2);
|
||||||
|
EXPECT_EQ(7, *(buf.range().end() - 1));
|
||||||
|
|
||||||
|
EXPECT_EQ(4, buf.size());
|
||||||
|
}
|
42
contrib/pzstd/utils/test/Makefile
Normal file
42
contrib/pzstd/utils/test/Makefile
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
# ##########################################################################
|
||||||
|
# Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the BSD-style license found in the
|
||||||
|
# LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
# of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
# ##########################################################################
|
||||||
|
|
||||||
|
# Define *.exe as extension for Windows systems
|
||||||
|
ifneq (,$(filter Windows%,$(OS)))
|
||||||
|
EXT =.exe
|
||||||
|
else
|
||||||
|
EXT =
|
||||||
|
endif
|
||||||
|
|
||||||
|
PZSTDDIR = ../..
|
||||||
|
|
||||||
|
# Set GTEST_INC and GTEST_LIB to work with your install of gtest
|
||||||
|
GTEST_INC ?= -isystem $(PZSTDDIR)/googletest/googletest/include
|
||||||
|
GTEST_LIB ?= -L $(PZSTDDIR)/googletest/build/googlemock/gtest
|
||||||
|
|
||||||
|
CPPFLAGS = -I$(PZSTDDIR) $(GTEST_INC) $(GTEST_LIB)
|
||||||
|
CFLAGS ?= -O3
|
||||||
|
CFLAGS += -std=c++11
|
||||||
|
CFLAGS += $(MOREFLAGS)
|
||||||
|
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
|
||||||
|
|
||||||
|
%: %.cpp
|
||||||
|
$(CXX) $(FLAGS) -lgtest -lgtest_main $^ -o $@$(EXT)
|
||||||
|
|
||||||
|
.PHONY: test clean
|
||||||
|
|
||||||
|
test: BufferTest RangeTest ScopeGuardTest ThreadPoolTest WorkQueueTest
|
||||||
|
@./BufferTest$(EXT)
|
||||||
|
@./RangeTest$(EXT)
|
||||||
|
@./ScopeGuardTest$(EXT)
|
||||||
|
@./ThreadPoolTest$(EXT)
|
||||||
|
@./WorkQueueTest$(EXT)
|
||||||
|
|
||||||
|
clean:
|
||||||
|
@rm -f BufferTest RangeTest ScopeGuardTest ThreadPoolTest WorkQueueTest
|
82
contrib/pzstd/utils/test/RangeTest.cpp
Normal file
82
contrib/pzstd/utils/test/RangeTest.cpp
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#include "utils/Range.h"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
using namespace pzstd;
|
||||||
|
|
||||||
|
// Range is directly copied from folly.
|
||||||
|
// Just some sanity tests to make sure everything seems to work.
|
||||||
|
|
||||||
|
TEST(Range, Constructors) {
|
||||||
|
StringPiece empty;
|
||||||
|
EXPECT_TRUE(empty.empty());
|
||||||
|
EXPECT_EQ(0, empty.size());
|
||||||
|
|
||||||
|
std::string str = "hello";
|
||||||
|
{
|
||||||
|
Range<std::string::const_iterator> piece(str.begin(), str.end());
|
||||||
|
EXPECT_EQ(5, piece.size());
|
||||||
|
EXPECT_EQ('h', *piece.data());
|
||||||
|
EXPECT_EQ('o', *(piece.end() - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
StringPiece piece(str.data(), str.size());
|
||||||
|
EXPECT_EQ(5, piece.size());
|
||||||
|
EXPECT_EQ('h', *piece.data());
|
||||||
|
EXPECT_EQ('o', *(piece.end() - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
StringPiece piece(str);
|
||||||
|
EXPECT_EQ(5, piece.size());
|
||||||
|
EXPECT_EQ('h', *piece.data());
|
||||||
|
EXPECT_EQ('o', *(piece.end() - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
StringPiece piece(str.c_str());
|
||||||
|
EXPECT_EQ(5, piece.size());
|
||||||
|
EXPECT_EQ('h', *piece.data());
|
||||||
|
EXPECT_EQ('o', *(piece.end() - 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Range, Modifiers) {
|
||||||
|
StringPiece range("hello world");
|
||||||
|
ASSERT_EQ(11, range.size());
|
||||||
|
|
||||||
|
{
|
||||||
|
auto hello = range.subpiece(0, 5);
|
||||||
|
EXPECT_EQ(5, hello.size());
|
||||||
|
EXPECT_EQ('h', *hello.data());
|
||||||
|
EXPECT_EQ('o', *(hello.end() - 1));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
auto hello = range;
|
||||||
|
hello.subtract(6);
|
||||||
|
EXPECT_EQ(5, hello.size());
|
||||||
|
EXPECT_EQ('h', *hello.data());
|
||||||
|
EXPECT_EQ('o', *(hello.end() - 1));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
auto world = range;
|
||||||
|
world.advance(6);
|
||||||
|
EXPECT_EQ(5, world.size());
|
||||||
|
EXPECT_EQ('w', *world.data());
|
||||||
|
EXPECT_EQ('d', *(world.end() - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string expected = "hello world";
|
||||||
|
EXPECT_EQ(expected, std::string(range.begin(), range.end()));
|
||||||
|
EXPECT_EQ(expected, std::string(range.data(), range.size()));
|
||||||
|
}
|
28
contrib/pzstd/utils/test/ScopeGuardTest.cpp
Normal file
28
contrib/pzstd/utils/test/ScopeGuardTest.cpp
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#include "utils/ScopeGuard.h"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
using namespace pzstd;
|
||||||
|
|
||||||
|
TEST(ScopeGuard, Dismiss) {
|
||||||
|
{
|
||||||
|
auto guard = makeScopeGuard([&] { EXPECT_TRUE(false); });
|
||||||
|
guard.dismiss();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ScopeGuard, Executes) {
|
||||||
|
bool executed = false;
|
||||||
|
{
|
||||||
|
auto guard = makeScopeGuard([&] { executed = true; });
|
||||||
|
}
|
||||||
|
EXPECT_TRUE(executed);
|
||||||
|
}
|
67
contrib/pzstd/utils/test/ThreadPoolTest.cpp
Normal file
67
contrib/pzstd/utils/test/ThreadPoolTest.cpp
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#include "utils/ThreadPool.h"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <atomic>
|
||||||
|
#include <thread>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
using namespace pzstd;
|
||||||
|
|
||||||
|
TEST(ThreadPool, Ordering) {
|
||||||
|
std::vector<int> results;
|
||||||
|
|
||||||
|
{
|
||||||
|
ThreadPool executor(1);
|
||||||
|
for (int i = 0; i < 100; ++i) {
|
||||||
|
executor.add([ &results, i ] { results.push_back(i); });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 100; ++i) {
|
||||||
|
EXPECT_EQ(i, results[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ThreadPool, AllJobsFinished) {
|
||||||
|
std::atomic<unsigned> numFinished{0};
|
||||||
|
std::atomic<bool> start{false};
|
||||||
|
{
|
||||||
|
ThreadPool executor(5);
|
||||||
|
for (int i = 0; i < 1000; ++i) {
|
||||||
|
executor.add([ &numFinished, &start ] {
|
||||||
|
while (!start.load()) {
|
||||||
|
// spin
|
||||||
|
}
|
||||||
|
++numFinished;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
start.store(true);
|
||||||
|
}
|
||||||
|
EXPECT_EQ(1000, numFinished.load());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ThreadPool, AddJobWhileJoining) {
|
||||||
|
std::atomic<bool> done{false};
|
||||||
|
{
|
||||||
|
ThreadPool executor(1);
|
||||||
|
executor.add([&executor, &done] {
|
||||||
|
while (!done.load()) {
|
||||||
|
std::this_thread::yield();
|
||||||
|
}
|
||||||
|
// Sleep for a second to be sure that we are joining
|
||||||
|
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||||
|
executor.add([] {
|
||||||
|
EXPECT_TRUE(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
done.store(true);
|
||||||
|
}
|
||||||
|
}
|
176
contrib/pzstd/utils/test/WorkQueueTest.cpp
Normal file
176
contrib/pzstd/utils/test/WorkQueueTest.cpp
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
#include "utils/Buffer.h"
|
||||||
|
#include "utils/WorkQueue.h"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <mutex>
|
||||||
|
#include <thread>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
using namespace pzstd;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
struct Popper {
|
||||||
|
WorkQueue<int>* queue;
|
||||||
|
int* results;
|
||||||
|
std::mutex* mutex;
|
||||||
|
|
||||||
|
void operator()() {
|
||||||
|
int result;
|
||||||
|
while (queue->pop(result)) {
|
||||||
|
std::lock_guard<std::mutex> lock(*mutex);
|
||||||
|
results[result] = result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WorkQueue, SingleThreaded) {
|
||||||
|
WorkQueue<int> queue;
|
||||||
|
int result;
|
||||||
|
|
||||||
|
queue.push(5);
|
||||||
|
EXPECT_TRUE(queue.pop(result));
|
||||||
|
EXPECT_EQ(5, result);
|
||||||
|
|
||||||
|
queue.push(1);
|
||||||
|
queue.push(2);
|
||||||
|
EXPECT_TRUE(queue.pop(result));
|
||||||
|
EXPECT_EQ(1, result);
|
||||||
|
EXPECT_TRUE(queue.pop(result));
|
||||||
|
EXPECT_EQ(2, result);
|
||||||
|
|
||||||
|
queue.push(1);
|
||||||
|
queue.push(2);
|
||||||
|
queue.finish();
|
||||||
|
EXPECT_TRUE(queue.pop(result));
|
||||||
|
EXPECT_EQ(1, result);
|
||||||
|
EXPECT_TRUE(queue.pop(result));
|
||||||
|
EXPECT_EQ(2, result);
|
||||||
|
EXPECT_FALSE(queue.pop(result));
|
||||||
|
|
||||||
|
queue.waitUntilFinished();
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WorkQueue, SPSC) {
|
||||||
|
WorkQueue<int> queue;
|
||||||
|
const int max = 100;
|
||||||
|
|
||||||
|
for (int i = 0; i < 10; ++i) {
|
||||||
|
queue.push(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::thread thread([ &queue, max ] {
|
||||||
|
int result;
|
||||||
|
for (int i = 0;; ++i) {
|
||||||
|
if (!queue.pop(result)) {
|
||||||
|
EXPECT_EQ(i, max);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
EXPECT_EQ(i, result);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
std::this_thread::yield();
|
||||||
|
for (int i = 10; i < max; ++i) {
|
||||||
|
queue.push(i);
|
||||||
|
}
|
||||||
|
queue.finish();
|
||||||
|
|
||||||
|
thread.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WorkQueue, SPMC) {
|
||||||
|
WorkQueue<int> queue;
|
||||||
|
std::vector<int> results(10000, -1);
|
||||||
|
std::mutex mutex;
|
||||||
|
std::vector<std::thread> threads;
|
||||||
|
for (int i = 0; i < 100; ++i) {
|
||||||
|
threads.emplace_back(Popper{&queue, results.data(), &mutex});
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 10000; ++i) {
|
||||||
|
queue.push(i);
|
||||||
|
}
|
||||||
|
queue.finish();
|
||||||
|
|
||||||
|
for (auto& thread : threads) {
|
||||||
|
thread.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 10000; ++i) {
|
||||||
|
EXPECT_EQ(i, results[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WorkQueue, MPMC) {
|
||||||
|
WorkQueue<int> queue;
|
||||||
|
std::vector<int> results(10000, -1);
|
||||||
|
std::mutex mutex;
|
||||||
|
std::vector<std::thread> popperThreads;
|
||||||
|
for (int i = 0; i < 100; ++i) {
|
||||||
|
popperThreads.emplace_back(Popper{&queue, results.data(), &mutex});
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::thread> pusherThreads;
|
||||||
|
for (int i = 0; i < 10; ++i) {
|
||||||
|
auto min = i * 1000;
|
||||||
|
auto max = (i + 1) * 1000;
|
||||||
|
pusherThreads.emplace_back(
|
||||||
|
[ &queue, min, max ] {
|
||||||
|
for (int i = min; i < max; ++i) {
|
||||||
|
queue.push(i);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto& thread : pusherThreads) {
|
||||||
|
thread.join();
|
||||||
|
}
|
||||||
|
queue.finish();
|
||||||
|
|
||||||
|
for (auto& thread : popperThreads) {
|
||||||
|
thread.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 10000; ++i) {
|
||||||
|
EXPECT_EQ(i, results[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(BufferWorkQueue, SizeCalculatedCorrectly) {
|
||||||
|
{
|
||||||
|
BufferWorkQueue queue;
|
||||||
|
queue.finish();
|
||||||
|
EXPECT_EQ(0, queue.size());
|
||||||
|
}
|
||||||
|
{
|
||||||
|
BufferWorkQueue queue;
|
||||||
|
queue.push(Buffer(10));
|
||||||
|
queue.finish();
|
||||||
|
EXPECT_EQ(10, queue.size());
|
||||||
|
}
|
||||||
|
{
|
||||||
|
BufferWorkQueue queue;
|
||||||
|
queue.push(Buffer(10));
|
||||||
|
queue.push(Buffer(5));
|
||||||
|
queue.finish();
|
||||||
|
EXPECT_EQ(15, queue.size());
|
||||||
|
}
|
||||||
|
{
|
||||||
|
BufferWorkQueue queue;
|
||||||
|
queue.push(Buffer(10));
|
||||||
|
queue.push(Buffer(5));
|
||||||
|
queue.finish();
|
||||||
|
Buffer buffer;
|
||||||
|
queue.pop(buffer);
|
||||||
|
EXPECT_EQ(5, queue.size());
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user