2016-09-01 22:22:19 +00:00
|
|
|
/**
|
|
|
|
* Copyright (c) 2016-present, Facebook, Inc.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* This source code is licensed under the BSD-style license found in the
|
|
|
|
* LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
* of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
*/
|
|
|
|
#include "Pzstd.h"
|
|
|
|
#include "SkippableFrame.h"
|
|
|
|
#include "utils/FileSystem.h"
|
|
|
|
#include "utils/Range.h"
|
|
|
|
#include "utils/ScopeGuard.h"
|
|
|
|
#include "utils/ThreadPool.h"
|
|
|
|
#include "utils/WorkQueue.h"
|
|
|
|
|
2016-09-23 22:47:26 +00:00
|
|
|
#include <chrono>
|
2016-10-07 04:31:16 +00:00
|
|
|
#include <cinttypes>
|
2016-09-01 22:22:19 +00:00
|
|
|
#include <cstddef>
|
|
|
|
#include <cstdio>
|
|
|
|
#include <memory>
|
|
|
|
#include <string>
|
|
|
|
|
2016-09-21 21:29:47 +00:00
|
|
|
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
#  include <fcntl.h>    /* _O_BINARY */
#  include <io.h>       /* _setmode, _isatty */
/* Switch `file` to binary mode and report a failure through perror().
 * The body is wrapped in do/while(0) so that `SET_BINARY_MODE(f);` expands to
 * exactly one statement and stays valid in front of an `else`. */
#  define SET_BINARY_MODE(file)                             \
    do {                                                    \
      if (_setmode(_fileno(file), _O_BINARY) == -1) {       \
        perror("Cannot set _O_BINARY");                     \
      }                                                     \
    } while (0)
#else
#  include <unistd.h>   /* isatty */
/* Nothing to do on this branch; expand to a no-op expression (the argument is
 * not evaluated, exactly as with the previous empty definition). */
#  define SET_BINARY_MODE(file) ((void)0)
#endif
|
|
|
|
|
2016-09-01 22:22:19 +00:00
|
|
|
namespace pzstd {
|
|
|
|
|
|
|
|
namespace {
// Output file name for which openOutputFile() skips the "already exists"
// check; the spelling depends on the platform.
#ifdef _WIN32
const std::string nullOutput{"nul"};
#else
const std::string nullOutput{"/dev/null"};
#endif
} // anonymous namespace
|
|
|
|
|
|
|
|
using std::size_t;
|
|
|
|
|
2016-09-21 21:29:47 +00:00
|
|
|
static std::uintmax_t fileSizeOrZero(const std::string &file) {
|
|
|
|
if (file == "-") {
|
|
|
|
return 0;
|
2016-09-01 22:22:19 +00:00
|
|
|
}
|
2016-09-21 21:29:47 +00:00
|
|
|
std::error_code ec;
|
|
|
|
auto size = file_size(file, ec);
|
|
|
|
if (ec) {
|
|
|
|
size = 0;
|
2016-09-01 22:22:19 +00:00
|
|
|
}
|
2016-09-21 21:29:47 +00:00
|
|
|
return size;
|
|
|
|
}
|
2016-09-01 22:22:19 +00:00
|
|
|
|
2016-09-23 19:55:21 +00:00
|
|
|
static std::uint64_t handleOneInput(const Options &options,
|
2016-09-21 21:29:47 +00:00
|
|
|
const std::string &inputFile,
|
|
|
|
FILE* inputFd,
|
2016-09-23 19:55:21 +00:00
|
|
|
const std::string &outputFile,
|
2016-09-21 21:29:47 +00:00
|
|
|
FILE* outputFd,
|
2016-10-12 22:18:16 +00:00
|
|
|
SharedState& state) {
|
2016-09-21 21:29:47 +00:00
|
|
|
auto inputSize = fileSizeOrZero(inputFile);
|
2016-09-01 22:22:19 +00:00
|
|
|
// WorkQueue outlives ThreadPool so in the case of error we are certain
|
2016-10-07 22:04:34 +00:00
|
|
|
// we don't accidently try to call push() on it after it is destroyed
|
2016-09-21 22:12:23 +00:00
|
|
|
WorkQueue<std::shared_ptr<BufferWorkQueue>> outs{options.numThreads + 1};
|
2016-09-23 19:55:21 +00:00
|
|
|
std::uint64_t bytesRead;
|
|
|
|
std::uint64_t bytesWritten;
|
2016-09-01 22:22:19 +00:00
|
|
|
{
|
2016-10-07 22:04:34 +00:00
|
|
|
// Initialize the (de)compression thread pool with numThreads
|
|
|
|
ThreadPool executor(options.numThreads);
|
|
|
|
// Run the reader thread on an extra thread
|
|
|
|
ThreadPool readExecutor(1);
|
2016-09-01 22:22:19 +00:00
|
|
|
if (!options.decompress) {
|
|
|
|
// Add a job that reads the input and starts all the compression jobs
|
2016-10-07 22:04:34 +00:00
|
|
|
readExecutor.add(
|
2016-10-12 22:18:16 +00:00
|
|
|
[&state, &outs, &executor, inputFd, inputSize, &options, &bytesRead] {
|
2016-09-23 19:55:21 +00:00
|
|
|
bytesRead = asyncCompressChunks(
|
2016-10-12 22:18:16 +00:00
|
|
|
state,
|
2016-09-01 22:22:19 +00:00
|
|
|
outs,
|
|
|
|
executor,
|
|
|
|
inputFd,
|
|
|
|
inputSize,
|
|
|
|
options.numThreads,
|
|
|
|
options.determineParameters());
|
|
|
|
});
|
|
|
|
// Start writing
|
2016-10-13 02:02:27 +00:00
|
|
|
bytesWritten = writeFile(state, outs, outputFd, options.decompress);
|
2016-09-01 22:22:19 +00:00
|
|
|
} else {
|
|
|
|
// Add a job that reads the input and starts all the decompression jobs
|
2016-10-12 22:18:16 +00:00
|
|
|
readExecutor.add([&state, &outs, &executor, inputFd, &bytesRead] {
|
|
|
|
bytesRead = asyncDecompressFrames(state, outs, executor, inputFd);
|
2016-09-01 22:22:19 +00:00
|
|
|
});
|
|
|
|
// Start writing
|
2016-10-13 02:02:27 +00:00
|
|
|
bytesWritten = writeFile(state, outs, outputFd, options.decompress);
|
2016-09-01 22:22:19 +00:00
|
|
|
}
|
|
|
|
}
|
2016-10-13 02:02:27 +00:00
|
|
|
if (!state.errorHolder.hasError()) {
|
2016-09-23 19:55:21 +00:00
|
|
|
std::string inputFileName = inputFile == "-" ? "stdin" : inputFile;
|
|
|
|
std::string outputFileName = outputFile == "-" ? "stdout" : outputFile;
|
|
|
|
if (!options.decompress) {
|
|
|
|
double ratio = static_cast<double>(bytesWritten) /
|
|
|
|
static_cast<double>(bytesRead + !bytesRead);
|
2016-10-13 02:02:27 +00:00
|
|
|
state.log(INFO, "%-20s :%6.2f%% (%6" PRIu64 " => %6" PRIu64
|
2016-10-07 04:31:16 +00:00
|
|
|
" bytes, %s)\n",
|
2016-09-23 19:55:21 +00:00
|
|
|
inputFileName.c_str(), ratio * 100, bytesRead, bytesWritten,
|
|
|
|
outputFileName.c_str());
|
|
|
|
} else {
|
2016-10-13 02:02:27 +00:00
|
|
|
state.log(INFO, "%-20s: %" PRIu64 " bytes \n",
|
2016-09-23 19:55:21 +00:00
|
|
|
inputFileName.c_str(),bytesWritten);
|
|
|
|
}
|
|
|
|
}
|
2016-09-01 22:22:19 +00:00
|
|
|
return bytesWritten;
|
|
|
|
}
|
|
|
|
|
2016-09-21 21:29:47 +00:00
|
|
|
static FILE *openInputFile(const std::string &inputFile,
|
|
|
|
ErrorHolder &errorHolder) {
|
|
|
|
if (inputFile == "-") {
|
|
|
|
SET_BINARY_MODE(stdin);
|
|
|
|
return stdin;
|
|
|
|
}
|
2016-09-21 23:25:08 +00:00
|
|
|
// Check if input file is a directory
|
|
|
|
{
|
|
|
|
std::error_code ec;
|
|
|
|
if (is_directory(inputFile, ec)) {
|
|
|
|
errorHolder.setError("Output file is a directory -- ignored");
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
}
|
2016-09-21 21:29:47 +00:00
|
|
|
auto inputFd = std::fopen(inputFile.c_str(), "rb");
|
|
|
|
if (!errorHolder.check(inputFd != nullptr, "Failed to open input file")) {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
return inputFd;
|
|
|
|
}
|
|
|
|
|
|
|
|
static FILE *openOutputFile(const Options &options,
|
|
|
|
const std::string &outputFile,
|
2016-10-13 02:02:27 +00:00
|
|
|
SharedState& state) {
|
2016-09-21 21:29:47 +00:00
|
|
|
if (outputFile == "-") {
|
|
|
|
SET_BINARY_MODE(stdout);
|
|
|
|
return stdout;
|
|
|
|
}
|
|
|
|
// Check if the output file exists and then open it
|
|
|
|
if (!options.overwrite && outputFile != nullOutput) {
|
|
|
|
auto outputFd = std::fopen(outputFile.c_str(), "rb");
|
|
|
|
if (outputFd != nullptr) {
|
|
|
|
std::fclose(outputFd);
|
2016-10-13 02:02:27 +00:00
|
|
|
if (!state.log.logsAt(INFO)) {
|
|
|
|
state.errorHolder.setError("Output file exists");
|
2016-09-21 21:29:47 +00:00
|
|
|
return nullptr;
|
|
|
|
}
|
2016-10-13 02:02:27 +00:00
|
|
|
state.log(
|
|
|
|
INFO,
|
2016-09-21 21:29:47 +00:00
|
|
|
"pzstd: %s already exists; do you wish to overwrite (y/n) ? ",
|
|
|
|
outputFile.c_str());
|
|
|
|
int c = getchar();
|
|
|
|
if (c != 'y' && c != 'Y') {
|
2016-10-13 02:02:27 +00:00
|
|
|
state.errorHolder.setError("Not overwritten");
|
2016-09-21 21:29:47 +00:00
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
auto outputFd = std::fopen(outputFile.c_str(), "wb");
|
2016-10-13 02:02:27 +00:00
|
|
|
if (!state.errorHolder.check(
|
2016-09-21 21:29:47 +00:00
|
|
|
outputFd != nullptr, "Failed to open output file")) {
|
2016-10-13 02:02:27 +00:00
|
|
|
return nullptr;
|
2016-09-21 21:29:47 +00:00
|
|
|
}
|
|
|
|
return outputFd;
|
|
|
|
}
|
|
|
|
|
|
|
|
int pzstdMain(const Options &options) {
|
|
|
|
int returnCode = 0;
|
2016-10-13 02:02:27 +00:00
|
|
|
SharedState state(options);
|
2016-09-21 21:29:47 +00:00
|
|
|
for (const auto& input : options.inputFiles) {
|
2016-10-13 00:23:38 +00:00
|
|
|
// Setup the shared state
|
2016-09-21 21:29:47 +00:00
|
|
|
auto printErrorGuard = makeScopeGuard([&] {
|
2016-10-12 22:18:16 +00:00
|
|
|
if (state.errorHolder.hasError()) {
|
2016-09-21 21:29:47 +00:00
|
|
|
returnCode = 1;
|
2016-10-13 02:02:27 +00:00
|
|
|
state.log(ERROR, "pzstd: %s: %s.\n", input.c_str(),
|
|
|
|
state.errorHolder.getError().c_str());
|
2016-09-21 21:29:47 +00:00
|
|
|
}
|
|
|
|
});
|
|
|
|
// Open the input file
|
2016-10-12 22:18:16 +00:00
|
|
|
auto inputFd = openInputFile(input, state.errorHolder);
|
2016-09-21 21:29:47 +00:00
|
|
|
if (inputFd == nullptr) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
auto closeInputGuard = makeScopeGuard([&] { std::fclose(inputFd); });
|
|
|
|
// Open the output file
|
|
|
|
auto outputFile = options.getOutputFile(input);
|
2016-10-12 22:18:16 +00:00
|
|
|
if (!state.errorHolder.check(outputFile != "",
|
2016-09-21 21:29:47 +00:00
|
|
|
"Input file does not have extension .zst")) {
|
|
|
|
continue;
|
|
|
|
}
|
2016-10-13 02:02:27 +00:00
|
|
|
auto outputFd = openOutputFile(options, outputFile, state);
|
2016-09-21 21:29:47 +00:00
|
|
|
if (outputFd == nullptr) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
auto closeOutputGuard = makeScopeGuard([&] { std::fclose(outputFd); });
|
|
|
|
// (de)compress the file
|
2016-10-12 22:18:16 +00:00
|
|
|
handleOneInput(options, input, inputFd, outputFile, outputFd, state);
|
|
|
|
if (state.errorHolder.hasError()) {
|
2016-09-21 21:29:47 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// Delete the input file if necessary
|
|
|
|
if (!options.keepSource) {
|
|
|
|
// Be sure that we are done and have written everything before we delete
|
2016-10-12 22:18:16 +00:00
|
|
|
if (!state.errorHolder.check(std::fclose(inputFd) == 0,
|
2016-09-21 21:29:47 +00:00
|
|
|
"Failed to close input file")) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
closeInputGuard.dismiss();
|
2016-10-12 22:18:16 +00:00
|
|
|
if (!state.errorHolder.check(std::fclose(outputFd) == 0,
|
2016-09-21 21:29:47 +00:00
|
|
|
"Failed to close output file")) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
closeOutputGuard.dismiss();
|
|
|
|
if (std::remove(input.c_str()) != 0) {
|
2016-10-12 22:18:16 +00:00
|
|
|
state.errorHolder.setError("Failed to remove input file");
|
2016-09-21 21:29:47 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Returns 1 if any of the files failed to (de)compress.
|
|
|
|
return returnCode;
|
|
|
|
}
|
|
|
|
|
2016-09-01 22:22:19 +00:00
|
|
|
/// Construct a `ZSTD_inBuffer` that points to the data in `buffer`.
|
|
|
|
static ZSTD_inBuffer makeZstdInBuffer(const Buffer& buffer) {
|
|
|
|
return ZSTD_inBuffer{buffer.data(), buffer.size(), 0};
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Advance `buffer` and `inBuffer` by the amount of data read, as indicated by
|
|
|
|
* `inBuffer.pos`.
|
|
|
|
*/
|
|
|
|
void advance(Buffer& buffer, ZSTD_inBuffer& inBuffer) {
|
|
|
|
auto pos = inBuffer.pos;
|
|
|
|
inBuffer.src = static_cast<const unsigned char*>(inBuffer.src) + pos;
|
|
|
|
inBuffer.size -= pos;
|
|
|
|
inBuffer.pos = 0;
|
|
|
|
return buffer.advance(pos);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Construct a `ZSTD_outBuffer` that points to the data in `buffer`.
|
|
|
|
static ZSTD_outBuffer makeZstdOutBuffer(Buffer& buffer) {
|
|
|
|
return ZSTD_outBuffer{buffer.data(), buffer.size(), 0};
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Split `buffer` and advance `outBuffer` by the amount of data written, as
|
|
|
|
* indicated by `outBuffer.pos`.
|
|
|
|
*/
|
|
|
|
Buffer split(Buffer& buffer, ZSTD_outBuffer& outBuffer) {
|
|
|
|
auto pos = outBuffer.pos;
|
|
|
|
outBuffer.dst = static_cast<unsigned char*>(outBuffer.dst) + pos;
|
|
|
|
outBuffer.size -= pos;
|
|
|
|
outBuffer.pos = 0;
|
|
|
|
return buffer.splitAt(pos);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Stream chunks of input from `in`, compress it, and stream it out to `out`.
|
|
|
|
*
|
2016-10-12 22:18:16 +00:00
|
|
|
* @param state The shared state
|
2016-09-01 22:22:19 +00:00
|
|
|
* @param in Queue that we `pop()` input buffers from
|
|
|
|
* @param out Queue that we `push()` compressed output buffers to
|
|
|
|
* @param maxInputSize An upper bound on the size of the input
|
|
|
|
*/
|
|
|
|
static void compress(
|
2016-10-12 22:18:16 +00:00
|
|
|
SharedState& state,
|
2016-09-01 22:22:19 +00:00
|
|
|
std::shared_ptr<BufferWorkQueue> in,
|
|
|
|
std::shared_ptr<BufferWorkQueue> out,
|
2016-10-13 00:23:38 +00:00
|
|
|
size_t maxInputSize) {
|
2016-10-12 22:18:16 +00:00
|
|
|
auto& errorHolder = state.errorHolder;
|
2016-09-01 22:22:19 +00:00
|
|
|
auto guard = makeScopeGuard([&] { out->finish(); });
|
|
|
|
// Initialize the CCtx
|
2016-10-13 00:23:38 +00:00
|
|
|
auto ctx = state.cStreamPool->get();
|
2016-09-01 22:22:19 +00:00
|
|
|
if (!errorHolder.check(ctx != nullptr, "Failed to allocate ZSTD_CStream")) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
{
|
2016-10-13 00:23:38 +00:00
|
|
|
auto err = ZSTD_resetCStream(ctx.get(), 0);
|
2016-09-01 22:22:19 +00:00
|
|
|
if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Allocate space for the result
|
|
|
|
auto outBuffer = Buffer(ZSTD_compressBound(maxInputSize));
|
|
|
|
auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
|
|
|
|
{
|
|
|
|
Buffer inBuffer;
|
|
|
|
// Read a buffer in from the input queue
|
|
|
|
while (in->pop(inBuffer) && !errorHolder.hasError()) {
|
|
|
|
auto zstdInBuffer = makeZstdInBuffer(inBuffer);
|
|
|
|
// Compress the whole buffer and send it to the output queue
|
|
|
|
while (!inBuffer.empty() && !errorHolder.hasError()) {
|
|
|
|
if (!errorHolder.check(
|
|
|
|
!outBuffer.empty(), "ZSTD_compressBound() was too small")) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// Compress
|
|
|
|
auto err =
|
|
|
|
ZSTD_compressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
|
|
|
|
if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// Split the compressed data off outBuffer and pass to the output queue
|
|
|
|
out->push(split(outBuffer, zstdOutBuffer));
|
|
|
|
// Forget about the data we already compressed
|
|
|
|
advance(inBuffer, zstdInBuffer);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Write the epilog
|
|
|
|
size_t bytesLeft;
|
|
|
|
do {
|
|
|
|
if (!errorHolder.check(
|
|
|
|
!outBuffer.empty(), "ZSTD_compressBound() was too small")) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
bytesLeft = ZSTD_endStream(ctx.get(), &zstdOutBuffer);
|
|
|
|
if (!errorHolder.check(
|
|
|
|
!ZSTD_isError(bytesLeft), ZSTD_getErrorName(bytesLeft))) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
out->push(split(outBuffer, zstdOutBuffer));
|
|
|
|
} while (bytesLeft != 0 && !errorHolder.hasError());
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Calculates how large each independently compressed frame should be.
|
|
|
|
*
|
|
|
|
* @param size The size of the source if known, 0 otherwise
|
|
|
|
* @param numThreads The number of threads available to run compression jobs on
|
|
|
|
* @param params The zstd parameters to be used for compression
|
|
|
|
*/
|
2016-09-07 03:11:02 +00:00
|
|
|
static size_t calculateStep(
|
|
|
|
std::uintmax_t size,
|
|
|
|
size_t numThreads,
|
|
|
|
const ZSTD_parameters ¶ms) {
|
2016-11-16 01:46:28 +00:00
|
|
|
(void)size;
|
|
|
|
(void)numThreads;
|
2016-11-16 00:39:09 +00:00
|
|
|
return size_t{1} << (params.cParams.windowLog + 2);
|
2016-09-01 22:22:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
namespace {
/// Result of inspecting a stream after a read.
enum class FileStatus { Continue, Done, Error };

/// Classifies `fd`: `Done` when its end-of-file indicator is set, otherwise
/// `Error` when its error indicator is set, otherwise `Continue`.
FileStatus fileStatus(FILE* fd) {
  if (std::feof(fd) != 0) {
    return FileStatus::Done;
  }
  return std::ferror(fd) != 0 ? FileStatus::Error : FileStatus::Continue;
}
} // anonymous namespace
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Reads `size` data in chunks of `chunkSize` and puts it into `queue`.
|
|
|
|
* Will read less if an error or EOF occurs.
|
|
|
|
* Returns the status of the file after all of the reads have occurred.
|
|
|
|
*/
|
|
|
|
static FileStatus
|
2016-09-23 19:55:21 +00:00
|
|
|
readData(BufferWorkQueue& queue, size_t chunkSize, size_t size, FILE* fd,
|
|
|
|
std::uint64_t *totalBytesRead) {
|
2016-09-01 22:22:19 +00:00
|
|
|
Buffer buffer(size);
|
|
|
|
while (!buffer.empty()) {
|
|
|
|
auto bytesRead =
|
|
|
|
std::fread(buffer.data(), 1, std::min(chunkSize, buffer.size()), fd);
|
2016-09-23 19:55:21 +00:00
|
|
|
*totalBytesRead += bytesRead;
|
2016-09-01 22:22:19 +00:00
|
|
|
queue.push(buffer.splitAt(bytesRead));
|
2016-09-03 03:11:22 +00:00
|
|
|
auto status = fileStatus(fd);
|
|
|
|
if (status != FileStatus::Continue) {
|
|
|
|
return status;
|
2016-09-01 22:22:19 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return FileStatus::Continue;
|
|
|
|
}
|
|
|
|
|
2016-09-23 19:55:21 +00:00
|
|
|
std::uint64_t asyncCompressChunks(
|
2016-10-12 22:18:16 +00:00
|
|
|
SharedState& state,
|
2016-09-01 22:22:19 +00:00
|
|
|
WorkQueue<std::shared_ptr<BufferWorkQueue>>& chunks,
|
|
|
|
ThreadPool& executor,
|
|
|
|
FILE* fd,
|
2016-09-07 03:11:02 +00:00
|
|
|
std::uintmax_t size,
|
2016-09-01 22:22:19 +00:00
|
|
|
size_t numThreads,
|
|
|
|
ZSTD_parameters params) {
|
|
|
|
auto chunksGuard = makeScopeGuard([&] { chunks.finish(); });
|
2016-09-23 19:55:21 +00:00
|
|
|
std::uint64_t bytesRead = 0;
|
2016-09-01 22:22:19 +00:00
|
|
|
|
|
|
|
// Break the input up into chunks of size `step` and compress each chunk
|
|
|
|
// independently.
|
|
|
|
size_t step = calculateStep(size, numThreads, params);
|
2016-11-16 00:39:09 +00:00
|
|
|
state.log(DEBUG, "Chosen frame size: %zu\n", step);
|
2016-09-01 22:22:19 +00:00
|
|
|
auto status = FileStatus::Continue;
|
2016-10-12 22:18:16 +00:00
|
|
|
while (status == FileStatus::Continue && !state.errorHolder.hasError()) {
|
2016-09-01 22:22:19 +00:00
|
|
|
// Make a new input queue that we will put the chunk's input data into.
|
|
|
|
auto in = std::make_shared<BufferWorkQueue>();
|
|
|
|
auto inGuard = makeScopeGuard([&] { in->finish(); });
|
|
|
|
// Make a new output queue that compress will put the compressed data into.
|
|
|
|
auto out = std::make_shared<BufferWorkQueue>();
|
|
|
|
// Start compression in the thread pool
|
2016-10-13 00:23:38 +00:00
|
|
|
executor.add([&state, in, out, step] {
|
2016-09-01 22:22:19 +00:00
|
|
|
return compress(
|
2016-10-13 00:23:38 +00:00
|
|
|
state, std::move(in), std::move(out), step);
|
2016-09-01 22:22:19 +00:00
|
|
|
});
|
|
|
|
// Pass the output queue to the writer thread.
|
|
|
|
chunks.push(std::move(out));
|
2017-03-06 03:36:56 +00:00
|
|
|
state.log(VERBOSE, "%s\n", "Starting a new frame");
|
2016-09-01 22:22:19 +00:00
|
|
|
// Fill the input queue for the compression job we just started
|
2016-09-23 19:55:21 +00:00
|
|
|
status = readData(*in, ZSTD_CStreamInSize(), step, fd, &bytesRead);
|
2016-09-01 22:22:19 +00:00
|
|
|
}
|
2016-10-12 22:18:16 +00:00
|
|
|
state.errorHolder.check(status != FileStatus::Error, "Error reading input");
|
2016-09-23 19:55:21 +00:00
|
|
|
return bytesRead;
|
2016-09-01 22:22:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Decompress a frame, whose data is streamed into `in`, and stream the output
|
|
|
|
* to `out`.
|
|
|
|
*
|
2016-10-12 22:18:16 +00:00
|
|
|
* @param state The shared state
|
2016-09-01 22:22:19 +00:00
|
|
|
* @param in Queue that we `pop()` input buffers from. It contains
|
|
|
|
* exactly one compressed frame.
|
|
|
|
* @param out Queue that we `push()` decompressed output buffers to
|
|
|
|
*/
|
|
|
|
static void decompress(
|
2016-10-12 22:18:16 +00:00
|
|
|
SharedState& state,
|
2016-09-01 22:22:19 +00:00
|
|
|
std::shared_ptr<BufferWorkQueue> in,
|
|
|
|
std::shared_ptr<BufferWorkQueue> out) {
|
2016-10-12 22:18:16 +00:00
|
|
|
auto& errorHolder = state.errorHolder;
|
2016-09-01 22:22:19 +00:00
|
|
|
auto guard = makeScopeGuard([&] { out->finish(); });
|
|
|
|
// Initialize the DCtx
|
2016-10-13 00:23:38 +00:00
|
|
|
auto ctx = state.dStreamPool->get();
|
2016-09-01 22:22:19 +00:00
|
|
|
if (!errorHolder.check(ctx != nullptr, "Failed to allocate ZSTD_DStream")) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
{
|
2016-10-13 00:23:38 +00:00
|
|
|
auto err = ZSTD_resetDStream(ctx.get());
|
2016-09-01 22:22:19 +00:00
|
|
|
if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
const size_t outSize = ZSTD_DStreamOutSize();
|
|
|
|
Buffer inBuffer;
|
|
|
|
size_t returnCode = 0;
|
|
|
|
// Read a buffer in from the input queue
|
|
|
|
while (in->pop(inBuffer) && !errorHolder.hasError()) {
|
|
|
|
auto zstdInBuffer = makeZstdInBuffer(inBuffer);
|
|
|
|
// Decompress the whole buffer and send it to the output queue
|
|
|
|
while (!inBuffer.empty() && !errorHolder.hasError()) {
|
|
|
|
// Allocate a buffer with at least outSize bytes.
|
|
|
|
Buffer outBuffer(outSize);
|
|
|
|
auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
|
|
|
|
// Decompress
|
|
|
|
returnCode =
|
|
|
|
ZSTD_decompressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
|
|
|
|
if (!errorHolder.check(
|
|
|
|
!ZSTD_isError(returnCode), ZSTD_getErrorName(returnCode))) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// Pass the buffer with the decompressed data to the output queue
|
|
|
|
out->push(split(outBuffer, zstdOutBuffer));
|
|
|
|
// Advance past the input we already read
|
|
|
|
advance(inBuffer, zstdInBuffer);
|
|
|
|
if (returnCode == 0) {
|
|
|
|
// The frame is over, prepare to (maybe) start a new frame
|
|
|
|
ZSTD_initDStream(ctx.get());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!errorHolder.check(returnCode <= 1, "Incomplete block")) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// We've given ZSTD_decompressStream all of our data, but there may still
|
|
|
|
// be data to read.
|
|
|
|
while (returnCode == 1) {
|
|
|
|
// Allocate a buffer with at least outSize bytes.
|
|
|
|
Buffer outBuffer(outSize);
|
|
|
|
auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
|
|
|
|
// Pass in no input.
|
|
|
|
ZSTD_inBuffer zstdInBuffer{nullptr, 0, 0};
|
|
|
|
// Decompress
|
|
|
|
returnCode =
|
|
|
|
ZSTD_decompressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
|
|
|
|
if (!errorHolder.check(
|
|
|
|
!ZSTD_isError(returnCode), ZSTD_getErrorName(returnCode))) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// Pass the buffer with the decompressed data to the output queue
|
|
|
|
out->push(split(outBuffer, zstdOutBuffer));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-23 19:55:21 +00:00
|
|
|
std::uint64_t asyncDecompressFrames(
|
2016-10-12 22:18:16 +00:00
|
|
|
SharedState& state,
|
2016-09-01 22:22:19 +00:00
|
|
|
WorkQueue<std::shared_ptr<BufferWorkQueue>>& frames,
|
|
|
|
ThreadPool& executor,
|
|
|
|
FILE* fd) {
|
|
|
|
auto framesGuard = makeScopeGuard([&] { frames.finish(); });
|
2016-09-23 19:55:21 +00:00
|
|
|
std::uint64_t totalBytesRead = 0;
|
|
|
|
|
2016-09-01 22:22:19 +00:00
|
|
|
// Split the source up into its component frames.
|
|
|
|
// If we find our recognized skippable frame we know the next frames size
|
|
|
|
// which means that we can decompress each standard frame in independently.
|
|
|
|
// Otherwise, we will decompress using only one decompression task.
|
|
|
|
const size_t chunkSize = ZSTD_DStreamInSize();
|
|
|
|
auto status = FileStatus::Continue;
|
2016-10-12 22:18:16 +00:00
|
|
|
while (status == FileStatus::Continue && !state.errorHolder.hasError()) {
|
2016-09-01 22:22:19 +00:00
|
|
|
// Make a new input queue that we will put the frames's bytes into.
|
|
|
|
auto in = std::make_shared<BufferWorkQueue>();
|
|
|
|
auto inGuard = makeScopeGuard([&] { in->finish(); });
|
|
|
|
// Make a output queue that decompress will put the decompressed data into
|
|
|
|
auto out = std::make_shared<BufferWorkQueue>();
|
|
|
|
|
|
|
|
size_t frameSize;
|
|
|
|
{
|
|
|
|
// Calculate the size of the next frame.
|
|
|
|
// frameSize is 0 if the frame info can't be decoded.
|
|
|
|
Buffer buffer(SkippableFrame::kSize);
|
|
|
|
auto bytesRead = std::fread(buffer.data(), 1, buffer.size(), fd);
|
2016-09-23 19:55:21 +00:00
|
|
|
totalBytesRead += bytesRead;
|
2016-09-03 03:11:22 +00:00
|
|
|
status = fileStatus(fd);
|
2016-09-01 22:22:19 +00:00
|
|
|
if (bytesRead == 0 && status != FileStatus::Continue) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
buffer.subtract(buffer.size() - bytesRead);
|
|
|
|
frameSize = SkippableFrame::tryRead(buffer.range());
|
|
|
|
in->push(std::move(buffer));
|
|
|
|
}
|
2016-09-03 03:11:22 +00:00
|
|
|
if (frameSize == 0) {
|
|
|
|
// We hit a non SkippableFrame, so this will be the last job.
|
|
|
|
// Make sure that we don't use too much memory
|
|
|
|
in->setMaxSize(64);
|
|
|
|
out->setMaxSize(64);
|
|
|
|
}
|
2016-09-01 22:22:19 +00:00
|
|
|
// Start decompression in the thread pool
|
2016-10-12 22:18:16 +00:00
|
|
|
executor.add([&state, in, out] {
|
|
|
|
return decompress(state, std::move(in), std::move(out));
|
2016-09-01 22:22:19 +00:00
|
|
|
});
|
|
|
|
// Pass the output queue to the writer thread
|
|
|
|
frames.push(std::move(out));
|
|
|
|
if (frameSize == 0) {
|
|
|
|
// We hit a non SkippableFrame ==> not compressed by pzstd or corrupted
|
|
|
|
// Pass the rest of the source to this decompression task
|
2017-03-06 03:36:56 +00:00
|
|
|
state.log(VERBOSE, "%s\n",
|
|
|
|
"Input not in pzstd format, falling back to serial decompression");
|
2016-10-12 22:18:16 +00:00
|
|
|
while (status == FileStatus::Continue && !state.errorHolder.hasError()) {
|
2016-09-23 19:55:21 +00:00
|
|
|
status = readData(*in, chunkSize, chunkSize, fd, &totalBytesRead);
|
2016-09-01 22:22:19 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2016-11-16 00:39:09 +00:00
|
|
|
state.log(VERBOSE, "Decompressing a frame of size %zu", frameSize);
|
2016-09-01 22:22:19 +00:00
|
|
|
// Fill the input queue for the decompression job we just started
|
2016-09-23 19:55:21 +00:00
|
|
|
status = readData(*in, chunkSize, frameSize, fd, &totalBytesRead);
|
2016-09-01 22:22:19 +00:00
|
|
|
}
|
2016-10-12 22:18:16 +00:00
|
|
|
state.errorHolder.check(status != FileStatus::Error, "Error reading input");
|
2016-09-23 19:55:21 +00:00
|
|
|
return totalBytesRead;
|
2016-09-01 22:22:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Write `data` to `fd`, returns true iff success.
|
|
|
|
static bool writeData(ByteRange data, FILE* fd) {
|
|
|
|
while (!data.empty()) {
|
|
|
|
data.advance(std::fwrite(data.begin(), 1, data.size(), fd));
|
|
|
|
if (std::ferror(fd)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-09-23 19:55:21 +00:00
|
|
|
std::uint64_t writeFile(
|
2016-10-12 22:18:16 +00:00
|
|
|
SharedState& state,
|
2016-09-01 22:22:19 +00:00
|
|
|
WorkQueue<std::shared_ptr<BufferWorkQueue>>& outs,
|
|
|
|
FILE* outputFd,
|
2016-10-13 02:02:27 +00:00
|
|
|
bool decompress) {
|
2016-10-12 22:18:16 +00:00
|
|
|
auto& errorHolder = state.errorHolder;
|
2016-10-13 02:02:27 +00:00
|
|
|
auto lineClearGuard = makeScopeGuard([&state] {
|
|
|
|
state.log.clear(INFO);
|
2016-09-23 22:47:26 +00:00
|
|
|
});
|
2016-09-23 19:55:21 +00:00
|
|
|
std::uint64_t bytesWritten = 0;
|
2016-09-01 22:22:19 +00:00
|
|
|
std::shared_ptr<BufferWorkQueue> out;
|
|
|
|
// Grab the output queue for each decompression job (in order).
|
2017-06-23 01:09:42 +00:00
|
|
|
while (outs.pop(out)) {
|
|
|
|
if (errorHolder.hasError()) {
|
|
|
|
continue;
|
|
|
|
}
|
2016-09-21 21:29:47 +00:00
|
|
|
if (!decompress) {
|
2016-09-01 22:22:19 +00:00
|
|
|
// If we are compressing and want to write skippable frames we can't
|
|
|
|
// start writing before compression is done because we need to know the
|
|
|
|
// compressed size.
|
|
|
|
// Wait for the compressed size to be available and write skippable frame
|
|
|
|
SkippableFrame frame(out->size());
|
|
|
|
if (!writeData(frame.data(), outputFd)) {
|
|
|
|
errorHolder.setError("Failed to write output");
|
|
|
|
return bytesWritten;
|
|
|
|
}
|
|
|
|
bytesWritten += frame.kSize;
|
|
|
|
}
|
|
|
|
// For each chunk of the frame: Pop it from the queue and write it
|
|
|
|
Buffer buffer;
|
|
|
|
while (out->pop(buffer) && !errorHolder.hasError()) {
|
|
|
|
if (!writeData(buffer.range(), outputFd)) {
|
|
|
|
errorHolder.setError("Failed to write output");
|
|
|
|
return bytesWritten;
|
|
|
|
}
|
|
|
|
bytesWritten += buffer.size();
|
2016-10-13 02:02:27 +00:00
|
|
|
state.log.update(INFO, "Written: %u MB ",
|
|
|
|
static_cast<std::uint32_t>(bytesWritten >> 20));
|
2016-09-01 22:22:19 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return bytesWritten;
|
|
|
|
}
|
|
|
|
}
|