Update:
 * new CLI; bro -> brotli; + man page
 * JNI wrappers preparation (for bazel build)
 * add raw binary dictionary representation `dictionary.bin`
 * add ability to side-load brotli RFC dictionary
 * decoder persists last error now
 * fix `BrotliDecoderDecompress` documentation
 * Go reader doesn't block until necessary
 * more consistent bazel target names
 * Java dictionary data compiled footprint reduced
 * Java tests refactoring
This commit is contained in:
Eugene Kliuchnikov 2017-05-29 17:55:14 +02:00 committed by GitHub
parent 2c001010aa
commit 03739d2b11
39 changed files with 2187 additions and 686 deletions

4
.gitignore vendored
View File

@ -13,5 +13,5 @@ __pycache__/
# Tests
*.txt.uncompressed
*.bro
*.unbro
*.br
*.unbr

48
BUILD
View File

@ -9,6 +9,46 @@ licenses(["notice"]) # MIT
exports_files(["LICENSE"])
# >>> JNI headers
# Matches builds whose "cpu" value is "darwin"; used as a select() key by the
# jni_md.h genrule in this file to pick the macOS JNI platform header.
config_setting(
name = "darwin",
values = {"cpu": "darwin"},
visibility = ["//visibility:public"],
)
# Matches builds whose "cpu" value is "darwin_x86_64"; treated the same way as
# ":darwin" by the jni_md.h genrule in this file.
config_setting(
name = "darwin_x86_64",
values = {"cpu": "darwin_x86_64"},
visibility = ["//visibility:public"],
)
# Copies jni.h out of the @openjdk_linux archive into this package as
# jni/jni.h. The source is not select()-ed: the Linux archive's jni.h is used
# for every platform.
genrule(
name = "copy_link_jni_header",
srcs = ["@openjdk_linux//:jni_h"],
outs = ["jni/jni.h"],
cmd = "cp -f $< $@",
)
# Copies the platform-specific jni_md.h into this package as jni/jni_md.h.
# The macOS archive is used when either darwin config_setting matches;
# every other configuration falls back to the Linux archive.
genrule(
name = "copy_link_jni_md_header",
srcs = select({
":darwin": ["@openjdk_macos//:jni_md_h"],
":darwin_x86_64": ["@openjdk_macos//:jni_md_h"],
"//conditions:default": ["@openjdk_linux//:jni_md_h"],
}),
outs = ["jni/jni_md.h"],
cmd = "cp -f $< $@",
)
# Header-only library exposing the two copied JNI headers; "jni" is added to
# the include path of dependents, so they can write #include <jni.h>.
cc_library(
name = "jni_inc",
hdrs = [":jni/jni.h", ":jni/jni_md.h"],
includes = ["jni"],
)
# <<< JNI headers
STRICT_C_OPTIONS = [
"--pedantic-errors",
"-Wall",
@ -59,7 +99,7 @@ filegroup(
)
cc_library(
name = "brotli",
name = "brotli_inc",
hdrs = [":public_headers"],
copts = STRICT_C_OPTIONS,
includes = ["c/include"],
@ -70,7 +110,7 @@ cc_library(
srcs = [":common_sources"],
hdrs = [":common_headers"],
copts = STRICT_C_OPTIONS,
deps = [":brotli"],
deps = [":brotli_inc"],
)
cc_library(
@ -91,8 +131,8 @@ cc_library(
)
cc_binary(
name = "bro",
srcs = ["c/tools/bro.c"],
name = "brotli",
srcs = ["c/tools/brotli.c"],
copts = STRICT_C_OPTIONS,
linkstatic = 1,
deps = [

View File

@ -181,14 +181,14 @@ if(BROTLI_PARENT_DIRECTORY)
set(BROTLI_LIBRARIES "${BROTLI_LIBRARIES}" PARENT_SCOPE)
endif()
# Build the bro executable
add_executable(bro c/tools/bro.c)
target_link_libraries(bro ${BROTLI_LIBRARIES})
# Build the brotli executable
add_executable(brotli c/tools/brotli.c)
target_link_libraries(brotli ${BROTLI_LIBRARIES})
# Installation
if(NOT BROTLI_BUNDLED_MODE)
install(
TARGETS bro
TARGETS brotli
RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
)
@ -243,7 +243,7 @@ if(NOT BROTLI_DISABLE_TESTS)
add_test(NAME "${BROTLI_TEST_PREFIX}roundtrip/${INPUT}/${quality}"
COMMAND "${CMAKE_COMMAND}"
-DBROTLI_WRAPPER=${BROTLI_WINE}
-DBROTLI_CLI=$<TARGET_FILE:bro>
-DBROTLI_CLI=$<TARGET_FILE:brotli>
-DQUALITY=${quality}
-DINPUT=${INPUT_FILE}
-DOUTPUT=${OUTPUT_FILE}.${quality}
@ -260,7 +260,7 @@ if(NOT BROTLI_DISABLE_TESTS)
add_test(NAME "${BROTLI_TEST_PREFIX}compatibility/${INPUT}"
COMMAND "${CMAKE_COMMAND}"
-DBROTLI_WRAPPER=${BROTLI_WINE}
-DBROTLI_CLI=$<TARGET_FILE:bro>
-DBROTLI_CLI=$<TARGET_FILE:brotli>
-DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/${INPUT}
-P ${CMAKE_CURRENT_SOURCE_DIR}/tests/run-compatibility-test.cmake)
endforeach()

View File

@ -13,4 +13,4 @@ include python/brotlimodule.cc
include python/README.md
include README.md
include setup.py
include c/tools/bro.c
include c/tools/brotli.c

View File

@ -1,12 +1,13 @@
OS := $(shell uname)
LIBSOURCES = $(wildcard c/common/*.c) $(wildcard c/dec/*.c) $(wildcard c/enc/*.c)
SOURCES = $(LIBSOURCES) c/tools/bro.c
LIBSOURCES = $(wildcard c/common/*.c) $(wildcard c/dec/*.c) \
$(wildcard c/enc/*.c)
SOURCES = $(LIBSOURCES) c/tools/brotli.c
BINDIR = bin
OBJDIR = $(BINDIR)/obj
LIBOBJECTS = $(addprefix $(OBJDIR)/, $(LIBSOURCES:.c=.o))
OBJECTS = $(addprefix $(OBJDIR)/, $(SOURCES:.c=.o))
LIB_A = libbrotli.a
EXECUTABLE = bro
EXECUTABLE = brotli
DIRS = $(OBJDIR)/c/common $(OBJDIR)/c/dec $(OBJDIR)/c/enc \
$(OBJDIR)/c/tools $(BINDIR)/tmp
CFLAGS += -O2

View File

@ -13,6 +13,38 @@ git_repository(
remote = "https://github.com/bazelbuild/rules_go.git",
tag = "0.4.1",
)
load("@io_bazel_rules_go//go:def.bzl", "go_repositories")
# Linux x64 Zulu JDK 8 archive, fetched only for its JNI headers. The inline
# BUILD file exports jni.h and the linux/jni_md.h platform header as
# publicly visible filegroups.
new_http_archive(
name = "openjdk_linux",
url = "https://bazel-mirror.storage.googleapis.com/openjdk/azul-zulu-8.20.0.5-jdk8.0.121/zulu8.20.0.5-jdk8.0.121-linux_x64.tar.gz",
sha256 = "7fdfb17d890406470b2303d749d3138e7f353749e67a0a22f542e1ab3e482745",
build_file_content = """
package(
default_visibility = ["//visibility:public"],
)
filegroup(
name = "jni_h",
srcs = ["zulu8.20.0.5-jdk8.0.121-linux_x64/include/jni.h"],
)
filegroup(
name = "jni_md_h",
srcs = ["zulu8.20.0.5-jdk8.0.121-linux_x64/include/linux/jni_md.h"],
)""",
)
# macOS x64 Zulu JDK 8 archive. Only the darwin/jni_md.h platform header is
# exported; no jni_h filegroup is declared for this archive.
new_http_archive(
name = "openjdk_macos",
url = "https://bazel-mirror.storage.googleapis.com/openjdk/azul-zulu-8.20.0.5-jdk8.0.121/zulu8.20.0.5-jdk8.0.121-macosx_x64.zip",
sha256 = "2a58bd1d9b0cbf0b3d8d1bcdd117c407e3d5a0ec01e2f53565c9bec5cf9ea78b",
build_file_content = """
package(
default_visibility = ["//visibility:public"],
)
filegroup(
name = "jni_md_h",
srcs = ["zulu8.20.0.5-jdk8.0.121-macosx_x64/include/darwin/jni_md.h"],
)""",
)
load("@io_bazel_rules_go//go:def.bzl", "go_repositories")
go_repositories()

432
c/common/dictionary.bin Executable file

File diff suppressed because one or more lines are too long

View File

@ -10,24 +10,8 @@
extern "C" {
#endif
static const BrotliDictionary kBrotliDictionary = {
/* size_bits_by_length */
{
0, 0, 0, 0, 10, 10, 11, 11,
10, 10, 10, 10, 10, 9, 9, 8,
7, 7, 8, 7, 7, 6, 6, 5,
5, 0, 0, 0, 0, 0, 0, 0
},
/* offsets_by_length */
{
0, 0, 0, 0, 0, 4096, 9216, 21504,
35840, 44032, 53248, 63488, 74752, 87040, 93696, 100864,
104704, 106752, 108928, 113536, 115968, 118528, 119872, 121280,
122016, 122784, 122784, 122784, 122784, 122784, 122784, 122784
},
/* data */
#ifndef BROTLI_EXTERNAL_DICTIONARY_DATA
static const uint8_t kBrotliDictionaryData[] =
{
116,105,109,101,100,111,119,110,108,105,102,101,108,101,102,116,98,97,99,107,99,
111,100,101,100,97,116,97,115,104,111,119,111,110,108,121,115,105,116,101,99,105
@ -5875,12 +5859,47 @@ static const BrotliDictionary kBrotliDictionary = {
,164,181,224,164,190,224,164,136,224,164,184,224,164,149,224,165,141,224,164,176
,224,164,191,224,164,175,224,164,164,224,164,190
}
;
#endif /* !BROTLI_EXTERNAL_DICTIONARY_DATA */
static BrotliDictionary kBrotliDictionary = {
/* size_bits_by_length */
{
0, 0, 0, 0, 10, 10, 11, 11,
10, 10, 10, 10, 10, 9, 9, 8,
7, 7, 8, 7, 7, 6, 6, 5,
5, 0, 0, 0, 0, 0, 0, 0
},
/* offsets_by_length */
{
0, 0, 0, 0, 0, 4096, 9216, 21504,
35840, 44032, 53248, 63488, 74752, 87040, 93696, 100864,
104704, 106752, 108928, 113536, 115968, 118528, 119872, 121280,
122016, 122784, 122784, 122784, 122784, 122784, 122784, 122784
},
/* data_size == sizeof(kBrotliDictionaryData) */
122784,
/* data */
#ifdef BROTLI_EXTERNAL_DICTIONARY_DATA
NULL
#else
kBrotliDictionaryData
#endif
};
/* Returns a pointer to the single, statically allocated dictionary descriptor.
   The parameter list is spelled (void) so that the definition is a real
   prototype, consistent with the BROTLI_COMMON_API declaration; in C an empty
   "()" list declares a function with unspecified parameters. */
const BrotliDictionary* BrotliGetDictionary(void) {
  return &kBrotliDictionary;
}
/* Installs externally provided dictionary bytes into the shared descriptor.
   The call is a no-op when |data| is NULL or when the descriptor already
   points at dictionary data. */
void BrotliSetDictionaryData(const uint8_t* data) {
  if (data == NULL) return;
  if (kBrotliDictionary.data != NULL) return;
  kBrotliDictionary.data = data;
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -27,19 +27,33 @@ typedef struct BrotliDictionary {
* Dictionary consists of words with length of [4..24] bytes.
* Values at [0..3] and [25..31] indices should not be addressed.
*/
uint8_t size_bits_by_length[32];
const uint8_t size_bits_by_length[32];
/* assert(offset[i + 1] == offset[i] + (bits[i] ? (i << bits[i]) : 0)) */
uint32_t offsets_by_length[32];
const uint32_t offsets_by_length[32];
/* assert(data_size == offsets_by_length[31]) */
const size_t data_size;
/* Data array is not bound, and should obey to size_bits_by_length values.
Specified size matches default (RFC 7932) dictionary. */
/* assert(sizeof(data) == offsets_by_length[31]) */
uint8_t data[122784];
Specified size matches default (RFC 7932) dictionary. Its size is
defined by data_size */
const uint8_t* data;
} BrotliDictionary;
BROTLI_COMMON_API extern const BrotliDictionary* BrotliGetDictionary(void);
/**
* Sets dictionary data.
*
* When dictionary data is already set / present, this method is no-op.
*
* Dictionary data MUST be provided before BrotliGetDictionary is invoked.
* This method is used ONLY in multi-client environment (e.g. C + Java),
* to reduce storage by sharing single dictionary between implementations.
*/
BROTLI_COMMON_API void BrotliSetDictionaryData(const uint8_t* data);
#define BROTLI_MIN_DICTIONARY_WORD_LENGTH 4
#define BROTLI_MAX_DICTIONARY_WORD_LENGTH 24

View File

@ -66,7 +66,6 @@ BrotliDecoderState* BrotliDecoderCreateInstance(
}
BrotliDecoderStateInitWithCustomAllocators(
state, alloc_func, free_func, opaque);
state->error_code = BROTLI_DECODER_NO_ERROR;
return state;
}
@ -1747,6 +1746,9 @@ CommandPostDecodeLiterals:
/* Compensate double distance-ring-buffer roll. */
s->dist_rb_idx += s->distance_context;
offset += word_idx * i;
if (BROTLI_PREDICT_FALSE(!s->dictionary->data)) {
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET);
}
if (transform_idx < kNumTransforms) {
const uint8_t* word = &s->dictionary->data[offset];
int len = i;
@ -1899,6 +1901,10 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
size_t* available_out, uint8_t** next_out, size_t* total_out) {
BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
BrotliBitReader* br = &s->br;
/* Do not try to process further in a case of unrecoverable error. */
if ((int)s->error_code < 0) {
return BROTLI_DECODER_RESULT_ERROR;
}
if (*available_out && (!next_out || !*next_out)) {
return SaveErrorCode(
s, BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS));

View File

@ -41,6 +41,8 @@ void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s,
s->memory_manager_opaque = opaque;
}
s->error_code = 0; /* BROTLI_DECODER_NO_ERROR */
BrotliInitBitReader(&s->br);
s->state = BROTLI_STATE_UNINITED;
s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;

View File

@ -84,8 +84,9 @@ typedef enum {
BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_1, -14) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_2, -15) SEPARATOR \
\
/* -16..-19 codes are reserved */ \
/* -16..-18 codes are reserved */ \
\
BROTLI_ERROR_CODE(_ERROR_, DICTIONARY_NOT_SET, -19) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_, INVALID_ARGUMENTS, -20) SEPARATOR \
\
/* Memory allocation problems */ \
@ -207,9 +208,9 @@ BROTLI_DEC_API BrotliDecoderResult BrotliDecoderDecompress(
* allocation failed, arguments were invalid, etc.;
* use ::BrotliDecoderGetErrorCode to get detailed error code
* @returns ::BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT decoding is blocked until
* more output space is provided
* @returns ::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT decoding is blocked until
* more input data is provided
* @returns ::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT decoding is blocked until
* more output space is provided
* @returns ::BROTLI_DECODER_RESULT_SUCCESS decoding is finished, no more
* input might be consumed and no more output will be produced
*/

View File

@ -1,521 +0,0 @@
/* Copyright 2014 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Example main() function for Brotli library. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <brotli/decode.h>
#include <brotli/encode.h>
#if !defined(_WIN32)
#include <unistd.h>
#include <utime.h>
#else
#include <io.h>
#include <share.h>
#include <sys/utime.h>
#define MAKE_BINARY(FILENO) (_setmode((FILENO), _O_BINARY), (FILENO))
#if !defined(__MINGW32__)
#define STDIN_FILENO MAKE_BINARY(_fileno(stdin))
#define STDOUT_FILENO MAKE_BINARY(_fileno(stdout))
#define S_IRUSR S_IREAD
#define S_IWUSR S_IWRITE
#endif
#define fdopen _fdopen
#define unlink _unlink
#define utimbuf _utimbuf
#define utime _utime
#define fopen ms_fopen
#define open ms_open
#define chmod(F, P) (0)
#define chown(F, O, G) (0)
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#define fseek _fseeki64
#define ftell _ftelli64
#endif
static FILE* ms_fopen(const char *filename, const char *mode) {
FILE* result = 0;
fopen_s(&result, filename, mode);
return result;
}
static int ms_open(const char *filename, int oflag, int pmode) {
int result = -1;
_sopen_s(&result, filename, oflag | O_BINARY, _SH_DENYNO, pmode);
return result;
}
#endif /* WIN32 */
static int ParseQuality(const char* s, int* quality) {
if (s[0] >= '0' && s[0] <= '9') {
*quality = s[0] - '0';
if (s[1] >= '0' && s[1] <= '9') {
*quality = *quality * 10 + s[1] - '0';
return (s[2] == 0) ? 1 : 0;
}
return (s[1] == 0) ? 1 : 0;
}
return 0;
}
static void ParseArgv(int argc, char **argv,
char **input_path,
char **output_path,
char **dictionary_path,
int *force,
int *quality,
int *decompress,
int *repeat,
int *verbose,
int *lgwin,
int *copy_stat) {
int k;
*force = 0;
*input_path = 0;
*output_path = 0;
*repeat = 1;
*verbose = 0;
*lgwin = 22;
*copy_stat = 1;
{
size_t argv0_len = strlen(argv[0]);
*decompress =
argv0_len >= 5 && strcmp(&argv[0][argv0_len - 5], "unbro") == 0;
}
for (k = 1; k < argc; ++k) {
if (!strcmp("--force", argv[k]) ||
!strcmp("-f", argv[k])) {
if (*force != 0) {
goto error;
}
*force = 1;
continue;
} else if (!strcmp("--decompress", argv[k]) ||
!strcmp("--uncompress", argv[k]) ||
!strcmp("-d", argv[k])) {
*decompress = 1;
continue;
} else if (!strcmp("--verbose", argv[k]) ||
!strcmp("-v", argv[k])) {
if (*verbose != 0) {
goto error;
}
*verbose = 1;
continue;
} else if (!strcmp("--no-copy-stat", argv[k])) {
if (*copy_stat == 0) {
goto error;
}
*copy_stat = 0;
continue;
}
if (k < argc - 1) {
if (!strcmp("--input", argv[k]) ||
!strcmp("--in", argv[k]) ||
!strcmp("-i", argv[k])) {
if (*input_path != 0) {
goto error;
}
*input_path = argv[k + 1];
++k;
continue;
} else if (!strcmp("--output", argv[k]) ||
!strcmp("--out", argv[k]) ||
!strcmp("-o", argv[k])) {
if (*output_path != 0) {
goto error;
}
*output_path = argv[k + 1];
++k;
continue;
} else if (!strcmp("--custom-dictionary", argv[k])) {
if (*dictionary_path != 0) {
goto error;
}
*dictionary_path = argv[k + 1];
++k;
continue;
} else if (!strcmp("--quality", argv[k]) ||
!strcmp("-q", argv[k])) {
if (!ParseQuality(argv[k + 1], quality)) {
goto error;
}
++k;
continue;
} else if (!strcmp("--repeat", argv[k]) ||
!strcmp("-r", argv[k])) {
if (!ParseQuality(argv[k + 1], repeat)) {
goto error;
}
++k;
continue;
} else if (!strcmp("--window", argv[k]) ||
!strcmp("-w", argv[k])) {
if (!ParseQuality(argv[k + 1], lgwin)) {
goto error;
}
if (*lgwin < 10 || *lgwin >= 25) {
goto error;
}
++k;
continue;
}
}
goto error;
}
return;
error:
fprintf(stderr,
"Usage: %s [--force] [--quality n] [--decompress]"
" [--input filename] [--output filename] [--repeat iters]"
" [--verbose] [--window n] [--custom-dictionary filename]"
" [--no-copy-stat]\n",
argv[0]);
exit(1);
}
static FILE* OpenInputFile(const char* input_path) {
FILE* f;
if (input_path == 0) {
return fdopen(STDIN_FILENO, "rb");
}
f = fopen(input_path, "rb");
if (f == 0) {
perror("fopen");
exit(1);
}
return f;
}
static FILE *OpenOutputFile(const char *output_path, const int force) {
int fd;
if (output_path == 0) {
return fdopen(STDOUT_FILENO, "wb");
}
fd = open(output_path, O_CREAT | (force ? 0 : O_EXCL) | O_WRONLY | O_TRUNC,
S_IRUSR | S_IWUSR);
if (fd < 0) {
if (!force) {
struct stat statbuf;
if (stat(output_path, &statbuf) == 0) {
fprintf(stderr, "output file exists\n");
exit(1);
}
}
perror("open");
exit(1);
}
return fdopen(fd, "wb");
}
static int64_t FileSize(const char *path) {
FILE *f = fopen(path, "rb");
int64_t retval;
if (f == NULL) {
return -1;
}
if (fseek(f, 0L, SEEK_END) != 0) {
fclose(f);
return -1;
}
retval = ftell(f);
if (fclose(f) != 0) {
return -1;
}
return retval;
}
/* Copy file times and permissions.
TODO: this is a "best effort" implementation; honest cross-platform
fully featured implementation is way too hacky; add more hacks by request. */
static void CopyStat(const char* input_path, const char* output_path) {
struct stat statbuf;
struct utimbuf times;
int res;
if (input_path == 0 || output_path == 0) {
return;
}
if (stat(input_path, &statbuf) != 0) {
return;
}
times.actime = statbuf.st_atime;
times.modtime = statbuf.st_mtime;
utime(output_path, &times);
res = chmod(output_path, statbuf.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO));
if (res != 0)
perror("chmod failed");
res = chown(output_path, (uid_t)-1, statbuf.st_gid);
if (res != 0)
perror("chown failed");
res = chown(output_path, statbuf.st_uid, (gid_t)-1);
if (res != 0)
perror("chown failed");
}
/* Ownership of the result is passed to the caller.
|*dictionary_size| is set to resulting buffer size. */
static uint8_t* ReadDictionary(const char* path, size_t* dictionary_size) {
static const int kMaxDictionarySize = (1 << 24) - 16;
FILE *f = fopen(path, "rb");
int64_t file_size_64;
uint8_t* buffer;
size_t bytes_read;
if (f == NULL) {
perror("fopen");
exit(1);
}
file_size_64 = FileSize(path);
if (file_size_64 == -1) {
fprintf(stderr, "could not get size of dictionary file");
exit(1);
}
if (file_size_64 > kMaxDictionarySize) {
fprintf(stderr, "dictionary is larger than maximum allowed: %d\n",
kMaxDictionarySize);
exit(1);
}
*dictionary_size = (size_t)file_size_64;
buffer = (uint8_t*)malloc(*dictionary_size);
if (!buffer) {
fprintf(stderr, "could not read dictionary: out of memory\n");
exit(1);
}
bytes_read = fread(buffer, sizeof(uint8_t), *dictionary_size, f);
if (bytes_read != *dictionary_size) {
fprintf(stderr, "could not read dictionary\n");
exit(1);
}
fclose(f);
return buffer;
}
static const size_t kFileBufferSize = 65536;
static int Decompress(FILE* fin, FILE* fout, const char* dictionary_path) {
/* Dictionary should be kept during first rounds of decompression. */
uint8_t* dictionary = NULL;
uint8_t* input;
uint8_t* output;
size_t available_in;
const uint8_t* next_in;
size_t available_out = kFileBufferSize;
uint8_t* next_out;
BrotliDecoderResult result = BROTLI_DECODER_RESULT_ERROR;
BrotliDecoderState* s = BrotliDecoderCreateInstance(NULL, NULL, NULL);
if (!s) {
fprintf(stderr, "out of memory\n");
return 0;
}
if (dictionary_path != NULL) {
size_t dictionary_size = 0;
dictionary = ReadDictionary(dictionary_path, &dictionary_size);
BrotliDecoderSetCustomDictionary(s, dictionary_size, dictionary);
}
input = (uint8_t*)malloc(kFileBufferSize);
output = (uint8_t*)malloc(kFileBufferSize);
if (!input || !output) {
fprintf(stderr, "out of memory\n");
goto end;
}
next_out = output;
result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
while (1) {
if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
if (feof(fin)) {
break;
}
available_in = fread(input, 1, kFileBufferSize, fin);
next_in = input;
if (ferror(fin)) {
break;
}
} else if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
fwrite(output, 1, kFileBufferSize, fout);
if (ferror(fout)) {
break;
}
available_out = kFileBufferSize;
next_out = output;
} else {
break; /* Error or success. */
}
result = BrotliDecoderDecompressStream(
s, &available_in, &next_in, &available_out, &next_out, 0);
}
if (next_out != output) {
fwrite(output, 1, (size_t)(next_out - output), fout);
}
if ((result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) || ferror(fout)) {
fprintf(stderr, "failed to write output\n");
} else if (result != BROTLI_DECODER_RESULT_SUCCESS) {
/* Error or needs more input. */
fprintf(stderr, "corrupt input\n");
}
end:
free(dictionary);
free(input);
free(output);
BrotliDecoderDestroyInstance(s);
return (result == BROTLI_DECODER_RESULT_SUCCESS) ? 1 : 0;
}
static int Compress(int quality, int lgwin, FILE* fin, FILE* fout,
const char *dictionary_path) {
BrotliEncoderState* s = BrotliEncoderCreateInstance(0, 0, 0);
uint8_t* buffer = (uint8_t*)malloc(kFileBufferSize << 1);
uint8_t* input = buffer;
uint8_t* output = buffer + kFileBufferSize;
size_t available_in = 0;
const uint8_t* next_in = NULL;
size_t available_out = kFileBufferSize;
uint8_t* next_out = output;
int is_eof = 0;
int is_ok = 1;
if (!s || !buffer) {
is_ok = 0;
goto finish;
}
BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
if (dictionary_path != NULL) {
size_t dictionary_size = 0;
uint8_t* dictionary = ReadDictionary(dictionary_path, &dictionary_size);
BrotliEncoderSetCustomDictionary(s, dictionary_size, dictionary);
free(dictionary);
}
while (1) {
if (available_in == 0 && !is_eof) {
available_in = fread(input, 1, kFileBufferSize, fin);
next_in = input;
if (ferror(fin)) break;
is_eof = feof(fin);
}
if (!BrotliEncoderCompressStream(s,
is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
&available_in, &next_in, &available_out, &next_out, NULL)) {
is_ok = 0;
break;
}
if (available_out != kFileBufferSize) {
size_t out_size = kFileBufferSize - available_out;
fwrite(output, 1, out_size, fout);
if (ferror(fout)) break;
available_out = kFileBufferSize;
next_out = output;
}
if (BrotliEncoderIsFinished(s)) break;
}
finish:
free(buffer);
BrotliEncoderDestroyInstance(s);
if (!is_ok) {
/* Should detect OOM? */
fprintf(stderr, "failed to compress data\n");
return 0;
} else if (ferror(fout)) {
fprintf(stderr, "failed to write output\n");
return 0;
} else if (ferror(fin)) {
fprintf(stderr, "failed to read input\n");
return 0;
}
return 1;
}
int main(int argc, char** argv) {
char *input_path = 0;
char *output_path = 0;
char *dictionary_path = 0;
int force = 0;
int quality = 11;
int decompress = 0;
int repeat = 1;
int verbose = 0;
int lgwin = 0;
int copy_stat = 1;
clock_t clock_start;
int i;
ParseArgv(argc, argv, &input_path, &output_path, &dictionary_path, &force,
&quality, &decompress, &repeat, &verbose, &lgwin, &copy_stat);
clock_start = clock();
for (i = 0; i < repeat; ++i) {
FILE* fin = OpenInputFile(input_path);
FILE* fout = OpenOutputFile(output_path, force || (repeat > 1));
int is_ok = 0;
if (decompress) {
is_ok = Decompress(fin, fout, dictionary_path);
} else {
is_ok = Compress(quality, lgwin, fin, fout, dictionary_path);
}
if (!is_ok) {
unlink(output_path);
exit(1);
}
if (fclose(fout) != 0) {
perror("fclose");
exit(1);
}
/* TOCTOU violation, but otherwise it is impossible to set file times. */
if (copy_stat && (i + 1 == repeat)) {
CopyStat(input_path, output_path);
}
if (fclose(fin) != 0) {
perror("fclose");
exit(1);
}
}
if (verbose) {
clock_t clock_end = clock();
double duration = (double)(clock_end - clock_start) / CLOCKS_PER_SEC;
int64_t uncompressed_size;
double uncompressed_bytes_in_MB;
if (duration < 1e-9) {
duration = 1e-9;
}
uncompressed_size = FileSize(decompress ? output_path : input_path);
if (uncompressed_size == -1) {
fprintf(stderr, "failed to determine uncompressed file size\n");
exit(1);
}
uncompressed_bytes_in_MB =
(double)(repeat * uncompressed_size) / (1024.0 * 1024.0);
if (decompress) {
printf("Brotli decompression speed: ");
} else {
printf("Brotli compression speed: ");
}
printf("%g MB/s\n", uncompressed_bytes_in_MB / duration);
}
return 0;
}

934
c/tools/brotli.c Executable file
View File

@ -0,0 +1,934 @@
/* Copyright 2014 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Command line interface for Brotli library. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include "../common/version.h"
#include <brotli/decode.h>
#include <brotli/encode.h>
#if !defined(_WIN32)
#include <unistd.h>
#include <utime.h>
#else
#include <io.h>
#include <share.h>
#include <sys/utime.h>
#define MAKE_BINARY(FILENO) (_setmode((FILENO), _O_BINARY), (FILENO))
#if !defined(__MINGW32__)
#define STDIN_FILENO MAKE_BINARY(_fileno(stdin))
#define STDOUT_FILENO MAKE_BINARY(_fileno(stdout))
#define S_IRUSR S_IREAD
#define S_IWUSR S_IWRITE
#endif
#define fdopen _fdopen
#define unlink _unlink
#define utimbuf _utimbuf
#define utime _utime
#define fopen ms_fopen
#define open ms_open
#define chmod(F, P) (0)
#define chown(F, O, G) (0)
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#define fseek _fseeki64
#define ftell _ftelli64
#endif
/* Windows replacement for fopen() built on fopen_s(); yields the opened
   stream, or the initial NULL value if fopen_s() did not store one. */
static FILE* ms_fopen(const char* filename, const char* mode) {
  FILE* stream = 0;
  (void)fopen_s(&stream, filename, mode);
  return stream;
}
/* Windows replacement for open() built on _sopen_s(); the file is always
   opened in binary mode with _SH_DENYNO sharing. Yields the descriptor, or
   the initial -1 value if _sopen_s() did not store one. */
static int ms_open(const char* filename, int oflag, int pmode) {
  int fd = -1;
  (void)_sopen_s(&fd, filename, oflag | O_BINARY, _SH_DENYNO, pmode);
  return fd;
}
#endif /* WIN32 */
typedef enum {
COMMAND_COMPRESS,
COMMAND_DECOMPRESS,
COMMAND_HELP,
COMMAND_INVALID,
COMMAND_TEST_INTEGRITY,
COMMAND_NOOP,
COMMAND_VERSION
} Command;
#define DEFAULT_LGWIN 22
#define DEFAULT_SUFFIX ".br"
#define MAX_OPTIONS 20
/* Parsed command-line parameters plus working state of the tool. The
"Parameters" part is filled in by ParseParams. */
typedef struct {
/* Parameters */
/* Compression quality: -0..-9, -q, -Z / --best, --quality=. */
int quality;
/* Window size (log2): -w / --lgwin=; 0 is accepted as a special value. */
int lgwin;
/* -f / --force. */
BROTLI_BOOL force_overwrite;
/* Set by -j / --rm, cleared by -k / --keep. */
BROTLI_BOOL junk_source;
/* Cleared by -n / --no-copy-stat. */
BROTLI_BOOL copy_stat;
/* -v / --verbose. */
BROTLI_BOOL verbose;
/* -c / --stdout. */
BROTLI_BOOL write_to_stdout;
/* True when the selected command is COMMAND_TEST_INTEGRITY. */
BROTLI_BOOL test_integrity;
/* True when the selected command is COMMAND_DECOMPRESS. */
BROTLI_BOOL decompress;
/* -o / --output=. */
const char* output_path;
/* -D / --dictionary=. */
const char* dictionary_path;
/* -S / --suffix=; must not contain '/' or '\'. */
const char* suffix;
/* argv indices that are NOT input files (options, option values, empty
entries), in the order they were encountered. */
int not_input_indices[MAX_OPTIONS];
/* Length of the longest input-file argument (never less than 1). */
size_t longest_path_len;
/* Number of input-file arguments. */
size_t input_count;
/* Inner state */
/* NOTE(review): the fields below are not touched by the code visible in this
chunk, except argc / argv which ParseParams reads; their exact use should be
confirmed against the rest of the file. */
int argc;
char** argv;
uint8_t* dictionary;
size_t dictionary_size;
char* modified_path; /* Storage for path with appended / cut suffix */
int iterator;
int ignore;
BROTLI_BOOL iterator_error;
uint8_t* buffer;
uint8_t* input;
uint8_t* output;
const char* current_input_path;
const char* current_output_path;
FILE* fin;
FILE* fout;
} Context;
/* Parses |s| as a non-negative decimal number of 1 to 5 digits.
   Fails on empty input, on any non-digit character, on a leading zero in a
   multi-digit number, on more than 5 digits, and when the value lies outside
   [low, high]. |*result| is written only on success. */
static BROTLI_BOOL ParseInt(const char* s, int low, int high, int* result) {
  const int kMaxDigits = 5;
  int parsed = 0;
  int digits = 0;
  while (digits < kMaxDigits && s[digits] != 0) {
    const char c = s[digits];
    if (c < '0' || c > '9') return BROTLI_FALSE;
    parsed = (10 * parsed) + (c - '0');
    ++digits;
  }
  /* Nothing to parse. */
  if (digits == 0) return BROTLI_FALSE;
  /* "0" is fine, "01" is not. */
  if (digits > 1 && s[0] == '0') return BROTLI_FALSE;
  /* Input continues past the digit limit. */
  if (s[digits] != 0) return BROTLI_FALSE;
  if (!(low <= parsed && parsed <= high)) return BROTLI_FALSE;
  *result = parsed;
  return BROTLI_TRUE;
}
/* Returns the part of |path| that follows the last '/', and then, within that
   remainder, the part that follows the last '\'. When neither separator is
   present, |path| itself is returned. The result points into |path|. */
static const char* FileName(const char* path) {
  static const char kSeparators[] = { '/', '\\' };
  size_t k;
  for (k = 0; k < sizeof(kSeparators); ++k) {
    const char* last = strrchr(path, kSeparators[k]);
    if (last != NULL) path = last + 1;
  }
  return path;
}
/* Detect if the program name is a special alias that infers a command type.
   Returns COMMAND_DECOMPRESS when the base name of |name| is "unbrotli",
   optionally followed by an extension (e.g. "unbrotli.exe"); otherwise
   returns COMMAND_COMPRESS. */
static Command ParseAlias(const char* name) {
  /* TODO: cast name to lower case? */
  const char* unbrotli = "unbrotli";
  size_t unbrotli_len = strlen(unbrotli);
  name = FileName(name);
  /* Partial comparison. On Windows there could be ".exe" suffix.
     strncmp() returns 0 on a match; only then is name[unbrotli_len] known to
     lie inside the string (it is either the terminator or a real char). */
  if (strncmp(name, unbrotli, unbrotli_len) == 0) {
    char terminator = name[unbrotli_len];
    if (terminator == 0 || terminator == '.') return COMMAND_DECOMPRESS;
  }
  return COMMAND_COMPRESS;
}
/* Parses |params->argv| and fills the "Parameters" part of |params|.

   Arguments that do not start with '-' (or that follow "--", or are a lone
   "-") are counted as input files; the argv indices of everything else are
   recorded in |params->not_input_indices|.

   Returns the command to execute, or COMMAND_INVALID when the command line is
   malformed: unknown option, option given twice, missing or out-of-range
   option value, conflicting options, or too many options. */
static Command ParseParams(Context* params) {
  int argc = params->argc;
  char** argv = params->argv;
  int i;
  int next_option_index = 0;
  size_t input_count = 0;
  size_t longest_path_len = 1;
  BROTLI_BOOL command_set = BROTLI_FALSE;
  BROTLI_BOOL quality_set = BROTLI_FALSE;
  BROTLI_BOOL output_set = BROTLI_FALSE;
  BROTLI_BOOL keep_set = BROTLI_FALSE;
  BROTLI_BOOL lgwin_set = BROTLI_FALSE;
  BROTLI_BOOL suffix_set = BROTLI_FALSE;
  BROTLI_BOOL after_dash_dash = BROTLI_FALSE;
  Command command = ParseAlias(argv[0]);

  for (i = 1; i < argc; ++i) {
    const char* arg = argv[i];
    /* C99 5.1.2.2.1: "members argv[0] through argv[argc-1] inclusive shall
       contain pointers to strings"; NULL and 0-length are not forbidden.
       Measure the length only after ruling out NULL. */
    size_t arg_len = arg ? strlen(arg) : 0;

    /* Too many options. The expected longest option list is:
       "-q 0 -w 10 -o f -D d -S b -d -f -k -n -v --", i.e. 16 items in total.
       This check is an additional guard that is not expected to trigger for
       valid input, but protects against future changes. It must precede every
       store into not_input_indices (including the one for empty arguments);
       a single iteration stores at most two entries. */
    if (next_option_index > (MAX_OPTIONS - 2)) {
      return COMMAND_INVALID;
    }

    /* NULL / empty entries are neither options nor input files. */
    if (arg_len == 0) {
      params->not_input_indices[next_option_index++] = i;
      continue;
    }

    /* Input file entry. */
    if (after_dash_dash || arg[0] != '-' || arg_len == 1) {
      input_count++;
      if (longest_path_len < arg_len) longest_path_len = arg_len;
      continue;
    }

    /* Not a file entry. */
    params->not_input_indices[next_option_index++] = i;

    /* '--' entry stop parsing arguments. */
    if (arg_len == 2 && arg[1] == '-') {
      after_dash_dash = BROTLI_TRUE;
      continue;
    }

    /* Simple / coalesced options. */
    if (arg[1] != '-') {
      size_t j;
      for (j = 1; j < arg_len; ++j) {
        char c = arg[j];
        if (c >= '0' && c <= '9') {
          if (quality_set) return COMMAND_INVALID;
          quality_set = BROTLI_TRUE;
          params->quality = c - '0';
          continue;
        } else if (c == 'c') {
          if (output_set) return COMMAND_INVALID;
          output_set = BROTLI_TRUE;
          params->write_to_stdout = BROTLI_TRUE;
          continue;
        } else if (c == 'd') {
          if (command_set) return COMMAND_INVALID;
          command_set = BROTLI_TRUE;
          command = COMMAND_DECOMPRESS;
          continue;
        } else if (c == 'f') {
          if (params->force_overwrite) return COMMAND_INVALID;
          params->force_overwrite = BROTLI_TRUE;
          continue;
        } else if (c == 'h') {
          /* Don't parse further. */
          return COMMAND_HELP;
        } else if (c == 'j' || c == 'k') {
          if (keep_set) return COMMAND_INVALID;
          keep_set = BROTLI_TRUE;
          params->junk_source = TO_BROTLI_BOOL(c == 'j');
          continue;
        } else if (c == 'n') {
          if (!params->copy_stat) return COMMAND_INVALID;
          params->copy_stat = BROTLI_FALSE;
          continue;
        } else if (c == 't') {
          if (command_set) return COMMAND_INVALID;
          command_set = BROTLI_TRUE;
          command = COMMAND_TEST_INTEGRITY;
          continue;
        } else if (c == 'v') {
          if (params->verbose) return COMMAND_INVALID;
          params->verbose = BROTLI_TRUE;
          continue;
        } else if (c == 'V') {
          /* Don't parse further. */
          return COMMAND_VERSION;
        } else if (c == 'Z') {
          if (quality_set) return COMMAND_INVALID;
          quality_set = BROTLI_TRUE;
          params->quality = 11;
          continue;
        }
        /* o/q/w/D/S with parameter is expected */
        if (c != 'o' && c != 'q' && c != 'w' && c != 'D' && c != 'S') {
          return COMMAND_INVALID;
        }
        /* An option that takes a value must be the last one in its group;
           the value is the next argv entry. */
        if (j + 1 != arg_len) return COMMAND_INVALID;
        i++;
        if (i == argc || !argv[i] || argv[i][0] == 0) return COMMAND_INVALID;
        params->not_input_indices[next_option_index++] = i;
        if (c == 'o') {
          if (output_set) return COMMAND_INVALID;
          params->output_path = argv[i];
        } else if (c == 'q') {
          if (quality_set) return COMMAND_INVALID;
          quality_set = ParseInt(argv[i], BROTLI_MIN_QUALITY,
                                 BROTLI_MAX_QUALITY, &params->quality);
          if (!quality_set) return COMMAND_INVALID;
        } else if (c == 'w') {
          if (lgwin_set) return COMMAND_INVALID;
          lgwin_set = ParseInt(argv[i], 0,
                               BROTLI_MAX_WINDOW_BITS, &params->lgwin);
          if (!lgwin_set) return COMMAND_INVALID;
          if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
            return COMMAND_INVALID;
          }
        } else if (c == 'D') {
          if (params->dictionary_path) return COMMAND_INVALID;
          params->dictionary_path = argv[i];
        } else if (c == 'S') {
          if (suffix_set) return COMMAND_INVALID;
          suffix_set = BROTLI_TRUE;
          params->suffix = argv[i];
        }
      }
    } else {  /* Double-dash. */
      arg = &arg[2];
      if (strcmp("best", arg) == 0) {
        if (quality_set) return COMMAND_INVALID;
        quality_set = BROTLI_TRUE;
        params->quality = 11;
      } else if (strcmp("decompress", arg) == 0) {
        if (command_set) return COMMAND_INVALID;
        command_set = BROTLI_TRUE;
        command = COMMAND_DECOMPRESS;
      } else if (strcmp("force", arg) == 0) {
        if (params->force_overwrite) return COMMAND_INVALID;
        params->force_overwrite = BROTLI_TRUE;
      } else if (strcmp("help", arg) == 0) {
        /* Don't parse further. */
        return COMMAND_HELP;
      } else if (strcmp("keep", arg) == 0) {
        if (keep_set) return COMMAND_INVALID;
        keep_set = BROTLI_TRUE;
        params->junk_source = BROTLI_FALSE;
      } else if (strcmp("no-copy-stat", arg) == 0) {
        if (!params->copy_stat) return COMMAND_INVALID;
        params->copy_stat = BROTLI_FALSE;
      } else if (strcmp("rm", arg) == 0) {
        if (keep_set) return COMMAND_INVALID;
        keep_set = BROTLI_TRUE;
        params->junk_source = BROTLI_TRUE;
      } else if (strcmp("stdout", arg) == 0) {
        if (output_set) return COMMAND_INVALID;
        output_set = BROTLI_TRUE;
        params->write_to_stdout = BROTLI_TRUE;
      } else if (strcmp("test", arg) == 0) {
        if (command_set) return COMMAND_INVALID;
        command_set = BROTLI_TRUE;
        command = COMMAND_TEST_INTEGRITY;
      } else if (strcmp("verbose", arg) == 0) {
        if (params->verbose) return COMMAND_INVALID;
        params->verbose = BROTLI_TRUE;
      } else if (strcmp("version", arg) == 0) {
        /* Don't parse further. */
        return COMMAND_VERSION;
      } else {
        /* key=value */
        const char* value = strrchr(arg, '=');
        size_t key_len;
        if (!value || value[1] == 0) return COMMAND_INVALID;
        key_len = (size_t)(value - arg);
        /* An empty key ("--=value") would compare equal to every option name
           below, because strncmp() with a zero length always returns 0. */
        if (key_len == 0) return COMMAND_INVALID;
        value++;
        if (strncmp("dictionary", arg, key_len) == 0) {
          if (params->dictionary_path) return COMMAND_INVALID;
          params->dictionary_path = value;
        } else if (strncmp("lgwin", arg, key_len) == 0) {
          if (lgwin_set) return COMMAND_INVALID;
          lgwin_set = ParseInt(value, 0,
                               BROTLI_MAX_WINDOW_BITS, &params->lgwin);
          if (!lgwin_set) return COMMAND_INVALID;
          if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) {
            return COMMAND_INVALID;
          }
        } else if (strncmp("output", arg, key_len) == 0) {
          if (output_set) return COMMAND_INVALID;
          params->output_path = value;
        } else if (strncmp("quality", arg, key_len) == 0) {
          if (quality_set) return COMMAND_INVALID;
          quality_set = ParseInt(value, BROTLI_MIN_QUALITY,
                                 BROTLI_MAX_QUALITY, &params->quality);
          if (!quality_set) return COMMAND_INVALID;
        } else if (strncmp("suffix", arg, key_len) == 0) {
          if (suffix_set) return COMMAND_INVALID;
          suffix_set = BROTLI_TRUE;
          params->suffix = value;
        } else {
          return COMMAND_INVALID;
        }
      }
    }
  }

  params->input_count = input_count;
  params->longest_path_len = longest_path_len;
  params->decompress = (command == COMMAND_DECOMPRESS);
  params->test_integrity = (command == COMMAND_TEST_INTEGRITY);

  /* Cross-option consistency checks. */
  if (input_count > 1 && output_set) return COMMAND_INVALID;
  if (params->test_integrity) {
    if (params->output_path) return COMMAND_INVALID;
    if (params->write_to_stdout) return COMMAND_INVALID;
  }
  /* The suffix is appended to / cut from file names, so it must not contain
     path separators. */
  if (strchr(params->suffix, '/') || strchr(params->suffix, '\\')) {
    return COMMAND_INVALID;
  }

  return command;
}
/* Writes "brotli <major>.<minor>.<patch>" followed by a newline to standard
   output. The components are unpacked from BROTLI_VERSION: major is taken
   from bits 24 and above, minor from bits 12..23 and patch from bits 0..11. */
static void PrintVersion(void) {
  const int patch = BROTLI_VERSION & 0xFFF;
  const int minor = (BROTLI_VERSION >> 12) & 0xFFF;
  const int major = BROTLI_VERSION >> 24;
  fprintf(stdout, "brotli %d.%d.%d\n", major, minor, patch);
}
/* Prints the usage summary and the list of supported options to standard
   output.
   |name| is the program name substituted into the "Usage:" line.
   The numeric limits and defaults shown in the text are taken from
   BROTLI_MIN_QUALITY, BROTLI_MAX_QUALITY, BROTLI_MIN_WINDOW_BITS,
   BROTLI_MAX_WINDOW_BITS, DEFAULT_LGWIN and DEFAULT_SUFFIX, so the help stays
   in sync with those constants. */
static void PrintHelp(const char* name) {
/* String is cut to pieces with length less than 509, to conform C90 spec. */
fprintf(stdout, "\
Usage: %s [OPTION]... [FILE]...\n",
name);
fprintf(stdout, "\
Options:\n\
-# compression level (0-9)\n\
-c, --stdout write on standard output\n\
-d, --decompress decompress\n\
-f, --force force output file overwrite\n\
-h, --help display this help and exit\n");
fprintf(stdout, "\
-j, --rm remove source file(s)\n\
-k, --keep keep source file(s) (default)\n\
-n, --no-copy-stat do not copy source file(s) attributes\n\
-o FILE, --output=FILE output file (only if 1 input file)\n");
/* Separate call: this piece needs the quality bounds as format arguments. */
fprintf(stdout, "\
-q NUM, --quality=NUM compression level (%d-%d)\n",
BROTLI_MIN_QUALITY, BROTLI_MAX_QUALITY);
fprintf(stdout, "\
-t, --test test compressed file integrity\n\
-v, --verbose verbose mode\n");
/* Separate call: this piece needs the window-size bounds and default. */
fprintf(stdout, "\
-w NUM, --lgwin=NUM set LZ77 window size (0, %d-%d) (default:%d)\n",
BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS, DEFAULT_LGWIN);
fprintf(stdout, "\
window size = 2**NUM - 16\n\
0 lets compressor decide over the optimal value\n\
-D FILE, --dictionary=FILE use FILE as LZ77 dictionary\n");
/* Separate call: this piece needs the default suffix as a format argument. */
fprintf(stdout, "\
-S SUF, --suffix=SUF output file suffix (default:'%s')\n",
DEFAULT_SUFFIX);
fprintf(stdout, "\
-V, --version display version and exit\n\
-Z, --best use best compression level (11) (default)\n\
Simple options could be coalesced, i.e. '-9kf' is equivalent to '-9 -k -f'.\n\
With no FILE, or when FILE is -, read standard input.\n\
All arguments after '--' are treated as files.\n");
}
/* Returns |path| itself when it is non-NULL, otherwise the placeholder "con".
   The result is therefore always a valid string for a "%s" format. */
static const char* PrintablePath(const char* path) {
  if (!path) {
    return "con";
  }
  return path;
}
/* Opens the input stream for binary reading.
   |input_path| - path of the file to read; NULL selects standard input.
   |f| - receives the opened stream; it is set to NULL on failure.
   Returns BROTLI_TRUE on success. On failure a diagnostic is printed to
   stderr and BROTLI_FALSE is returned. */
static BROTLI_BOOL OpenInputFile(const char* input_path, FILE** f) {
  /* Standard input is already open as a descriptor, so it is wrapped with
     fdopen(); regular files are opened by name. Both calls may fail and leave
     a NULL stream, so the result is checked in a single place. */
  *f = input_path ? fopen(input_path, "rb") : fdopen(STDIN_FILENO, "rb");
  if (!*f) {
    fprintf(stderr, "failed to open input file [%s]: %s\n",
            PrintablePath(input_path), strerror(errno));
    return BROTLI_FALSE;
  }
  return BROTLI_TRUE;
}
/* Opens the output stream for binary writing.
   |output_path| - path of the file to create; NULL selects standard output.
   |f| - receives the opened stream; it is set to NULL on failure.
   |force| - when false, the file is created with O_EXCL, so an existing file
             makes the call fail instead of being overwritten.
   New files are created readable and writable by the owner only.
   Returns BROTLI_TRUE on success. On failure a diagnostic is printed to
   stderr and BROTLI_FALSE is returned. */
static BROTLI_BOOL OpenOutputFile(const char* output_path, FILE** f,
                                  BROTLI_BOOL force) {
  int fd;
  *f = NULL;
  if (!output_path) {
    *f = fdopen(STDOUT_FILENO, "wb");
    /* fdopen() may fail (e.g. out of memory); do not report success with a
       NULL stream, since callers write to it unconditionally. */
    if (!*f) {
      fprintf(stderr, "failed to open output file [%s]: %s\n",
              PrintablePath(output_path), strerror(errno));
      return BROTLI_FALSE;
    }
    return BROTLI_TRUE;
  }
  fd = open(output_path, O_CREAT | (force ? 0 : O_EXCL) | O_WRONLY | O_TRUNC,
            S_IRUSR | S_IWUSR);
  if (fd < 0) {
    fprintf(stderr, "failed to open output file [%s]: %s\n",
            PrintablePath(output_path), strerror(errno));
    return BROTLI_FALSE;
  }
  *f = fdopen(fd, "wb");
  if (!*f) {
    /* NOTE(review): |fd| (and the just-created file) are left behind on this
       path; the callers visible in this file terminate the run right after a
       failed open. */
    fprintf(stderr, "failed to open output file [%s]: %s\n",
            PrintablePath(output_path), strerror(errno));
    return BROTLI_FALSE;
  }
  return BROTLI_TRUE;
}
/* Returns the size of the file at |path| in bytes, determined by seeking to
   the end of the file and reading the stream position.
   Returns -1 when the file cannot be opened, the seek fails, the position
   cannot be obtained, or the stream cannot be closed. */
static int64_t FileSize(const char* path) {
  int64_t size = -1;
  FILE* stream = fopen(path, "rb");
  if (!stream) {
    return -1;
  }
  if (fseek(stream, 0L, SEEK_END) == 0) {
    size = ftell(stream);
    if (fclose(stream) != 0) {
      size = -1;
    }
  } else {
    fclose(stream);
  }
  return size;
}
/* Copies access/modification times, permission bits, group and owner from
   |input_path| to |output_path|. Does nothing when either path is NULL or
   when the source attributes cannot be read. Failures to set permissions,
   group or owner are reported on stderr; a failure to set times is ignored.
   TODO(eustas): this is a "best effort" implementation; honest cross-platform
                 fully featured implementation is way too hacky; add more hacks
                 by request. */
static void CopyStat(const char* input_path, const char* output_path) {
  struct stat source_stat;
  struct utimbuf file_times;

  if (!input_path || !output_path) return;
  if (stat(input_path, &source_stat) != 0) return;

  /* Timestamps first; the result is deliberately not checked. */
  file_times.actime = source_stat.st_atime;
  file_times.modtime = source_stat.st_mtime;
  utime(output_path, &file_times);

  /* Only the user/group/other permission bits are propagated. */
  if (chmod(output_path,
            source_stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) != 0) {
    fprintf(stderr, "setting access bits failed for [%s]: %s\n",
            PrintablePath(output_path), strerror(errno));
  }

  /* Group and user are changed by separate calls, so that a failure of one
     does not prevent the other. */
  if (chown(output_path, (uid_t)-1, source_stat.st_gid) != 0) {
    fprintf(stderr, "setting group failed for [%s]: %s\n",
            PrintablePath(output_path), strerror(errno));
  }
  if (chown(output_path, source_stat.st_uid, (gid_t)-1) != 0) {
    fprintf(stderr, "setting user failed for [%s]: %s\n",
            PrintablePath(output_path), strerror(errno));
  }
}
/* Loads the custom dictionary named by |context->dictionary_path|, if any.
   On success the file contents are stored in a malloc()-ed buffer assigned to
   |context->dictionary| (to be released with free() by the owner of
   |context|) and the buffer length is stored in |context->dictionary_size|.
   Returns BROTLI_TRUE when no dictionary was requested or when it was loaded;
   otherwise prints a diagnostic to stderr and returns BROTLI_FALSE. */
static BROTLI_BOOL ReadDictionary(Context* context) {
  static const int kMaxDictionarySize = (1 << 24) - 16;
  FILE* f;
  int64_t file_size_64;
  uint8_t* buffer;
  size_t bytes_read;

  if (context->dictionary_path == NULL) return BROTLI_TRUE;
  f = fopen(context->dictionary_path, "rb");
  if (f == NULL) {
    fprintf(stderr, "failed to open dictionary file [%s]: %s\n",
            PrintablePath(context->dictionary_path), strerror(errno));
    return BROTLI_FALSE;
  }

  /* Every failure path below closes |f| before returning. */
  file_size_64 = FileSize(context->dictionary_path);
  if (file_size_64 == -1) {
    fprintf(stderr, "could not get size of dictionary file [%s]\n",
            PrintablePath(context->dictionary_path));
    fclose(f);
    return BROTLI_FALSE;
  }

  if (file_size_64 > kMaxDictionarySize) {
    fprintf(stderr, "dictionary [%s] is larger than maximum allowed: %d\n",
            PrintablePath(context->dictionary_path), kMaxDictionarySize);
    fclose(f);
    return BROTLI_FALSE;
  }
  context->dictionary_size = (size_t)file_size_64;

  buffer = (uint8_t*)malloc(context->dictionary_size);
  if (!buffer) {
    fprintf(stderr, "could not read dictionary: out of memory\n");
    fclose(f);
    return BROTLI_FALSE;
  }
  bytes_read = fread(buffer, sizeof(uint8_t), context->dictionary_size, f);
  if (bytes_read != context->dictionary_size) {
    fprintf(stderr, "failed to read dictionary [%s]: %s\n",
            PrintablePath(context->dictionary_path), strerror(errno));
    free(buffer);
    fclose(f);
    return BROTLI_FALSE;
  }
  fclose(f);
  context->dictionary = buffer;
  return BROTLI_TRUE;
}
/* Advances |context| to the next input argument and derives the matching
   output path.
   On BROTLI_TRUE, |context->current_input_path| and
   |context->current_output_path| describe the next file to process; a NULL
   path stands for the console (standard input / standard output).
   BROTLI_FALSE is returned when there is nothing left to process, or, with
   |context->iterator_error| set, when an output name can not be derived from
   the name of a file that is to be decompressed. */
static BROTLI_BOOL NextFile(Context* context) {
const char* arg;
size_t arg_len;
/* Iterator points to last used arg; increment to search for the next one. */
context->iterator++;
/* No input path; read from console. */
if (context->input_count == 0) {
/* The console is processed exactly once: only on the first call. */
if (context->iterator > 1) return BROTLI_FALSE;
context->current_input_path = NULL;
/* Either write to the specified path, or to console. */
context->current_output_path = context->output_path;
return BROTLI_TRUE;
}
/* Skip option arguments. */
/* |ignore| walks |not_input_indices| in step with |iterator|; each match
   consumes one entry of the list. */
while (context->iterator == context->not_input_indices[context->ignore]) {
context->iterator++;
context->ignore++;
}
/* All args are scanned already. */
if (context->iterator >= context->argc) return BROTLI_FALSE;
/* Iterator now points to the input file name. */
arg = context->argv[context->iterator];
arg_len = strlen(arg);
/* Read from console. */
if (arg_len == 1 && arg[0] == '-') {
context->current_input_path = NULL;
context->current_output_path = context->output_path;
return BROTLI_TRUE;
}
context->current_input_path = arg;
context->current_output_path = context->output_path;
/* An explicit output path or stdout needs no derived file name. */
if (context->output_path) return BROTLI_TRUE;
if (context->write_to_stdout) return BROTLI_TRUE;
/* Derive the output name in |modified_path|, starting from the input name. */
strcpy(context->modified_path, arg);
context->current_output_path = context->modified_path;
/* If output is not specified, input path suffix should match. */
if (context->decompress) {
size_t suffix_len = strlen(context->suffix);
char* name = (char*)FileName(context->modified_path);
char* name_suffix;
size_t name_len = strlen(name);
/* At least one character has to remain after the suffix is removed. */
if (name_len < suffix_len + 1) {
fprintf(stderr, "empty output file name for [%s] input file\n",
PrintablePath(arg));
context->iterator_error = BROTLI_TRUE;
return BROTLI_FALSE;
}
name_suffix = name + name_len - suffix_len;
if (strcmp(context->suffix, name_suffix) != 0) {
fprintf(stderr, "input file [%s] suffix mismatch\n",
PrintablePath(arg));
context->iterator_error = BROTLI_TRUE;
return BROTLI_FALSE;
}
/* Cut the suffix off in place; |name| points into |modified_path|. */
name_suffix[0] = 0;
return BROTLI_TRUE;
} else {
/* Compression: append the suffix right after the copied input name. */
strcpy(context->modified_path + arg_len, context->suffix);
return BROTLI_TRUE;
}
}
/* Opens the current input stream into |context->fin| and, unless running in
   integrity-test mode, the current output stream into |context->fout|.
   The output is not opened when the input could not be opened.
   Returns BROTLI_TRUE when every required stream was opened. */
static BROTLI_BOOL OpenFiles(Context* context) {
  if (!OpenInputFile(context->current_input_path, &context->fin)) {
    return BROTLI_FALSE;
  }
  /* Integrity test discards the decoded data; no output stream is needed. */
  if (context->test_integrity) {
    return BROTLI_TRUE;
  }
  return OpenOutputFile(
      context->current_output_path, &context->fout, context->force_overwrite);
}
/* Closes the streams of the current input/output pair and finalizes the
   output file.
   |success| tells whether processing of the current file succeeded:
     - on failure a partially written output file is removed;
     - on success source attributes are copied to the output (when
       |context->copy_stat| is set) and the source file is removed (when
       |context->junk_source| is set).
   Streams that were never opened and console (NULL) paths are skipped.
   |context->fin| and |context->fout| are reset to NULL.
   Returns BROTLI_FALSE if closing any of the streams failed. */
static BROTLI_BOOL CloseFiles(Context* context, BROTLI_BOOL success) {
  BROTLI_BOOL is_ok = BROTLI_TRUE;
  if (!context->test_integrity && context->fout) {
    /* The output path is NULL when writing to standard output; there is
       nothing to remove in that case. */
    if (!success && context->current_output_path) {
      unlink(context->current_output_path);
    }
    if (fclose(context->fout) != 0) {
      if (success) {
        fprintf(stderr, "fclose failed [%s]: %s\n",
                PrintablePath(context->current_output_path), strerror(errno));
      }
      is_ok = BROTLI_FALSE;
    }

    /* TOCTOU violation, but otherwise it is impossible to set file times. */
    if (success && is_ok && context->copy_stat) {
      CopyStat(context->current_input_path, context->current_output_path);
    }
  }

  /* |fin| is NULL when the input could not be opened; fclose(NULL) is
     undefined behavior. */
  if (context->fin) {
    if (fclose(context->fin) != 0) {
      if (is_ok) {
        fprintf(stderr, "fclose failed [%s]: %s\n",
                PrintablePath(context->current_input_path), strerror(errno));
      }
      is_ok = BROTLI_FALSE;
    }
  }
  /* Remove the source only when the result has been completely written and
     closed, and only when the source is a real file (not standard input). */
  if (success && is_ok && context->junk_source &&
      context->current_input_path) {
    unlink(context->current_input_path);
  }

  context->fin = NULL;
  context->fout = NULL;

  return is_ok;
}
/* Number of bytes transferred per read / write; |context->input| and
   |context->output| are used as buffers of this size. */
static const size_t kFileBufferSize = 1 << 16;

/* Decodes the whole stream |context->fin| with decoder |s|. Decoded data is
   written to |context->fout|, or discarded in integrity-test mode.
   Returns BROTLI_TRUE when the stream was decoded completely and no input is
   left after its end. Otherwise prints a diagnostic to stderr and returns
   BROTLI_FALSE: on read / write errors, on truncated or corrupt input, and on
   trailing data after the end of the compressed stream. */
static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
  size_t available_in = 0;
  const uint8_t* next_in = NULL;
  size_t available_out = kFileBufferSize;
  uint8_t* next_out = context->output;
  BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
  for (;;) {
    /* Flush whatever the previous decoder call has produced. */
    if (next_out != context->output) {
      if (!context->test_integrity) {
        size_t out_size = (size_t)(next_out - context->output);
        fwrite(context->output, 1, out_size, context->fout);
        if (ferror(context->fout)) {
          fprintf(stderr, "failed to write output [%s]: %s\n",
                  PrintablePath(context->current_output_path),
                  strerror(errno));
          return BROTLI_FALSE;
        }
      }
      available_out = kFileBufferSize;
      next_out = context->output;
    }

    if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
      /* Decoder wants more data, but the input is exhausted: truncated. */
      if (feof(context->fin)) {
        fprintf(stderr, "corrupt input [%s]\n",
                PrintablePath(context->current_input_path));
        return BROTLI_FALSE;
      }
      available_in = fread(context->input, 1, kFileBufferSize, context->fin);
      next_in = context->input;
      if (ferror(context->fin)) {
        fprintf(stderr, "failed to read input [%s]: %s\n",
                PrintablePath(context->current_input_path), strerror(errno));
        return BROTLI_FALSE;
      }
    } else if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
      /* Nothing to do - output is already written. */
    } else if (result == BROTLI_DECODER_RESULT_SUCCESS) {
      /* The end-of-file indicator is set only by a read that hits the end of
         the file; it is still clear when the last fread() has filled the
         buffer exactly. Probe for one more byte instead of trusting feof(),
         otherwise valid input could be reported as corrupt. */
      if (available_in != 0 || fgetc(context->fin) != EOF) {
        fprintf(stderr, "corrupt input [%s]\n",
                PrintablePath(context->current_input_path));
        return BROTLI_FALSE;
      }
      return BROTLI_TRUE;
    } else {
      fprintf(stderr, "corrupt input [%s]\n",
              PrintablePath(context->current_input_path));
      return BROTLI_FALSE;
    }

    result = BrotliDecoderDecompressStream(
        s, &available_in, &next_in, &available_out, &next_out, 0);
  }
}
/* Decompresses (or, in integrity-test mode, verifies) every input selected by
   NextFile(), using a fresh decoder instance per input. The custom
   dictionary, when one is loaded, is attached to each decoder.
   Processing stops at the first input that fails.
   Returns BROTLI_TRUE when the input iteration ended without a failed input. */
static BROTLI_BOOL DecompressFiles(Context* context) {
  for (;;) {
    BrotliDecoderState* decoder;
    BROTLI_BOOL decoded = BROTLI_FALSE;

    if (!NextFile(context)) return BROTLI_TRUE;

    decoder = BrotliDecoderCreateInstance(NULL, NULL, NULL);
    if (decoder == NULL) {
      fprintf(stderr, "out of memory\n");
      return BROTLI_FALSE;
    }
    if (context->dictionary != NULL) {
      BrotliDecoderSetCustomDictionary(
          decoder, context->dictionary_size, context->dictionary);
    }

    if (OpenFiles(context)) {
      decoded = DecompressFile(context, decoder);
    }
    BrotliDecoderDestroyInstance(decoder);

    /* Streams are closed even after a failure; either failure is fatal. */
    if (!CloseFiles(context, decoded)) return BROTLI_FALSE;
    if (!decoded) return BROTLI_FALSE;
  }
}
/* Compresses the whole stream |context->fin| with encoder |s| and writes the
   result to |context->fout|. Input is read in chunks of kFileBufferSize
   bytes; once the end of the input is reached the encoder is asked to finish
   the stream.
   Returns BROTLI_TRUE when the encoder reports that it is finished; on a
   read, write or encoder failure prints a diagnostic to stderr and returns
   BROTLI_FALSE. */
static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) {
  const uint8_t* src = NULL;
  size_t src_left = 0;
  uint8_t* dst = context->output;
  size_t dst_left = kFileBufferSize;
  BROTLI_BOOL input_done = BROTLI_FALSE;

  do {
    /* Refill the input buffer once the encoder has consumed all of it. */
    if (!input_done && src_left == 0) {
      src_left = fread(context->input, 1, kFileBufferSize, context->fin);
      src = context->input;
      if (ferror(context->fin)) {
        fprintf(stderr, "failed to read input [%s]: %s\n",
                PrintablePath(context->current_input_path), strerror(errno));
        return BROTLI_FALSE;
      }
      if (feof(context->fin)) input_done = BROTLI_TRUE;
    }

    if (!BrotliEncoderCompressStream(
            s,
            input_done ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
            &src_left, &src, &dst_left, &dst, NULL)) {
      /* Should detect OOM? */
      fprintf(stderr, "failed to compress data [%s]\n",
              PrintablePath(context->current_input_path));
      return BROTLI_FALSE;
    }

    /* Write out whatever the encoder has produced and reset the buffer. */
    if (dst_left != kFileBufferSize) {
      fwrite(context->output, 1, kFileBufferSize - dst_left, context->fout);
      if (ferror(context->fout)) {
        fprintf(stderr, "failed to write output [%s]: %s\n",
                PrintablePath(context->current_output_path), strerror(errno));
        return BROTLI_FALSE;
      }
      dst_left = kFileBufferSize;
      dst = context->output;
    }
  } while (!BrotliEncoderIsFinished(s));

  return BROTLI_TRUE;
}
/* Compresses every input selected by NextFile(), using a fresh encoder
   instance per input. Each encoder is configured with |context->quality| and
   |context->lgwin|, and with the custom dictionary when one is loaded.
   Processing stops at the first input that fails.
   Returns BROTLI_TRUE when the input iteration ended without a failed input. */
static BROTLI_BOOL CompressFiles(Context* context) {
  for (;;) {
    BrotliEncoderState* encoder;
    BROTLI_BOOL compressed = BROTLI_FALSE;

    if (!NextFile(context)) return BROTLI_TRUE;

    encoder = BrotliEncoderCreateInstance(NULL, NULL, NULL);
    if (encoder == NULL) {
      fprintf(stderr, "out of memory\n");
      return BROTLI_FALSE;
    }
    BrotliEncoderSetParameter(
        encoder, BROTLI_PARAM_QUALITY, (uint32_t)context->quality);
    BrotliEncoderSetParameter(
        encoder, BROTLI_PARAM_LGWIN, (uint32_t)context->lgwin);
    if (context->dictionary != NULL) {
      BrotliEncoderSetCustomDictionary(
          encoder, context->dictionary_size, context->dictionary);
    }

    if (OpenFiles(context)) {
      compressed = CompressFile(context, encoder);
    }
    BrotliEncoderDestroyInstance(encoder);

    /* Streams are closed even after a failure; either failure is fatal. */
    if (!CloseFiles(context, compressed)) return BROTLI_FALSE;
    if (!compressed) return BROTLI_FALSE;
  }
}
/* Program entry point: initializes the context with defaults, parses the
   command line, allocates the working buffers and runs the selected command.
   Returns 0 on success; terminates with exit status 1 on any failure,
   including an invalid command line. */
int main(int argc, char** argv) {
Command command;
Context context;
BROTLI_BOOL is_ok = BROTLI_TRUE;
int i;
/* Defaults; ParseParams() overrides them according to the command line. */
context.quality = 11;
context.lgwin = DEFAULT_LGWIN;
context.force_overwrite = BROTLI_FALSE;
context.junk_source = BROTLI_FALSE;
context.copy_stat = BROTLI_TRUE;
context.test_integrity = BROTLI_FALSE;
context.verbose = BROTLI_FALSE;
context.write_to_stdout = BROTLI_FALSE;
context.decompress = BROTLI_FALSE;
context.output_path = NULL;
context.dictionary_path = NULL;
context.suffix = DEFAULT_SUFFIX;
for (i = 0; i < MAX_OPTIONS; ++i) context.not_input_indices[i] = 0;
context.longest_path_len = 1;
context.input_count = 0;
context.argc = argc;
context.argv = argv;
/* Resources owned by the context; all of them are released before exit. */
context.dictionary = NULL;
context.dictionary_size = 0;
context.modified_path = NULL;
/* State of the input file iteration (see NextFile). */
context.iterator = 0;
context.ignore = 0;
context.iterator_error = BROTLI_FALSE;
context.buffer = NULL;
context.current_input_path = NULL;
context.current_output_path = NULL;
context.fin = NULL;
context.fout = NULL;
command = ParseParams(&context);
/* Only commands that process files need the dictionary and the buffers. */
if (command == COMMAND_COMPRESS || command == COMMAND_DECOMPRESS ||
command == COMMAND_TEST_INTEGRITY) {
if (!ReadDictionary(&context)) is_ok = BROTLI_FALSE;
if (is_ok) {
/* Room for the longest input path with the suffix appended, plus NUL. */
size_t modified_path_len =
context.longest_path_len + strlen(context.suffix) + 1;
context.modified_path = (char*)malloc(modified_path_len);
/* One allocation, split in halves: input buffer, then output buffer. */
context.buffer = (uint8_t*)malloc(kFileBufferSize * 2);
if (!context.modified_path || !context.buffer) {
fprintf(stderr, "out of memory\n");
is_ok = BROTLI_FALSE;
} else {
context.input = context.buffer;
context.output = context.buffer + kFileBufferSize;
}
}
}
/* Preparation failed: skip the command, but still run the cleanup below. */
if (!is_ok) command = COMMAND_NOOP;
switch (command) {
case COMMAND_NOOP:
break;
case COMMAND_VERSION:
PrintVersion();
break;
case COMMAND_COMPRESS:
is_ok = CompressFiles(&context);
break;
case COMMAND_DECOMPRESS:
case COMMAND_TEST_INTEGRITY:
is_ok = DecompressFiles(&context);
break;
case COMMAND_HELP:
case COMMAND_INVALID:
default:
/* Help is shown in both cases; only an explicit help request succeeds. */
PrintHelp(FileName(argv[0]));
is_ok = (command == COMMAND_HELP);
break;
}
if (context.iterator_error) is_ok = BROTLI_FALSE;
free(context.dictionary);
free(context.modified_path);
free(context.buffer);
if (!is_ok) exit(1);
return 0;
}

110
c/tools/brotli.md Executable file
View File

@ -0,0 +1,110 @@
brotli(1) -- brotli, unbrotli - compress or decompress files
================================================================
SYNOPSIS
--------
`brotli` [*OPTION|FILE*]...
`unbrotli` is equivalent to `brotli --decompress`
DESCRIPTION
-----------
`brotli` is a generic-purpose lossless compression algorithm that compresses
data using a combination of a modern variant of the **LZ77** algorithm, Huffman
coding and 2-nd order context modeling, with a compression ratio comparable to
the best currently available general-purpose compression methods. It is similar
in speed to deflate but offers denser compression.
`brotli` command line syntax is similar to `gzip (1)` and `zstd (1)`.
Unlike `gzip (1)`, source files are preserved by default. It is possible to
remove them after processing by using the `--rm` _option_.
Arguments that look like "`--name`" or "`--name=value`" are _options_. Every
_option_ has a short form "`-x`" or "`-x value`". Multiple short form _options_
could be coalesced:
* "`--decompress --stdout --suffix=.b`" works the same as
* "`-d -c -S .b`" and
* "`-dcS .b`"
`brotli` has 3 operation modes:
* default mode is compression;
* `--decompress` option activates decompression mode;
* `--test` option switches to integrity test mode; this option is equivalent to
"`--decompress --stdout`" except that the decompressed data is discarded
instead of being written to standard output.
Every non-option argument is a _file_ entry. If no _files_ are given or _file_
is "`-`", `brotli` reads from standard input. All arguments after "`--`" are
_file_ entries.
Unless `--stdout` or `--output` is specified, _files_ are written to a new file
whose name is derived from the source _file_ name:
* when compressing, a suffix is appended to the source filename to
get the target filename
* when decompressing, a suffix is removed from the source filename to
get the target filename
Default suffix is `.br`, but it could be specified with `--suffix` option.
Conflicting or duplicate _options_ are not allowed.
OPTIONS
-------
* `-#`:
compression level (0-9); bigger values cause denser, but slower compression
* `-c`, `--stdout`:
write on standard output
* `-d`, `--decompress`:
decompress mode
* `-f`, `--force`:
force output file overwrite
* `-h`, `--help`:
display this help and exit
* `-j`, `--rm`:
remove source file(s); `gzip (1)`-like behaviour
* `-k`, `--keep`:
keep source file(s); `zstd (1)`-like behaviour
* `-n`, `--no-copy-stat`:
do not copy source file(s) attributes
* `-o FILE`, `--output=FILE`:
output file; valid only if there is a single input entry
* `-q NUM`, `--quality=NUM`:
compression level (0-11); bigger values cause denser, but slower compression
* `-t`, `--test`:
test file integrity mode
* `-v`, `--verbose`:
increase output verbosity
* `-w NUM`, `--lgwin=NUM`:
set LZ77 window size (0, 10-24) (default: 22); window size is
`(2**NUM - 16)`; 0 lets compressor decide over the optimal value; bigger
window sizes improve density; decoder might require up to window size
memory to operate
* `-D FILE`, `--dictionary=FILE`:
use FILE as LZ77 dictionary; same dictionary MUST be used both for
compression and decompression
* `-S SUF`, `--suffix=SUF`:
output file suffix (default: `.br`)
* `-V`, `--version`:
display version and exit
* `-Z`, `--best`:
use best compression level (default); same as "`-q 11`"
SEE ALSO
--------
`brotli` file format is defined in
[RFC 7932](https://www.ietf.org/rfc/rfc7932.txt).
`brotli` is open-sourced under the
[MIT License](https://opensource.org/licenses/MIT).
Mailing list: https://groups.google.com/forum/#!forum/brotli
BUGS
----
Report bugs at: https://github.com/google/brotli/issues

136
docs/brotli.1 Executable file
View File

@ -0,0 +1,136 @@
.TH "BROTLI" "1" "May 2017" "brotli 1.0.0" "User commands"
.SH "NAME"
\fBbrotli\fR \- brotli, unbrotli \- compress or decompress files
.SH SYNOPSIS
.P
\fBbrotli\fP [\fIOPTION|FILE\fR]\.\.\.
.P
\fBunbrotli\fP is equivalent to \fBbrotli \-\-decompress\fP
.SH DESCRIPTION
.P
\fBbrotli\fP is a generic\-purpose lossless compression algorithm that compresses
data using a combination of a modern variant of the \fBLZ77\fR algorithm, Huffman
coding and 2\-nd order context modeling, with a compression ratio comparable to
the best currently available general\-purpose compression methods\. It is similar
in speed to deflate but offers denser compression\.
.P
\fBbrotli\fP command line syntax is similar to \fBgzip (1)\fP and \fBzstd (1)\fP\|\.
Unlike \fBgzip (1)\fP, source files are preserved by default\. It is possible to
remove them after processing by using the \fB\-\-rm\fP \fIoption\fR\|\.
.P
Arguments that look like "\fB\-\-name\fP" or "\fB\-\-name=value\fP" are \fIoptions\fR\|\. Every
\fIoption\fR has a short form "\fB\-x\fP" or "\fB\-x value\fP"\. Multiple short form \fIoptions\fR
could be coalesced:
.RS 0
.IP \(bu 2
"\fB\-\-decompress \-\-stdout \-\-suffix=\.b\fP" works the same as
.IP \(bu 2
"\fB\-d \-c \-S \.b\fP" and
.IP \(bu 2
"\fB\-dcS \.b\fP"
.RE
.P
\fBbrotli\fP has 3 operation modes:
.RS 0
.IP \(bu 2
default mode is compression;
.IP \(bu 2
\fB\-\-decompress\fP option activates decompression mode;
.IP \(bu 2
\fB\-\-test\fP option switches to integrity test mode; this option is equivalent to
"\fB\-\-decompress \-\-stdout\fP" except that the decompressed data is discarded
instead of being written to standard output\.
.RE
.P
Every non\-option argument is a \fIfile\fR entry\. If no \fIfiles\fR are given or \fIfile\fR
is "\fB\-\fP", \fBbrotli\fP reads from standard input\. All arguments after "\fB\-\-\fP" are
\fIfile\fR entries\.
.P
Unless \fB\-\-stdout\fP or \fB\-\-output\fP is specified, \fIfiles\fR are written to a new file
whose name is derived from the source \fIfile\fR name:
.RS 0
.IP \(bu 2
when compressing, a suffix is appended to the source filename to
get the target filename
.IP \(bu 2
when decompressing, a suffix is removed from the source filename to
get the target filename
.RE
.P
Default suffix is \fB\|\.br\fP, but it could be specified with \fB\-\-suffix\fP option\.
.P
Conflicting or duplicate \fIoptions\fR are not allowed\.
.SH OPTIONS
.RS 0
.IP \(bu 2
\fB\-#\fP:
compression level (0\-9); bigger values cause denser, but slower compression
.IP \(bu 2
\fB\-c\fP, \fB\-\-stdout\fP:
write on standard output
.IP \(bu 2
\fB\-d\fP, \fB\-\-decompress\fP:
decompress mode
.IP \(bu 2
\fB\-f\fP, \fB\-\-force\fP:
force output file overwrite
.IP \(bu 2
\fB\-h\fP, \fB\-\-help\fP:
display this help and exit
.IP \(bu 2
\fB\-j\fP, \fB\-\-rm\fP:
remove source file(s); \fBgzip (1)\fP\-like behaviour
.IP \(bu 2
\fB\-k\fP, \fB\-\-keep\fP:
keep source file(s); \fBzstd (1)\fP\-like behaviour
.IP \(bu 2
\fB\-n\fP, \fB\-\-no\-copy\-stat\fP:
do not copy source file(s) attributes
.IP \(bu 2
\fB\-o FILE\fP, \fB\-\-output=FILE\fP:
output file; valid only if there is a single input entry
.IP \(bu 2
\fB\-q NUM\fP, \fB\-\-quality=NUM\fP:
compression level (0\-11); bigger values cause denser, but slower compression
.IP \(bu 2
\fB\-t\fP, \fB\-\-test\fP:
test file integrity mode
.IP \(bu 2
\fB\-v\fP, \fB\-\-verbose\fP:
increase output verbosity
.IP \(bu 2
\fB\-w NUM\fP, \fB\-\-lgwin=NUM\fP:
set LZ77 window size (0, 10\-24) (default: 22); window size is
\fB(2**NUM \- 16)\fP; 0 lets compressor decide over the optimal value; bigger
window sizes improve density; decoder might require up to window size
memory to operate
.IP \(bu 2
\fB\-D FILE\fP, \fB\-\-dictionary=FILE\fP:
use FILE as LZ77 dictionary; same dictionary MUST be used both for
compression and decompression
.IP \(bu 2
\fB\-S SUF\fP, \fB\-\-suffix=SUF\fP:
output file suffix (default: \fB\|\.br\fP)
.IP \(bu 2
\fB\-V\fP, \fB\-\-version\fP:
display version and exit
.IP \(bu 2
\fB\-Z\fP, \fB\-\-best\fP:
use best compression level (default); same as "\fB\-q 11\fP"
.RE
.SH SEE ALSO
.P
\fBbrotli\fP file format is defined in
RFC 7932 \fIhttps://www\.ietf\.org/rfc/rfc7932\.txt\fR\|\.
.P
\fBbrotli\fP is open\-sourced under the
MIT License \fIhttps://opensource\.org/licenses/MIT\fR\|\.
.P
Mailing list: https://groups\.google\.com/forum/#!forum/brotli
.SH BUGS
.P
Report bugs at: https://github\.com/google/brotli/issues

View File

@ -1,4 +1,4 @@
.TH "decode.h" 3 "Tue Feb 28 2017" "Brotli" \" -*- nroff -*-
.TH "decode.h" 3 "Sun May 21 2017" "Brotli" \" -*- nroff -*-
.ad l
.nh
.SH NAME
@ -233,9 +233,9 @@ Input is never overconsumed, so \fCnext_in\fP and \fCavailable_in\fP could be pa
.RS 4
\fBBROTLI_DECODER_RESULT_ERROR\fP if input is corrupted, memory allocation failed, arguments were invalid, etc\&.; use \fBBrotliDecoderGetErrorCode\fP to get detailed error code
.PP
\fBBROTLI_DECODER_RESULT_NEEDS_MORE_INPUT\fP decoding is blocked until more output space is provided
\fBBROTLI_DECODER_RESULT_NEEDS_MORE_INPUT\fP decoding is blocked until more input data is provided
.PP
\fBBROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT\fP decoding is blocked until more input data is provided
\fBBROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT\fP decoding is blocked until more output space is provided
.PP
\fBBROTLI_DECODER_RESULT_SUCCESS\fP decoding is finished, no more input might be consumed and no more output will be produced
.RE

View File

@ -9,9 +9,11 @@ import (
"bytes"
"fmt"
"io"
"io/ioutil"
"math"
"math/rand"
"testing"
"time"
)
func checkCompressedData(compressedData, wantOriginalData []byte) error {
@ -173,6 +175,85 @@ func TestEncoderFlush(t *testing.T) {
}
}
type readerWithTimeout struct {
io.ReadCloser
}
func (r readerWithTimeout) Read(p []byte) (int, error) {
type result struct {
n int
err error
}
ch := make(chan result)
go func() {
n, err := r.ReadCloser.Read(p)
ch <- result{n, err}
}()
select {
case result := <-ch:
return result.n, result.err
case <-time.After(5 * time.Second):
return 0, fmt.Errorf("read timed out")
}
}
// TestDecoderStreaming checks that data written and flushed on the encoder
// side of a pipe can be read on the decoder side right away, segment by
// segment, while the compressed stream is still open. Reads go through
// readerWithTimeout, so a reader that blocks fails the test instead of
// hanging it.
func TestDecoderStreaming(t *testing.T) {
pr, pw := io.Pipe()
writer := NewWriter(pw, WriterOptions{Quality: 5, LGWin: 20})
reader := readerWithTimeout{NewReader(pr)}
defer func() {
if err := reader.Close(); err != nil {
t.Errorf("reader.Close: %v", err)
}
go ioutil.ReadAll(pr) // swallow the "EOF" token from writer.Close
if err := writer.Close(); err != nil {
t.Errorf("writer.Close: %v", err)
}
}()
// Segments are handed over through ch and written by a separate goroutine;
// write and flush errors are reported back through errch.
ch := make(chan []byte)
errch := make(chan error)
go func() {
for {
segment, ok := <-ch
if !ok {
return
}
if n, err := writer.Write(segment); err != nil || n != len(segment) {
errch <- fmt.Errorf("write=%v,%v, want %v,%v", n, err, len(segment), nil)
return
}
// Flush after every segment, before the next one is accepted.
if err := writer.Flush(); err != nil {
errch <- fmt.Errorf("flush: %v", err)
return
}
}
}()
// Closing ch makes the writing goroutine return once it receives from ch again.
defer close(ch)
segments := [...][]byte{
[]byte("first"),
[]byte("second"),
[]byte("third"),
}
for k, segment := range segments {
t.Run(fmt.Sprintf("Segment%d", k), func(t *testing.T) {
// Hand the segment to the writing goroutine; fail on an error reported by
// it, or when it does not accept the segment within 5 seconds.
select {
case ch <- segment:
case err := <-errch:
t.Fatalf("write: %v", err)
case <-time.After(5 * time.Second):
t.Fatalf("timed out")
}
// A single Read is expected to return the whole segment.
wantLen := len(segment)
got := make([]byte, wantLen)
if n, err := reader.Read(got); err != nil || n != wantLen || !bytes.Equal(got, segment) {
t.Fatalf("read[%d]=%q,%v,%v, want %q,%v,%v", k, got, n, err, segment, wantLen, nil)
}
})
}
}
func TestReader(t *testing.T) {
content := bytes.Repeat([]byte("hello world!"), 10000)
encoded, _ := Encode(content, WriterOptions{Quality: 5})

View File

@ -95,7 +95,7 @@ func (r *Reader) Read(p []byte) (n int, err error) {
return 0, nil
}
for n == 0 {
for {
var written, consumed C.size_t
var data *C.uint8_t
if len(r.in) != 0 {
@ -128,9 +128,14 @@ func (r *Reader) Read(p []byte) (n int, err error) {
return 0, errInvalidState
}
// Calling r.src.Read may block. Don't block if we have data to return.
if n > 0 {
return n, nil
}
// Top off the buffer.
encN, err := r.src.Read(r.buf)
if encN == 0 && n == 0 {
if encN == 0 {
// Not enough data to complete decoding.
if err == io.EOF {
return 0, io.ErrUnexpectedEOF

View File

@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"])
licenses(["notice"]) # MIT
java_library(
name = "lib",
name = "dec",
srcs = glob(["*.java"], exclude = ["*Test*.java"]),
)
@ -14,7 +14,7 @@ java_library(
name = "test_lib",
srcs = glob(["*Test*.java"]),
deps = [
":lib",
":dec",
"@junit_junit//jar",
],
testonly = 1,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -8,6 +8,7 @@ package org.brotli.dec;
import static org.junit.Assert.assertEquals;
import java.nio.ByteBuffer;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@ -18,10 +19,10 @@ import org.junit.runners.JUnit4;
@RunWith(JUnit4.class)
public class DictionaryTest {
private static long crc64(byte[] data) {
private static long crc64(ByteBuffer data) {
long crc = -1;
for (int i = 0; i < data.length; ++i) {
long c = (crc ^ (long) (data[i] & 0xFF)) & 0xFF;
for (int i = 0; i < data.capacity(); ++i) {
long c = (crc ^ (long) (data.get(i) & 0xFF)) & 0xFF;
for (int k = 0; k < 8; k++) {
c = (c >>> 1) ^ (-(c & 1L) & -3932672073523589310L);
}

View File

@ -0,0 +1,76 @@
/* Copyright 2016 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
package org.brotli.dec;
import static org.junit.Assert.assertEquals;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/**
 * Tests for {@link Dictionary}.
 *
 * <p>Checks that a stream referencing a dictionary word fails to decode until
 * the dictionary data has been supplied via {@link Dictionary#setData}, and
 * decodes afterwards.
 */
@RunWith(JUnit4.class)
public class SetDictionaryTest {

  /** See {@link SynthTest} */
  private static final byte[] BASE_DICT_WORD = {
    (byte) 0x1b, (byte) 0x03, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x80,
    (byte) 0xe3, (byte) 0xb4, (byte) 0x0d, (byte) 0x00, (byte) 0x00, (byte) 0x07, (byte) 0x5b,
    (byte) 0x26, (byte) 0x31, (byte) 0x40, (byte) 0x02, (byte) 0x00, (byte) 0xe0, (byte) 0x4e,
    (byte) 0x1b, (byte) 0x41, (byte) 0x02
  };

  /** See {@link SynthTest} */
  private static final byte[] ONE_COMMAND = {
    (byte) 0x1b, (byte) 0x02, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x80,
    (byte) 0xe3, (byte) 0xb4, (byte) 0x0d, (byte) 0x00, (byte) 0x00, (byte) 0x07, (byte) 0x5b,
    (byte) 0x26, (byte) 0x31, (byte) 0x40, (byte) 0x02, (byte) 0x00, (byte) 0xe0, (byte) 0x4e,
    (byte) 0x1b, (byte) 0x11, (byte) 0x86, (byte) 0x02
  };

  /**
   * Decodes streams before and after the dictionary is installed.
   *
   * <p>The dictionary file is located through the {@code RFC_DICTIONARY} system property.
   */
  @Test
  public void testSetDictionary() throws IOException {
    byte[] buffer = new byte[16];
    BrotliInputStream decoder;

    // No dictionary set; still decoding should succeed, if no dictionary entries are used.
    decoder = new BrotliInputStream(new ByteArrayInputStream(ONE_COMMAND));
    assertEquals(3, decoder.read(buffer, 0, buffer.length));
    assertEquals("aaa", new String(buffer, 0, 3, "US-ASCII"));
    decoder.close();

    // Decoding of dictionary item must fail.
    decoder = new BrotliInputStream(new ByteArrayInputStream(BASE_DICT_WORD));
    boolean decodingFailed = false;
    try {
      decoder.read(buffer, 0, buffer.length);
    } catch (IOException ex) {
      decodingFailed = true;
    }
    assertEquals(true, decodingFailed);
    decoder.close();

    // Load dictionary data.
    FileInputStream dictionaryFile = new FileInputStream(System.getProperty("RFC_DICTIONARY"));
    ByteBuffer dictionary;
    try {
      FileChannel dictionaryChannel = dictionaryFile.getChannel();
      dictionary = dictionaryChannel.map(FileChannel.MapMode.READ_ONLY, 0, 122784).load();
    } finally {
      // Closing the stream also closes its channel; an established mapping
      // stays valid after the channel is closed.
      dictionaryFile.close();
    }
    Dictionary.setData(dictionary);

    // Retry decoding of dictionary item.
    decoder = new BrotliInputStream(new ByteArrayInputStream(BASE_DICT_WORD));
    assertEquals(4, decoder.read(buffer, 0, buffer.length));
    assertEquals("time", new String(buffer, 0, 4, "US-ASCII"));
    decoder.close();
  }
}

View File

@ -27,6 +27,8 @@ import static org.brotli.dec.WordTransformType.OMIT_LAST_9;
import static org.brotli.dec.WordTransformType.UPPERCASE_ALL;
import static org.brotli.dec.WordTransformType.UPPERCASE_FIRST;
import java.nio.ByteBuffer;
/**
* Transformations on dictionary words.
*/
@ -174,7 +176,7 @@ final class Transform {
new Transform(" ", UPPERCASE_FIRST, "='")
};
static int transformDictionaryWord(byte[] dst, int dstOffset, byte[] word, int wordOffset,
static int transformDictionaryWord(byte[] dst, int dstOffset, ByteBuffer data, int wordOffset,
int len, Transform transform) {
int offset = dstOffset;
@ -198,7 +200,7 @@ final class Transform {
len -= WordTransformType.getOmitLast(op);
i = len;
while (i > 0) {
dst[offset++] = word[wordOffset++];
dst[offset++] = data.get(wordOffset++);
i--;
}

View File

@ -9,6 +9,7 @@ package org.brotli.dec;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import java.nio.ByteBuffer;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@ -36,7 +37,8 @@ public class TransformTest {
byte[] output = new byte[2];
byte[] input = {119, 111, 114, 100}; // "word"
Transform transform = new Transform("[", WordTransformType.OMIT_FIRST_5, "]");
Transform.transformDictionaryWord(output, 0, input, 0, input.length, transform);
Transform.transformDictionaryWord(
output, 0, ByteBuffer.wrap(input), 0, input.length, transform);
byte[] expectedOutput = {91, 93}; // "[]"
assertArrayEquals(expectedOutput, output);
}
@ -46,7 +48,8 @@ public class TransformTest {
byte[] output = new byte[8];
byte[] input = {113, -61, -90, -32, -92, -86}; // "qæप"
Transform transform = new Transform("[", WordTransformType.UPPERCASE_ALL, "]");
Transform.transformDictionaryWord(output, 0, input, 0, input.length, transform);
Transform.transformDictionaryWord(
output, 0, ByteBuffer.wrap(input), 0, input.length, transform);
byte[] expectedOutput = {91, 81, -61, -122, -32, -92, -81, 93}; // "[QÆय]"
assertArrayEquals(expectedOutput, output);
}
@ -61,7 +64,7 @@ public class TransformTest {
int offset = 0;
for (int i = 0; i < Transform.TRANSFORMS.length; ++i) {
offset += Transform.transformDictionaryWord(
output, offset, testWord, 0, testWord.length, Transform.TRANSFORMS[i]);
output, offset, ByteBuffer.wrap(testWord), 0, testWord.length, Transform.TRANSFORMS[i]);
output[offset++] = -1;
}
assertEquals(output.length, offset);

View File

@ -35,6 +35,9 @@
<testIncludes>
<include>**/dec/*Test*.java</include>
</testIncludes>
<testExcludes>
<exclude>**/dec/SetDictionaryTest.java</exclude>
</testExcludes>
</configuration>
</plugin>
<plugin>

View File

@ -2,15 +2,23 @@
# Integration test runner + corpus for Java port of Brotli decoder.
java_library(
name = "bundle_checker_lib",
name = "bundle_helper",
srcs = ["BundleHelper.java"],
)
java_library(
name = "bundle_checker",
srcs = ["BundleChecker.java"],
deps = ["//java/org/brotli/dec:lib"],
deps = [
":bundle_helper",
"//java/org/brotli/dec",
],
)
java_binary(
name = "bundle_checker",
name = "bundle_checker_bin",
main_class = "org.brotli.integration.BundleChecker",
runtime_deps = [":bundle_checker_lib"],
runtime_deps = [":bundle_checker"],
)
java_test(
@ -19,7 +27,7 @@ java_test(
data = ["test_data.zip"],
main_class = "org.brotli.integration.BundleChecker",
use_testrunner = 0,
runtime_deps = [":bundle_checker_lib"],
runtime_deps = [":bundle_checker"],
)
java_test(
@ -31,5 +39,5 @@ java_test(
data = ["fuzz_data.zip"],
main_class = "org.brotli.integration.BundleChecker",
use_testrunner = 0,
runtime_deps = [":bundle_checker_lib"],
runtime_deps = [":bundle_checker"],
)

View File

@ -12,7 +12,6 @@ import java.io.FileNotFoundException;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
@ -39,28 +38,6 @@ public class BundleChecker implements Runnable {
this.sanityCheck = sanityCheck;
}
/** ECMA CRC64 polynomial. */
private static final long CRC_64_POLY =
new BigInteger("C96C5795D7870F42", 16).longValue();
/**
* Rolls CRC64 calculation.
*
* <p> {@code CRC64(data) = -1 ^ updateCrc64((... updateCrc64(-1, firstBlock), ...), lastBlock);}
* <p> This simple and reliable checksum is chosen to make is easy to calculate the same value
* across the variety of languages (C++, Java, Go, ...).
*/
private static long updateCrc64(long crc, byte[] data, int offset, int length) {
for (int i = offset; i < offset + length; ++i) {
long c = (crc ^ (long) (data[i] & 0xFF)) & 0xFF;
for (int k = 0; k < 8; k++) {
c = ((c & 1) == 1) ? CRC_64_POLY ^ (c >>> 1) : c >>> 1;
}
crc = c ^ (crc >>> 8);
}
return crc;
}
private long decompressAndCalculateCrc(ZipInputStream input) throws IOException {
/* Do not allow entry readers to close the whole ZipInputStream. */
FilterInputStream entryStream = new FilterInputStream(input) {
@ -68,18 +45,14 @@ public class BundleChecker implements Runnable {
public void close() {}
};
long crc = -1;
byte[] buffer = new byte[65536];
BrotliInputStream decompressedStream = new BrotliInputStream(entryStream);
while (true) {
int len = decompressedStream.read(buffer);
if (len <= 0) {
break;
}
crc = updateCrc64(crc, buffer, 0, len);
long crc;
try {
crc = BundleHelper.fingerprintStream(decompressedStream);
} finally {
decompressedStream.close();
}
decompressedStream.close();
return ~crc;
return crc;
}
@Override
@ -99,9 +72,7 @@ public class BundleChecker implements Runnable {
continue;
}
entryName = entry.getName();
int dotIndex = entryName.indexOf('.');
String entryCrcString = (dotIndex == -1) ? entryName : entryName.substring(0, dotIndex);
long entryCrc = new BigInteger(entryCrcString, 16).longValue();
long entryCrc = BundleHelper.getExpectedFingerprint(entryName);
try {
if (entryCrc != decompressAndCalculateCrc(zis) && !sanityCheck) {
throw new RuntimeException("CRC mismatch");

View File

@ -0,0 +1,113 @@
/* Copyright 2016 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
package org.brotli.integration;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/**
 * Utilities for working with bundles of test files stored in a zip archive.
 */
public class BundleHelper {
  private BundleHelper() { }

  /**
   * Lists the names of all non-directory entries of a zip archive.
   *
   * @param input stream holding the zip archive; it is closed before returning
   * @return entry names, in the order they appear in the archive
   * @throws IOException if the archive can not be read
   */
  public static List<String> listEntries(InputStream input) throws IOException {
    List<String> names = new ArrayList<String>();
    ZipInputStream archive = new ZipInputStream(input);
    try {
      for (ZipEntry item = archive.getNextEntry(); item != null; item = archive.getNextEntry()) {
        boolean isFile = !item.isDirectory();
        if (isFile) {
          names.add(item.getName());
        }
        archive.closeEntry();
      }
    } finally {
      archive.close();
    }
    return names;
  }

  /**
   * Reads a stream until its end.
   *
   * @param input stream to drain; it is not closed by this method
   * @return all the bytes read from the stream
   * @throws IOException if reading fails
   */
  public static byte[] readStream(InputStream input) throws IOException {
    byte[] chunk = new byte[65536];
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    for (int count = input.read(chunk); count != -1; count = input.read(chunk)) {
      sink.write(chunk, 0, count);
    }
    return sink.toByteArray();
  }

  /**
   * Extracts the contents of the first archive entry with the given name.
   *
   * @param input stream holding the zip archive; it is closed before returning
   * @param entryName exact name of the entry to extract
   * @return entry contents, or {@code null} if there is no such entry
   * @throws IOException if the archive can not be read
   */
  public static byte[] readEntry(InputStream input, String entryName) throws IOException {
    ZipInputStream archive = new ZipInputStream(input);
    try {
      ZipEntry item;
      while ((item = archive.getNextEntry()) != null) {
        boolean matches = item.getName().equals(entryName);
        byte[] payload = matches ? readStream(archive) : null;
        archive.closeEntry();
        if (matches) {
          return payload;
        }
      }
      /* entry not found */
      return null;
    } finally {
      archive.close();
    }
  }

  /** ECMA CRC64 polynomial. */
  private static final long CRC_64_POLY = 0xC96C5795D7870F42L;

  /**
   * Rolls CRC64 calculation.
   *
   * <p> {@code CRC64(data) = -1 ^ updateCrc64((... updateCrc64(-1, firstBlock), ...), lastBlock);}
   * <p> This simple and reliable checksum is chosen to make it easy to calculate the same value
   * across the variety of languages (C++, Java, Go, ...).
   *
   * @param crc checksum state accumulated so far
   * @param data buffer holding the bytes to add
   * @param offset index of the first byte to add
   * @param length number of bytes to add
   * @return updated checksum state
   */
  public static long updateCrc64(long crc, byte[] data, int offset, int length) {
    long state = crc;
    for (int pos = offset; pos < offset + length; ++pos) {
      long low = (state ^ (long) (data[pos] & 0xFF)) & 0xFF;
      int roundsLeft = 8;
      while (roundsLeft > 0) {
        long shifted = low >>> 1;
        low = ((low & 1) != 0) ? (CRC_64_POLY ^ shifted) : shifted;
        roundsLeft--;
      }
      state = low ^ (state >>> 8);
    }
    return state;
  }

  /**
   * Calculates CRC64 of stream contents.
   *
   * @param input stream to read; it is not closed by this method
   * @return checksum of all the bytes read, with the final inversion applied
   * @throws IOException if reading fails
   */
  public static long fingerprintStream(InputStream input) throws IOException {
    long state = -1;
    byte[] chunk = new byte[65536];
    int count;
    /* Reading stops at the first read that yields no bytes. */
    while ((count = input.read(chunk)) > 0) {
      state = updateCrc64(state, chunk, 0, count);
    }
    return ~state;
  }

  /**
   * Parses the expected checksum out of an entry name.
   *
   * <p> The part of the name before the first dot (the whole name, if there is no dot) is read
   * as a hexadecimal number.
   *
   * @param entryName name of the archive entry
   * @return parsed value, narrowed to {@code long}
   */
  public static long getExpectedFingerprint(String entryName) {
    int dotIndex = entryName.indexOf('.');
    String hex = entryName;
    if (dotIndex != -1) {
      hex = entryName.substring(0, dotIndex);
    }
    return new BigInteger(hex, 16).longValue();
  }
}

Binary file not shown.

View File

@ -63,9 +63,9 @@ project "brotlienc_static"
files { "c/enc/**.h", "c/enc/**.c" }
links "brotlicommon_static"
project "bro"
project "brotli"
kind "ConsoleApp"
language "C"
linkoptions "-static"
files { "c/tools/bro.c" }
files { "c/tools/brotli.c" }
links { "brotlicommon_static", "brotlidec_static", "brotlienc_static" }

View File

@ -92,7 +92,7 @@ case "$1" in
"bazel")
export RELEASE_DATE=`date +%Y-%m-%d`
perl -p -i -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' scripts/.bintray.json
zip -j9 brotli.zip bazel-bin/libbrotli*.a bazel-bin/libbrotli*.so bazel-bin/bro
zip -j9 brotli.zip bazel-bin/libbrotli*.a bazel-bin/libbrotli*.so bazel-bin/brotli
;;
esac
;;

View File

@ -9,9 +9,9 @@ test: deps
./roundtrip_test.sh
deps :
$(MAKE) -C $(BROTLI) bro
$(MAKE) -C $(BROTLI) brotli
clean :
rm -f testdata/*.{bro,unbro,uncompressed}
rm -f $(BROTLI)/c/{enc,dec,tools}/*.{un,}bro
$(MAKE) -C $(BROTLI)/c/tools clean
rm -f testdata/*.{br,unbr,uncompressed}
rm -f $(BROTLI)/{enc,dec,tools}/*.{un,}br
$(MAKE) -C $(BROTLI)/tools clean

View File

@ -5,7 +5,7 @@
set -o errexit
BRO=bin/bro
BROTLI=bin/brotli
TMP_DIR=bin/tmp
for file in tests/testdata/*.compressed*; do
@ -13,10 +13,10 @@ for file in tests/testdata/*.compressed*; do
expected=${file%.compressed*}
uncompressed=${TMP_DIR}/${expected##*/}.uncompressed
echo $uncompressed
$BRO -f -d -i $file -o $uncompressed
$BROTLI $file -fdo $uncompressed
diff -q $uncompressed $expected
# Test the streaming version
cat $file | $BRO -d > $uncompressed
cat $file | $BROTLI -dc > $uncompressed
diff -q $uncompressed $expected
rm -f $uncompressed
done

View File

@ -4,7 +4,7 @@
set -o errexit
BRO=bin/bro
BROTLI=bin/brotli
TMP_DIR=bin/tmp
INPUTS="""
tests/testdata/alice29.txt
@ -14,19 +14,19 @@ tests/testdata/plrabn12.txt
c/enc/encode.c
c/common/dictionary.h
c/dec/decode.c
$BRO
$BROTLI
"""
for file in $INPUTS; do
for quality in 1 6 9 11; do
echo "Roundtrip testing $file at quality $quality"
compressed=${TMP_DIR}/${file##*/}.bro
uncompressed=${TMP_DIR}/${file##*/}.unbro
$BRO -f -q $quality -i $file -o $compressed
$BRO -f -d -i $compressed -o $uncompressed
compressed=${TMP_DIR}/${file##*/}.br
uncompressed=${TMP_DIR}/${file##*/}.unbr
$BROTLI -fq $quality $file -o $compressed
$BROTLI $compressed -fdo $uncompressed
diff -q $file $uncompressed
# Test the streaming version
cat $file | $BRO -q $quality | $BRO -d >$uncompressed
cat $file | $BROTLI -cq $quality | $BROTLI -cd >$uncompressed
diff -q $file $uncompressed
done
done

View File

@ -3,7 +3,7 @@ get_filename_component(OUTPUT_NAME "${REFERENCE_DATA}" NAME)
execute_process(
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
COMMAND ${BROTLI_WRAPPER} ${BROTLI_CLI} --force --decompress --input ${INPUT} --output ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_NAME}.unbro
COMMAND ${BROTLI_WRAPPER} ${BROTLI_CLI} --force --decompress ${INPUT} --output=${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_NAME}.unbr
RESULT_VARIABLE result)
if(result)
message(FATAL_ERROR "Decompression failed")
@ -25,4 +25,4 @@ function(test_file_equality f1 f2)
endif()
endfunction()
test_file_equality("${REFERENCE_DATA}" "${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_NAME}.unbro")
test_file_equality("${REFERENCE_DATA}" "${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_NAME}.unbr")

View File

@ -1,6 +1,6 @@
execute_process(
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
COMMAND ${BROTLI_WRAPPER} ${BROTLI_CLI} --force --quality ${QUALITY} --input ${INPUT} --output ${OUTPUT}.bro
COMMAND ${BROTLI_WRAPPER} ${BROTLI_CLI} --force --quality=${QUALITY} ${INPUT} --output=${OUTPUT}.br
RESULT_VARIABLE result
ERROR_VARIABLE result_stderr)
if(result)
@ -9,7 +9,7 @@ endif()
execute_process(
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
COMMAND ${BROTLI_WRAPPER} ${BROTLI_CLI} --force --decompress --input ${OUTPUT}.bro --output ${OUTPUT}.unbro
COMMAND ${BROTLI_WRAPPER} ${BROTLI_CLI} --force --decompress ${OUTPUT}.br --output=${OUTPUT}.unbr
RESULT_VARIABLE result)
if(result)
message(FATAL_ERROR "Decompression failed")
@ -31,4 +31,4 @@ function(test_file_equality f1 f2)
endif()
endfunction()
test_file_equality("${INPUT}" "${OUTPUT}.unbro")
test_file_equality("${INPUT}" "${OUTPUT}.unbr")