diff --git a/CMakeLists.txt b/CMakeLists.txt index e86b13b..2776f9f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,12 +105,6 @@ if (ENABLE_SANITIZER) set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} -fsanitize=${ENABLE_SANITIZER}") set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fsanitize=${ENABLE_SANITIZER}") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=${ENABLE_SANITIZER}") - - # By default, brotli depends on undefined behavior, but setting - # BROTLI_BUILD_PORTABLE should result in a build which does not. - if(ENABLE_SANITIZER STREQUAL "undefined") - add_definitions(-DBROTLI_BUILD_PORTABLE) - endif() endif () include(CheckFunctionExists) diff --git a/README.md b/README.md index 6d8219e..0f905e3 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,11 @@ The specification of the Brotli Compressed Data Format is defined in [RFC 7932]( Brotli is open-sourced under the MIT License, see the LICENSE file. +> **Please note:** brotli is a "stream" format; it does not contain +> meta-information, like checksums or uncompressed data length. It is possible +> to modify "raw" ranges of the compressed stream and the decoder will not +> notice that. + Brotli mailing list: https://groups.google.com/forum/#!forum/brotli diff --git a/c/common/constants.h b/c/common/constants.h index 433c7b2..31e5bd3 100644 --- a/c/common/constants.h +++ b/c/common/constants.h @@ -12,10 +12,11 @@ #ifndef BROTLI_COMMON_CONSTANTS_H_ #define BROTLI_COMMON_CONSTANTS_H_ -#include "platform.h" #include #include +#include "platform.h" + /* Specification: 7.3. 
Encoding of the context map */ #define BROTLI_CONTEXT_MAP_MAX_RLE 16 diff --git a/c/common/platform.c b/c/common/platform.c index acdc452..25d84a9 100644 --- a/c/common/platform.c +++ b/c/common/platform.c @@ -6,9 +6,10 @@ #include -#include "platform.h" #include +#include "platform.h" + /* Default brotli_alloc_func */ void* BrotliDefaultAllocFunc(void* opaque, size_t size) { BROTLI_UNUSED(opaque); diff --git a/c/common/platform.h b/c/common/platform.h index 0e0e8aa..4186a8e 100644 --- a/c/common/platform.h +++ b/c/common/platform.h @@ -12,9 +12,9 @@ * BROTLI_BUILD_BIG_ENDIAN forces to use big-endian optimizations * BROTLI_BUILD_ENDIAN_NEUTRAL disables endian-aware optimizations * BROTLI_BUILD_LITTLE_ENDIAN forces to use little-endian optimizations - * BROTLI_BUILD_PORTABLE disables dangerous optimizations, like unaligned - read and overlapping memcpy; this reduces decompression speed by 5% * BROTLI_BUILD_NO_RBIT disables "rbit" optimization for ARM CPUs + * BROTLI_BUILD_NO_UNALIGNED_READ_FAST forces off the fast-unaligned-read + optimizations (mainly for testing purposes). 
* BROTLI_DEBUG dumps file name and line number when decoder detects stream or memory error * BROTLI_ENABLE_LOG enables asserts and dumps various state information @@ -208,15 +208,19 @@ OR: #define BROTLI_TARGET_RISCV64 #endif +#if defined(BROTLI_TARGET_X64) || defined(BROTLI_TARGET_ARMV8_64) || \ + defined(BROTLI_TARGET_POWERPC64) || defined(BROTLI_TARGET_RISCV64) +#define BROTLI_TARGET_64_BITS 1 +#else +#define BROTLI_TARGET_64_BITS 0 +#endif + #if defined(BROTLI_BUILD_64_BIT) #define BROTLI_64_BITS 1 #elif defined(BROTLI_BUILD_32_BIT) #define BROTLI_64_BITS 0 -#elif defined(BROTLI_TARGET_X64) || defined(BROTLI_TARGET_ARMV8_64) || \ - defined(BROTLI_TARGET_POWERPC64) || defined(BROTLI_TARGET_RISCV64) -#define BROTLI_64_BITS 1 #else -#define BROTLI_64_BITS 0 +#define BROTLI_64_BITS BROTLI_TARGET_64_BITS #endif #if (BROTLI_64_BITS) @@ -260,18 +264,19 @@ OR: #undef BROTLI_X_BIG_ENDIAN #endif -#if defined(BROTLI_BUILD_PORTABLE) -#define BROTLI_ALIGNED_READ (!!1) -#elif defined(BROTLI_TARGET_X86) || defined(BROTLI_TARGET_X64) || \ +#if defined(BROTLI_BUILD_NO_UNALIGNED_READ_FAST) +#define BROTLI_UNALIGNED_READ_FAST (!!0) +#elif defined(BROTLI_TARGET_X86) || defined(BROTLI_TARGET_X64) || \ defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_ANY) || \ defined(BROTLI_TARGET_RISCV64) -/* Allow unaligned read only for white-listed CPUs. */ -#define BROTLI_ALIGNED_READ (!!0) +/* These targets are known to generate efficient code for unaligned reads + * (e.g. a single instruction, not multiple 1-byte loads, shifted and or'd + * together). */ +#define BROTLI_UNALIGNED_READ_FAST (!!1) #else -#define BROTLI_ALIGNED_READ (!!1) +#define BROTLI_UNALIGNED_READ_FAST (!!0) #endif -#if BROTLI_ALIGNED_READ /* Portable unaligned memory access: read / write values via memcpy. 
*/ static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) { uint16_t t; @@ -291,75 +296,6 @@ static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) { static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) { memcpy(p, &v, sizeof v); } -#else /* BROTLI_ALIGNED_READ */ -/* Unaligned memory access is allowed: just cast pointer to requested type. */ -#if BROTLI_SANITIZED -/* Consider we have an unaligned load/store of 4 bytes from address 0x...05. - AddressSanitizer will treat it as a 3-byte access to the range 05:07 and - will miss a bug if 08 is the first unaddressable byte. - ThreadSanitizer will also treat this as a 3-byte access to 05:07 and will - miss a race between this access and some other accesses to 08. - MemorySanitizer will correctly propagate the shadow on unaligned stores - and correctly report bugs on unaligned loads, but it may not properly - update and report the origin of the uninitialized memory. - For all three tools, replacing an unaligned access with a tool-specific - callback solves the problem. 
*/ -#if defined(__cplusplus) -extern "C" { -#endif /* __cplusplus */ - uint16_t __sanitizer_unaligned_load16(const void* p); - uint32_t __sanitizer_unaligned_load32(const void* p); - uint64_t __sanitizer_unaligned_load64(const void* p); - void __sanitizer_unaligned_store64(void* p, uint64_t v); -#if defined(__cplusplus) -} /* extern "C" */ -#endif /* __cplusplus */ -#define BrotliUnalignedRead16 __sanitizer_unaligned_load16 -#define BrotliUnalignedRead32 __sanitizer_unaligned_load32 -#define BrotliUnalignedRead64 __sanitizer_unaligned_load64 -#define BrotliUnalignedWrite64 __sanitizer_unaligned_store64 -#else /* BROTLI_SANITIZED */ -static BROTLI_INLINE uint16_t BrotliUnalignedRead16(const void* p) { - return *(const uint16_t*)p; -} -static BROTLI_INLINE uint32_t BrotliUnalignedRead32(const void* p) { - return *(const uint32_t*)p; -} -#if (BROTLI_64_BITS) -static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) { - return *(const uint64_t*)p; -} -static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) { - *(uint64_t*)p = v; -} -#else /* BROTLI_64_BITS */ -/* Avoid emitting LDRD / STRD, which require properly aligned address. */ -/* If __attribute__(aligned) is available, use that. Otherwise, memcpy. 
*/ - -#if BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0) -typedef BROTLI_ALIGNED(1) uint64_t brotli_unaligned_uint64_t; - -static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) { - return (uint64_t) ((const brotli_unaligned_uint64_t*) p)[0]; -} -static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) { - brotli_unaligned_uint64_t* dwords = (brotli_unaligned_uint64_t*) p; - dwords[0] = (brotli_unaligned_uint64_t) v; -} -#else /* BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0) */ -static BROTLI_INLINE uint64_t BrotliUnalignedRead64(const void* p) { - uint64_t v; - memcpy(&v, p, sizeof(uint64_t)); - return v; -} - -static BROTLI_INLINE void BrotliUnalignedWrite64(void* p, uint64_t v) { - memcpy(p, &v, sizeof(uint64_t)); -} -#endif /* BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0) */ -#endif /* BROTLI_64_BITS */ -#endif /* BROTLI_SANITIZED */ -#endif /* BROTLI_ALIGNED_READ */ #if BROTLI_LITTLE_ENDIAN /* Straight endianness. Just read / write values. */ @@ -435,6 +371,16 @@ static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64LE(void* p, uint64_t v) { } #endif /* BROTLI_LITTLE_ENDIAN */ +static BROTLI_INLINE void* BROTLI_UNALIGNED_LOAD_PTR(const void* p) { + void* v; + memcpy(&v, p, sizeof(void*)); + return v; +} + +static BROTLI_INLINE void BROTLI_UNALIGNED_STORE_PTR(void* p, const void* v) { + memcpy(p, &v, sizeof(void*)); +} + /* BROTLI_IS_CONSTANT macros returns true for compile-time constants. */ #if BROTLI_GNUC_HAS_BUILTIN(__builtin_constant_p, 3, 0, 1) || \ BROTLI_INTEL_VERSION_CHECK(16, 0, 0) @@ -467,6 +413,8 @@ static BROTLI_INLINE void BrotliDump(const char* f, int l, const char* fn) { #define BROTLI_DUMP() (void)(0) #endif +/* BrotliRBit assumes brotli_reg_t fits native CPU register type. */ +#if (BROTLI_64_BITS == BROTLI_TARGET_64_BITS) /* TODO(eustas): add appropriate icc/sunpro/arm/ibm/ti checks. 
*/ #if (BROTLI_GNUC_VERSION_CHECK(3, 0, 0) || defined(__llvm__)) && \ !defined(BROTLI_BUILD_NO_RBIT) @@ -480,15 +428,14 @@ static BROTLI_INLINE brotli_reg_t BrotliRBit(brotli_reg_t input) { #define BROTLI_RBIT(x) BrotliRBit(x) #endif /* armv7 / armv8 */ #endif /* gcc || clang */ +#endif /* brotli_reg_t is native */ #if !defined(BROTLI_RBIT) static BROTLI_INLINE void BrotliRBit(void) { /* Should break build if used. */ } #endif /* BROTLI_RBIT */ -#define BROTLI_REPEAT(N, X) { \ - if ((N & 1) != 0) {X;} \ - if ((N & 2) != 0) {X; X;} \ - if ((N & 4) != 0) {X; X; X; X;} \ -} +#define BROTLI_REPEAT_4(X) {X; X; X; X;} +#define BROTLI_REPEAT_5(X) {X; X; X; X; X;} +#define BROTLI_REPEAT_6(X) {X; X; X; X; X; X;} #define BROTLI_UNUSED(X) (void)(X) @@ -553,6 +500,8 @@ BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) { BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD32LE); BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD64LE); BROTLI_UNUSED(&BROTLI_UNALIGNED_STORE64LE); + BROTLI_UNUSED(&BROTLI_UNALIGNED_LOAD_PTR); + BROTLI_UNUSED(&BROTLI_UNALIGNED_STORE_PTR); BROTLI_UNUSED(&BrotliRBit); BROTLI_UNUSED(&brotli_min_double); BROTLI_UNUSED(&brotli_max_double); diff --git a/c/common/shared_dictionary_internal.h b/c/common/shared_dictionary_internal.h index 87ab13b..963762e 100644 --- a/c/common/shared_dictionary_internal.h +++ b/c/common/shared_dictionary_internal.h @@ -9,11 +9,12 @@ #ifndef BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_ #define BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_ -#include "dictionary.h" #include -#include "transform.h" #include +#include "dictionary.h" +#include "transform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/c/dec/bit_reader.c b/c/dec/bit_reader.c index 3dc848b..97e21f5 100644 --- a/c/dec/bit_reader.c +++ b/c/dec/bit_reader.c @@ -8,9 +8,10 @@ #include "bit_reader.h" -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif @@ -36,7 
+37,7 @@ BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) { /* Fixing alignment after unaligned BrotliFillWindow would result accumulator overflow. If unalignment is caused by BrotliSafeReadBits, then there is enough space in accumulator to fix alignment. */ - if (!BROTLI_ALIGNED_READ) { + if (BROTLI_UNALIGNED_READ_FAST) { aligned_read_mask = 0; } if (BrotliGetAvailableBits(br) == 0) { diff --git a/c/dec/bit_reader.h b/c/dec/bit_reader.h index 3906455..c737bda 100644 --- a/c/dec/bit_reader.h +++ b/c/dec/bit_reader.h @@ -11,9 +11,10 @@ #include /* memcpy */ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #if defined(__cplusplus) || defined(c_plusplus) extern "C" { @@ -53,8 +54,8 @@ BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br); /* Ensures that accumulator is not empty. May consume up to sizeof(brotli_reg_t) - 1 bytes of input. Returns BROTLI_FALSE if data is required but there is no input available. - For BROTLI_ALIGNED_READ this function also prepares bit reader for aligned - reading. */ + For !BROTLI_UNALIGNED_READ_FAST this function also prepares bit reader for + aligned reading. */ BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br); /* Fallback for BrotliSafeReadBits32. 
Extracted as noninlined method to unburden @@ -107,7 +108,8 @@ static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount( static BROTLI_INLINE void BrotliFillBitWindow( BrotliBitReader* const br, uint32_t n_bits) { #if (BROTLI_64_BITS) - if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) { + if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) && + (n_bits <= 8)) { uint32_t bit_pos = br->bit_pos_; if (bit_pos >= 56) { br->val_ = @@ -117,8 +119,8 @@ static BROTLI_INLINE void BrotliFillBitWindow( br->avail_in -= 7; br->next_in += 7; } - } else if ( - !BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 16)) { + } else if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) && + (n_bits <= 16)) { uint32_t bit_pos = br->bit_pos_; if (bit_pos >= 48) { br->val_ = @@ -140,7 +142,8 @@ static BROTLI_INLINE void BrotliFillBitWindow( } } #else - if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) { + if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) && + (n_bits <= 8)) { uint32_t bit_pos = br->bit_pos_; if (bit_pos >= 24) { br->val_ = diff --git a/c/dec/decode.c b/c/dec/decode.c index 41166f9..845f556 100644 --- a/c/dec/decode.c +++ b/c/dec/decode.c @@ -113,8 +113,9 @@ void BrotliDecoderDestroyInstance(BrotliDecoderState* state) { /* Saves error code and converts it to BrotliDecoderResult. 
*/ static BROTLI_NOINLINE BrotliDecoderResult SaveErrorCode( - BrotliDecoderState* s, BrotliDecoderErrorCode e) { + BrotliDecoderState* s, BrotliDecoderErrorCode e, size_t consumed_input) { s->error_code = (int)e; + s->used_input += consumed_input; switch (e) { case BROTLI_DECODER_SUCCESS: return BROTLI_DECODER_RESULT_SUCCESS; @@ -1172,7 +1173,7 @@ static BROTLI_INLINE void DetectTrivialLiteralBlockTypes( size_t sample = s->context_map[offset]; size_t j; for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS);) { - BROTLI_REPEAT(4, error |= s->context_map[offset + j++] ^ sample;) + BROTLI_REPEAT_4({ error |= s->context_map[offset + j++] ^ sample; }) } if (error == 0) { s->trivial_literal_contexts[i >> 5] |= 1u << (i & 31); @@ -2243,6 +2244,9 @@ BrotliDecoderResult BrotliDecoderDecompressStream( size_t* available_out, uint8_t** next_out, size_t* total_out) { BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS; BrotliBitReader* br = &s->br; + size_t input_size = *available_in; +#define BROTLI_SAVE_ERROR_CODE(code) \ + SaveErrorCode(s, (code), input_size - *available_in) /* Ensure that |total_out| is set, even if no data will ever be pushed out. */ if (total_out) { *total_out = s->partial_pos_out; @@ -2252,8 +2256,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream( return BROTLI_DECODER_RESULT_ERROR; } if (*available_out && (!next_out || !*next_out)) { - return SaveErrorCode( - s, BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS)); + return BROTLI_SAVE_ERROR_CODE( + BROTLI_FAILURE(BROTLI_DECODER_ERROR_INVALID_ARGUMENTS)); } if (!*available_out) next_out = 0; if (s->buffer_length == 0) { /* Just connect bit reader to input stream. 
*/ @@ -2586,7 +2590,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( s, &s->distance_hgroup, distance_alphabet_size_max, distance_alphabet_size_limit, s->num_dist_htrees); if (!allocation_success) { - return SaveErrorCode(s, + return BROTLI_SAVE_ERROR_CODE( BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS)); } s->loop_counter = 0; @@ -2600,7 +2604,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream( case 0: hgroup = &s->literal_hgroup; break; case 1: hgroup = &s->insert_copy_hgroup; break; case 2: hgroup = &s->distance_hgroup; break; - default: return SaveErrorCode(s, BROTLI_FAILURE( + default: return BROTLI_SAVE_ERROR_CODE(BROTLI_FAILURE( BROTLI_DECODER_ERROR_UNREACHABLE)); /* COV_NF_LINE */ } result = HuffmanTreeGroupDecode(hgroup, s); @@ -2710,10 +2714,11 @@ BrotliDecoderResult BrotliDecoderDecompressStream( break; } } - return SaveErrorCode(s, result); + return BROTLI_SAVE_ERROR_CODE(result); } } - return SaveErrorCode(s, result); + return BROTLI_SAVE_ERROR_CODE(result); +#undef BROTLI_SAVE_ERROR_CODE } BROTLI_BOOL BrotliDecoderHasMoreOutput(const BrotliDecoderState* s) { @@ -2743,7 +2748,7 @@ const uint8_t* BrotliDecoderTakeOutput(BrotliDecoderState* s, size_t* size) { } else { /* ... or stream is broken. Normally this should be caught by BrotliDecoderDecompressStream, this is just a safeguard. 
*/ - if ((int)status < 0) SaveErrorCode(s, status); + if ((int)status < 0) SaveErrorCode(s, status, 0); *size = 0; result = 0; } diff --git a/c/dec/huffman.c b/c/dec/huffman.c index 8f127d7..3806454 100644 --- a/c/dec/huffman.c +++ b/c/dec/huffman.c @@ -10,9 +10,10 @@ #include /* memcpy, memset */ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #if defined(__cplusplus) || defined(c_plusplus) extern "C" { @@ -117,11 +118,13 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, int bits_count; BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <= BROTLI_REVERSE_BITS_MAX); + BROTLI_DCHECK(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH == 5); /* Generate offsets into sorted symbol table by code length. */ symbol = -1; bits = 1; - BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, { + /* BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH == 5 */ + BROTLI_REPEAT_5({ symbol += count[bits]; offset[bits] = symbol; bits++; @@ -132,7 +135,7 @@ void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table, /* Sort symbols by length, by symbol order within each length. 
*/ symbol = BROTLI_CODE_LENGTH_CODES; do { - BROTLI_REPEAT(6, { + BROTLI_REPEAT_6({ symbol--; sorted[offset[code_lengths[symbol]]--] = symbol; }); diff --git a/c/dec/huffman.h b/c/dec/huffman.h index a8fbc45..5036096 100644 --- a/c/dec/huffman.h +++ b/c/dec/huffman.h @@ -9,9 +9,10 @@ #ifndef BROTLI_DEC_HUFFMAN_H_ #define BROTLI_DEC_HUFFMAN_H_ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/c/dec/prefix.h b/c/dec/prefix.h index 481a2c7..e8acf07 100644 --- a/c/dec/prefix.h +++ b/c/dec/prefix.h @@ -10,9 +10,10 @@ #ifndef BROTLI_DEC_PREFIX_H_ #define BROTLI_DEC_PREFIX_H_ -#include "../common/constants.h" #include +#include "../common/constants.h" + typedef struct CmdLutElement { uint8_t insert_len_extra_bits; uint8_t copy_len_extra_bits; diff --git a/c/dec/state.c b/c/dec/state.c index e3170c1..08d4c8b 100644 --- a/c/dec/state.c +++ b/c/dec/state.c @@ -8,8 +8,9 @@ #include /* free, malloc */ -#include "../common/dictionary.h" #include + +#include "../common/dictionary.h" #include "huffman.h" #if defined(__cplusplus) || defined(c_plusplus) @@ -43,6 +44,7 @@ BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s, s->pos = 0; s->rb_roundtrips = 0; s->partial_pos_out = 0; + s->used_input = 0; s->block_type_trees = NULL; s->block_len_trees = NULL; @@ -129,9 +131,21 @@ void BrotliDecoderStateCleanupAfterMetablock(BrotliDecoderState* s) { BROTLI_DECODER_FREE(s, s->distance_hgroup.htrees); } +#ifdef BROTLI_REPORTING +/* When BROTLI_REPORTING is defined, an extra reporting module has to be linked. 
*/ +void BrotliDecoderOnFinish(const BrotliDecoderState* s); +#define BROTLI_DECODER_ON_FINISH(s) BrotliDecoderOnFinish(s); +#else +#if !defined(BROTLI_DECODER_ON_FINISH) +#define BROTLI_DECODER_ON_FINISH(s) (void)(s); +#endif +#endif + void BrotliDecoderStateCleanup(BrotliDecoderState* s) { BrotliDecoderStateCleanupAfterMetablock(s); + BROTLI_DECODER_ON_FINISH(s); + BROTLI_DECODER_FREE(s, s->compound_dictionary); BrotliSharedDictionaryDestroyInstance(s->dictionary); s->dictionary = NULL; diff --git a/c/dec/state.h b/c/dec/state.h index 81e6bb6..6ec5c8f 100644 --- a/c/dec/state.h +++ b/c/dec/state.h @@ -9,12 +9,13 @@ #ifndef BROTLI_DEC_STATE_H_ #define BROTLI_DEC_STATE_H_ +#include +#include + #include "../common/constants.h" #include "../common/dictionary.h" #include "../common/platform.h" -#include #include "../common/transform.h" -#include #include "bit_reader.h" #include "huffman.h" @@ -321,6 +322,9 @@ struct BrotliDecoderStateStruct { /* Less used attributes are at the end of this struct. */ + /* For reporting. */ + uint64_t used_input; /* how many bytes of input are consumed */ + /* States inside function calls. 
*/ BrotliRunningMetablockHeaderState substate_metablock_header; BrotliRunningUncompressedState substate_uncompressed; diff --git a/c/enc/backward_references.c b/c/enc/backward_references.c index 2cf01d8..ff5b7be 100644 --- a/c/enc/backward_references.c +++ b/c/enc/backward_references.c @@ -8,10 +8,11 @@ #include "backward_references.h" +#include + #include "../common/constants.h" #include "../common/dictionary.h" #include "../common/platform.h" -#include #include "command.h" #include "compound_dictionary.h" #include "dictionary_hash.h" diff --git a/c/enc/backward_references.h b/c/enc/backward_references.h index b051e18..20fb98a 100644 --- a/c/enc/backward_references.h +++ b/c/enc/backward_references.h @@ -9,11 +9,12 @@ #ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_ #define BROTLI_ENC_BACKWARD_REFERENCES_H_ +#include + #include "../common/constants.h" #include "../common/context.h" #include "../common/dictionary.h" #include "../common/platform.h" -#include #include "command.h" #include "hash.h" #include "quality.h" diff --git a/c/enc/backward_references_hq.c b/c/enc/backward_references_hq.c index c6a6c8c..6325032 100644 --- a/c/enc/backward_references_hq.c +++ b/c/enc/backward_references_hq.c @@ -10,9 +10,10 @@ #include /* memcpy, memset */ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "command.h" #include "compound_dictionary.h" #include "encoder_dict.h" diff --git a/c/enc/backward_references_hq.h b/c/enc/backward_references_hq.h index c9dcc80..8acf975 100644 --- a/c/enc/backward_references_hq.h +++ b/c/enc/backward_references_hq.h @@ -9,11 +9,12 @@ #ifndef BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_ #define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_ +#include + #include "../common/constants.h" #include "../common/context.h" #include "../common/dictionary.h" #include "../common/platform.h" -#include #include "command.h" #include "hash.h" #include "memory.h" diff --git a/c/enc/bit_cost.c b/c/enc/bit_cost.c index 8ca4ab1..6b7c904 
100644 --- a/c/enc/bit_cost.c +++ b/c/enc/bit_cost.c @@ -8,9 +8,10 @@ #include "bit_cost.h" +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "fast_log.h" #include "histogram.h" diff --git a/c/enc/bit_cost.h b/c/enc/bit_cost.h index 4cf3b18..f6f2773 100644 --- a/c/enc/bit_cost.h +++ b/c/enc/bit_cost.h @@ -9,8 +9,9 @@ #ifndef BROTLI_ENC_BIT_COST_H_ #define BROTLI_ENC_BIT_COST_H_ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "fast_log.h" #include "histogram.h" diff --git a/c/enc/block_splitter.h b/c/enc/block_splitter.h index 1de072f..6046b90 100644 --- a/c/enc/block_splitter.h +++ b/c/enc/block_splitter.h @@ -9,8 +9,9 @@ #ifndef BROTLI_ENC_BLOCK_SPLITTER_H_ #define BROTLI_ENC_BLOCK_SPLITTER_H_ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "command.h" #include "memory.h" #include "quality.h" diff --git a/c/enc/brotli_bit_stream.c b/c/enc/brotli_bit_stream.c index d105102..5fa0c69 100644 --- a/c/enc/brotli_bit_stream.c +++ b/c/enc/brotli_bit_stream.c @@ -12,10 +12,11 @@ #include /* memcpy, memset */ +#include + #include "../common/constants.h" #include "../common/context.h" #include "../common/platform.h" -#include #include "entropy_encode.h" #include "entropy_encode_static.h" #include "fast_log.h" diff --git a/c/enc/brotli_bit_stream.h b/c/enc/brotli_bit_stream.h index 4285b7f..a289509 100644 --- a/c/enc/brotli_bit_stream.h +++ b/c/enc/brotli_bit_stream.h @@ -16,9 +16,10 @@ #ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_ #define BROTLI_ENC_BROTLI_BIT_STREAM_H_ +#include + #include "../common/context.h" #include "../common/platform.h" -#include #include "command.h" #include "entropy_encode.h" #include "memory.h" diff --git a/c/enc/cluster.c b/c/enc/cluster.c index b86bbfb..b0faf81 100644 --- a/c/enc/cluster.c +++ b/c/enc/cluster.c @@ -8,8 +8,9 @@ #include "cluster.h" -#include "../common/platform.h" #include + +#include "../common/platform.h" #include 
"bit_cost.h" /* BrotliPopulationCost */ #include "fast_log.h" #include "histogram.h" diff --git a/c/enc/cluster.h b/c/enc/cluster.h index 107e8a3..013629c 100644 --- a/c/enc/cluster.h +++ b/c/enc/cluster.h @@ -9,8 +9,9 @@ #ifndef BROTLI_ENC_CLUSTER_H_ #define BROTLI_ENC_CLUSTER_H_ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "histogram.h" #include "memory.h" diff --git a/c/enc/command.h b/c/enc/command.h index 43e35d7..ba4de7e 100644 --- a/c/enc/command.h +++ b/c/enc/command.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_COMMAND_H_ #define BROTLI_ENC_COMMAND_H_ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "fast_log.h" #include "params.h" #include "prefix.h" diff --git a/c/enc/compound_dictionary.c b/c/enc/compound_dictionary.c index d82772f..824e515 100644 --- a/c/enc/compound_dictionary.c +++ b/c/enc/compound_dictionary.c @@ -6,8 +6,9 @@ #include "compound_dictionary.h" -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "memory.h" #include "quality.h" @@ -33,7 +34,7 @@ static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m, uint32_t* slot_offsets = NULL; uint16_t* heads = NULL; uint32_t* items = NULL; - uint8_t* source_copy = NULL; + const uint8_t** source_ref = NULL; uint32_t i; uint32_t* slot_size = NULL; uint32_t* slot_limit = NULL; @@ -97,7 +98,7 @@ static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m, /* Step 3: transfer data to "slim" hasher. 
*/ alloc_size = sizeof(PreparedDictionary) + (sizeof(uint32_t) << slot_bits) + (sizeof(uint16_t) << bucket_bits) + (sizeof(uint32_t) * total_items) + - source_size; + sizeof(uint8_t*); result = (PreparedDictionary*)BROTLI_ALLOC(m, uint8_t, alloc_size); if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(result)) { @@ -107,14 +108,15 @@ static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m, slot_offsets = (uint32_t*)(&result[1]); heads = (uint16_t*)(&slot_offsets[num_slots]); items = (uint32_t*)(&heads[num_buckets]); - source_copy = (uint8_t*)(&items[total_items]); + source_ref = (const uint8_t**)(&items[total_items]); - result->magic = kPreparedDictionaryMagic; - result->source_offset = total_items; + result->magic = kLeanPreparedDictionaryMagic; + result->num_items = total_items; result->source_size = (uint32_t)source_size; result->hash_bits = hash_bits; result->bucket_bits = bucket_bits; result->slot_bits = slot_bits; + BROTLI_UNALIGNED_STORE_PTR(source_ref, source); total_items = 0; for (i = 0; i < num_slots; ++i) { @@ -145,7 +147,6 @@ static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m, } BROTLI_FREE(m, flat); - memcpy(source_copy, source, source_size); return result; } @@ -192,8 +193,14 @@ BROTLI_BOOL AttachPreparedDictionary( uint32_t* slot_offsets = (uint32_t*)(&dictionary[1]); uint16_t* heads = (uint16_t*)(&slot_offsets[1u << dictionary->slot_bits]); uint32_t* items = (uint32_t*)(&heads[1u << dictionary->bucket_bits]); - compound->chunk_source[index] = - (const uint8_t*)(&items[dictionary->source_offset]); + const void* tail = (void*)&items[dictionary->num_items]; + if (dictionary->magic == kPreparedDictionaryMagic) { + compound->chunk_source[index] = (const uint8_t*)tail; + } else { + /* dictionary->magic == kLeanPreparedDictionaryMagic */ + compound->chunk_source[index] = + (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail); + } } compound->num_chunks++; return BROTLI_TRUE; diff --git 
a/c/enc/compound_dictionary.h b/c/enc/compound_dictionary.h index 60b12d2..9c531d5 100644 --- a/c/enc/compound_dictionary.h +++ b/c/enc/compound_dictionary.h @@ -7,19 +7,32 @@ #ifndef BROTLI_ENC_PREPARED_DICTIONARY_H_ #define BROTLI_ENC_PREPARED_DICTIONARY_H_ -#include "../common/platform.h" -#include "../common/constants.h" #include #include + +#include "../common/platform.h" +#include "../common/constants.h" #include "memory.h" +/* "Fat" prepared dictionary, could be cooked outside of C implementation, + * e.g. on Java side. LZ77 data is copied inside PreparedDictionary struct. */ static const uint32_t kPreparedDictionaryMagic = 0xDEBCEDE0; + +static const uint32_t kSharedDictionaryMagic = 0xDEBCEDE1; + +static const uint32_t kManagedDictionaryMagic = 0xDEBCEDE2; + +/* "Lean" prepared dictionary. LZ77 data is referenced. It is the responsibility + * of the caller of "prepare dictionary" to keep the LZ77 data alive while the + * prepared dictionary is in use. */ +static const uint32_t kLeanPreparedDictionaryMagic = 0xDEBCEDE3; + static const uint64_t kPreparedDictionaryHashMul64Long = BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u); typedef struct PreparedDictionary { uint32_t magic; - uint32_t source_offset; + uint32_t num_items; uint32_t source_size; uint32_t hash_bits; uint32_t bucket_bits; @@ -31,7 +44,8 @@ typedef struct PreparedDictionary { /* uint16_t heads[1 << bucket_bits]; */ /* uint32_t items[variable]; */ - /* uint8_t source[source_size] */ + /* [maybe] uint8_t* source_ref, depending on magic. */ + /* [maybe] uint8_t source[source_size], depending on magic. 
*/ } PreparedDictionary; BROTLI_INTERNAL PreparedDictionary* CreatePreparedDictionary(MemoryManager* m, diff --git a/c/enc/compress_fragment.c b/c/enc/compress_fragment.c index 1f478ca..13890ea 100644 --- a/c/enc/compress_fragment.c +++ b/c/enc/compress_fragment.c @@ -16,8 +16,9 @@ #include /* memcmp, memcpy, memset */ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "brotli_bit_stream.h" #include "entropy_encode.h" #include "fast_log.h" diff --git a/c/enc/compress_fragment.h b/c/enc/compress_fragment.h index 099a979..9c0780f 100644 --- a/c/enc/compress_fragment.h +++ b/c/enc/compress_fragment.h @@ -12,9 +12,10 @@ #ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_ #define BROTLI_ENC_COMPRESS_FRAGMENT_H_ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "entropy_encode.h" #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/compress_fragment_two_pass.c b/c/enc/compress_fragment_two_pass.c index 4cbb418..a762679 100644 --- a/c/enc/compress_fragment_two_pass.c +++ b/c/enc/compress_fragment_two_pass.c @@ -14,9 +14,10 @@ #include /* memcmp, memcpy, memset */ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "bit_cost.h" #include "brotli_bit_stream.h" #include "entropy_encode.h" diff --git a/c/enc/compress_fragment_two_pass.h b/c/enc/compress_fragment_two_pass.h index f5d0741..6d28d9b 100644 --- a/c/enc/compress_fragment_two_pass.h +++ b/c/enc/compress_fragment_two_pass.h @@ -13,9 +13,10 @@ #ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ #define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "entropy_encode.h" #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/encode.c b/c/enc/encode.c index afceba4..a8ac09a 100644 --- a/c/enc/encode.c +++ b/c/enc/encode.c @@ -30,6 +30,7 @@ #include "memory.h" #include "metablock.h" #include "prefix.h" 
+#include "state.h" #include "quality.h" #include "ringbuffer.h" #include "utf8_util.h" @@ -41,84 +42,6 @@ extern "C" { #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src)); -typedef enum BrotliEncoderStreamState { - /* Default state. */ - BROTLI_STREAM_PROCESSING = 0, - /* Intermediate state; after next block is emitted, byte-padding should be - performed before getting back to default state. */ - BROTLI_STREAM_FLUSH_REQUESTED = 1, - /* Last metablock was produced; no more input is acceptable. */ - BROTLI_STREAM_FINISHED = 2, - /* Flushing compressed block and writing meta-data block header. */ - BROTLI_STREAM_METADATA_HEAD = 3, - /* Writing metadata block body. */ - BROTLI_STREAM_METADATA_BODY = 4 -} BrotliEncoderStreamState; - -typedef enum BrotliEncoderFlintState { - BROTLI_FLINT_NEEDS_2_BYTES = 2, - BROTLI_FLINT_NEEDS_1_BYTE = 1, - BROTLI_FLINT_WAITING_FOR_PROCESSING = 0, - BROTLI_FLINT_WAITING_FOR_FLUSHING = -1, - BROTLI_FLINT_DONE = -2 -} BrotliEncoderFlintState; - -typedef struct BrotliEncoderStateStruct { - BrotliEncoderParams params; - - MemoryManager memory_manager_; - - uint64_t input_pos_; - RingBuffer ringbuffer_; - size_t cmd_alloc_size_; - Command* commands_; - size_t num_commands_; - size_t num_literals_; - size_t last_insert_len_; - uint64_t last_flush_pos_; - uint64_t last_processed_pos_; - int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES]; - int saved_dist_cache_[4]; - uint16_t last_bytes_; - uint8_t last_bytes_bits_; - /* "Flint" is a tiny uncompressed block emitted before the continuation - block to unwire literal context from previous data. Despite being int8_t, - field is actually BrotliEncoderFlintState enum. */ - int8_t flint_; - uint8_t prev_byte_; - uint8_t prev_byte2_; - size_t storage_size_; - uint8_t* storage_; - - Hasher hasher_; - - /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. 
*/ - int small_table_[1 << 10]; /* 4KiB */ - int* large_table_; /* Allocated only when needed */ - size_t large_table_size_; - - BrotliOnePassArena* one_pass_arena_; - BrotliTwoPassArena* two_pass_arena_; - - /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */ - uint32_t* command_buf_; - uint8_t* literal_buf_; - - uint8_t* next_out_; - size_t available_out_; - size_t total_out_; - /* Temporary buffer for padding flush bits or metadata block header / body. */ - union { - uint64_t u64[2]; - uint8_t u8[16]; - } tiny_buf_; - uint32_t remaining_metadata_bytes_; - BrotliEncoderStreamState stream_state_; - - BROTLI_BOOL is_last_block_emitted_; - BROTLI_BOOL is_initialized_; -} BrotliEncoderStateStruct; - static size_t InputBlockSize(BrotliEncoderState* s) { return (size_t)1 << s->params.lgblock; } @@ -780,6 +703,7 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) { s->two_pass_arena_ = NULL; s->command_buf_ = NULL; s->literal_buf_ = NULL; + s->total_in_ = 0; s->next_out_ = NULL; s->available_out_ = 0; s->total_out_ = 0; @@ -816,12 +740,26 @@ BrotliEncoderState* BrotliEncoderCreateInstance( return state; } +#ifdef BROTLI_REPORTING +/* When BROTLI_REPORTING is defined, an extra reporting module has to be linked.
*/ +void BrotliEncoderOnFinish(const BrotliEncoderState* s); +#define BROTLI_ENCODER_ON_FINISH(s) BrotliEncoderOnFinish(s); +#else +#if !defined(BROTLI_ENCODER_ON_FINISH) +#define BROTLI_ENCODER_ON_FINISH(s) (void)(s); +#endif +#endif + static void BrotliEncoderCleanupState(BrotliEncoderState* s) { MemoryManager* m = &s->memory_manager_; + + BROTLI_ENCODER_ON_FINISH(s); + if (BROTLI_IS_OOM(m)) { BrotliWipeOutMemoryManager(m); return; } + BROTLI_FREE(m, s->storage_); BROTLI_FREE(m, s->commands_); RingBufferFree(m, &s->ringbuffer_); @@ -1006,10 +944,38 @@ static BROTLI_BOOL EncodeData( MemoryManager* m = &s->memory_manager_; ContextType literal_context_mode; ContextLut literal_context_lut; + BROTLI_BOOL fast_compress = + s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || + s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY; data = s->ringbuffer_.buffer_; mask = s->ringbuffer_.mask_; + if (delta == 0) { /* No new input; still might want to flush or finish. */ + if (!data) { /* No input has been processed so far. */ + if (is_last) { /* Emit complete finalized stream. */ + BROTLI_DCHECK(s->last_bytes_bits_ <= 14); + s->last_bytes_ |= (uint16_t)(3u << s->last_bytes_bits_); + s->last_bytes_bits_ = (uint8_t)(s->last_bytes_bits_ + 2u); + s->tiny_buf_.u8[0] = (uint8_t)s->last_bytes_; + s->tiny_buf_.u8[1] = (uint8_t)(s->last_bytes_ >> 8); + *output = s->tiny_buf_.u8; + *out_size = (s->last_bytes_bits_ + 7u) >> 3u; + return BROTLI_TRUE; + } else { /* No data, not last -> no-op. */ + *out_size = 0; + return BROTLI_TRUE; + } + } else { + /* Fast compress performs flush every block -> flush is no-op. */ + if (!is_last && (!force_flush || fast_compress)) { /* Another no-op. */ + *out_size = 0; + return BROTLI_TRUE; + } + } + } + BROTLI_DCHECK(data); + if (s->params.quality > s->params.dictionary.max_quality) return BROTLI_FALSE; /* Adding more blocks after "last" block is forbidden. 
*/ if (s->is_last_block_emitted_) return BROTLI_FALSE; @@ -1030,19 +996,12 @@ static BROTLI_BOOL EncodeData( } } - if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || - s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { + if (fast_compress) { uint8_t* storage; size_t storage_ix = s->last_bytes_bits_; size_t table_size; int* table; - if (delta == 0 && !is_last) { - /* We have no new input data and we don't have to finish the stream, so - nothing to do. */ - *out_size = 0; - return BROTLI_TRUE; - } storage = GetBrotliStorage(s, 2 * bytes + 503); if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; storage[0] = (uint8_t)s->last_bytes_; @@ -1238,242 +1197,6 @@ static size_t WriteMetadataHeader( return (storage_ix + 7u) >> 3; } -static BROTLI_NOINLINE BROTLI_BOOL BrotliCompressBufferQuality10( - int lgwin, size_t input_size, const uint8_t* input_buffer, - size_t* encoded_size, uint8_t* encoded_buffer) { - MemoryManager* m = - (MemoryManager*)BrotliBootstrapAlloc(sizeof(MemoryManager), 0, 0, 0); - - const size_t mask = BROTLI_SIZE_MAX >> 1; - int dist_cache[4] = { 4, 11, 15, 16 }; - int saved_dist_cache[4] = { 4, 11, 15, 16 }; - BROTLI_BOOL ok = BROTLI_TRUE; - const size_t max_out_size = *encoded_size; - size_t total_out_size = 0; - uint16_t last_bytes; - uint8_t last_bytes_bits; - - const size_t hasher_eff_size = BROTLI_MIN(size_t, - input_size, BROTLI_MAX_BACKWARD_LIMIT(lgwin) + BROTLI_WINDOW_GAP); - - const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1); - size_t max_block_size; - const size_t max_metablock_size = (size_t)1 << lgmetablock; - const size_t max_literals_per_metablock = max_metablock_size / 8; - const size_t max_commands_per_metablock = max_metablock_size / 8; - size_t metablock_start = 0; - uint8_t prev_byte = 0; - uint8_t prev_byte2 = 0; - - BrotliEncoderParams* params = NULL; - Hasher* hasher = NULL; - - if (m == NULL) return BROTLI_FALSE; - BrotliInitMemoryManager(m, 0, 0, 0); - params = BROTLI_ALLOC(m, BrotliEncoderParams, 2); - hasher = 
BROTLI_ALLOC(m, Hasher, 1); - if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(params) || BROTLI_IS_NULL(hasher)) { - goto oom; - } - BrotliEncoderInitParams(params); - HasherInit(hasher); - - params->quality = 10; - params->lgwin = lgwin; - if (lgwin > BROTLI_MAX_WINDOW_BITS) { - params->large_window = BROTLI_TRUE; - } - SanitizeParams(params); - params->lgblock = ComputeLgBlock(params); - ChooseDistanceParams(params); - max_block_size = (size_t)1 << params->lgblock; - - /* Since default static dictionary is used we assume that - * params->quality < params->dictionary.max_quality. */ - - BROTLI_DCHECK(input_size <= mask + 1); - EncodeWindowBits(lgwin, params->large_window, &last_bytes, &last_bytes_bits); - InitOrStitchToPreviousBlock(m, hasher, input_buffer, mask, params, - 0, hasher_eff_size, BROTLI_TRUE); - if (BROTLI_IS_OOM(m)) goto oom; - - while (ok && metablock_start < input_size) { - const size_t metablock_end = - BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size); - const size_t expected_num_commands = - (metablock_end - metablock_start) / 12 + 16; - Command* commands = 0; - size_t num_commands = 0; - size_t last_insert_len = 0; - size_t num_literals = 0; - size_t metablock_size = 0; - size_t cmd_alloc_size = 0; - BROTLI_BOOL is_last; - uint8_t* storage; - size_t storage_ix; - - ContextType literal_context_mode = ChooseContextMode(params, - input_buffer, metablock_start, mask, metablock_end - metablock_start); - ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode); - - size_t block_start; - for (block_start = metablock_start; block_start < metablock_end; ) { - size_t block_size = - BROTLI_MIN(size_t, metablock_end - block_start, max_block_size); - ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1); - size_t path_size; - size_t new_cmd_alloc_size; - if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) goto oom; - BrotliInitZopfliNodes(nodes, block_size + 1); - StitchToPreviousBlockH10(&hasher->privat._H10, block_size, 
block_start, - input_buffer, mask); - path_size = BrotliZopfliComputeShortestPath(m, block_size, block_start, - input_buffer, mask, literal_context_lut, params, dist_cache, hasher, - nodes); - if (BROTLI_IS_OOM(m)) goto oom; - /* We allocate a command buffer in the first iteration of this loop that - will be likely big enough for the whole metablock, so that for most - inputs we will not have to reallocate in later iterations. We do the - allocation here and not before the loop, because if the input is small, - this will be allocated after the Zopfli cost model is freed, so this - will not increase peak memory usage. - TODO(eustas): If the first allocation is too small, increase command - buffer size exponentially. */ - new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands, - num_commands + path_size + 1); - if (cmd_alloc_size != new_cmd_alloc_size) { - Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size); - if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_commands)) goto oom; - cmd_alloc_size = new_cmd_alloc_size; - if (commands) { - memcpy(new_commands, commands, sizeof(Command) * num_commands); - BROTLI_FREE(m, commands); - } - commands = new_commands; - } - BrotliZopfliCreateCommands(block_size, block_start, &nodes[0], dist_cache, - &last_insert_len, params, &commands[num_commands], &num_literals); - num_commands += path_size; - block_start += block_size; - metablock_size += block_size; - BROTLI_FREE(m, nodes); - if (num_literals > max_literals_per_metablock || - num_commands > max_commands_per_metablock) { - break; - } - } - - if (last_insert_len > 0) { - InitInsertCommand(&commands[num_commands++], last_insert_len); - num_literals += last_insert_len; - } - - is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size); - storage = NULL; - storage_ix = last_bytes_bits; - - if (metablock_size == 0) { - /* Write the ISLAST and ISEMPTY bits. 
*/ - storage = BROTLI_ALLOC(m, uint8_t, 16); - if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom; - storage[0] = (uint8_t)last_bytes; - storage[1] = (uint8_t)(last_bytes >> 8); - BrotliWriteBits(2, 3, &storage_ix, storage); - storage_ix = (storage_ix + 7u) & ~7u; - } else if (!ShouldCompress(input_buffer, mask, metablock_start, - metablock_size, num_literals, num_commands)) { - /* Restore the distance cache, as its last update by - CreateBackwardReferences is now unused. */ - memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); - storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16); - if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom; - storage[0] = (uint8_t)last_bytes; - storage[1] = (uint8_t)(last_bytes >> 8); - BrotliStoreUncompressedMetaBlock(is_last, input_buffer, - metablock_start, mask, metablock_size, - &storage_ix, storage); - } else { - MetaBlockSplit mb; - BrotliEncoderParams* block_params = params + 1; - *block_params = *params; /* shallow copy */ - InitMetaBlockSplit(&mb); - BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask, - block_params, - prev_byte, prev_byte2, - commands, num_commands, - literal_context_mode, - &mb); - if (BROTLI_IS_OOM(m)) goto oom; - { - /* The number of distance symbols effectively used for distance - histograms. It might be less than distance alphabet size - for "Large Window Brotli" (32-bit). 
*/ - BrotliOptimizeHistograms(block_params->dist.alphabet_size_limit, &mb); - } - storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503); - if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom; - storage[0] = (uint8_t)last_bytes; - storage[1] = (uint8_t)(last_bytes >> 8); - BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size, - mask, prev_byte, prev_byte2, - is_last, - block_params, - literal_context_mode, - commands, num_commands, - &mb, - &storage_ix, storage); - if (BROTLI_IS_OOM(m)) goto oom; - if (metablock_size + 4 < (storage_ix >> 3)) { - /* Restore the distance cache and last byte. */ - memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); - storage[0] = (uint8_t)last_bytes; - storage[1] = (uint8_t)(last_bytes >> 8); - storage_ix = last_bytes_bits; - BrotliStoreUncompressedMetaBlock(is_last, input_buffer, - metablock_start, mask, - metablock_size, &storage_ix, storage); - } - DestroyMetaBlockSplit(m, &mb); - } - last_bytes = (uint16_t)(storage[storage_ix >> 3]); - last_bytes_bits = storage_ix & 7u; - metablock_start += metablock_size; - if (metablock_start < input_size) { - prev_byte = input_buffer[metablock_start - 1]; - prev_byte2 = input_buffer[metablock_start - 2]; - } - /* Save the state of the distance cache in case we need to restore it for - emitting an uncompressed block. 
*/ - memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0])); - - { - const size_t out_size = storage_ix >> 3; - total_out_size += out_size; - if (total_out_size <= max_out_size) { - memcpy(encoded_buffer, storage, out_size); - encoded_buffer += out_size; - } else { - ok = BROTLI_FALSE; - } - } - BROTLI_FREE(m, storage); - BROTLI_FREE(m, commands); - } - - *encoded_size = total_out_size; - DestroyHasher(m, hasher); - BROTLI_FREE(m, hasher); - BrotliEncoderCleanupParams(m, params); - BROTLI_FREE(m, params); - BrotliBootstrapFree(m, m); - return ok; - -oom: - BrotliWipeOutMemoryManager(m); - BrotliBootstrapFree(m, m); - return BROTLI_FALSE; -} - size_t BrotliEncoderMaxCompressedSize(size_t input_size) { /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */ size_t num_large_blocks = input_size >> 14; @@ -1539,17 +1262,6 @@ BROTLI_BOOL BrotliEncoderCompress( *encoded_buffer = 6; return BROTLI_TRUE; } - if (quality == 10) { - /* TODO(eustas): Implement this direct path for all quality levels. */ - const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS, - BROTLI_MAX(int, 16, lgwin)); - int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer, - encoded_size, encoded_buffer); - if (!ok || (max_out_size && *encoded_size > max_out_size)) { - goto fallback; - } - return BROTLI_TRUE; - } s = BrotliEncoderCreateInstance(0, 0, 0); if (!s) { @@ -1561,6 +1273,7 @@ BROTLI_BOOL BrotliEncoderCompress( uint8_t* next_out = encoded_buffer; size_t total_out = 0; BROTLI_BOOL result = BROTLI_FALSE; + /* TODO(eustas): check that parameters are sane. */ BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality); BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin); BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode); @@ -1612,6 +1325,18 @@ static void InjectBytePaddingBlock(BrotliEncoderState* s) { s->available_out_ += (seal_bits + 7) >> 3; } +/* Fills the |total_out|, if it is not NULL. 
*/ +static void SetTotalOut(BrotliEncoderState* s, size_t* total_out) { + if (total_out) { + /* Saturating conversion uint64_t -> size_t */ + size_t result = (size_t)-1; + if (s->total_out_ < result) { + result = (size_t)s->total_out_; + } + *total_out = result; + } +} + /* Injects padding bits or pushes compressed data to output. Returns false if nothing is done. */ static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s, @@ -1631,7 +1356,7 @@ static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s, s->next_out_ += copy_output_size; s->available_out_ -= copy_output_size; s->total_out_ += copy_output_size; - if (total_out) *total_out = s->total_out_; + SetTotalOut(s, total_out); return BROTLI_TRUE; } @@ -1740,6 +1465,7 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast( if (block_size != 0) { *next_in += block_size; *available_in -= block_size; + s->total_in_ += block_size; } if (inplace) { size_t out_bytes = storage_ix >> 3; @@ -1748,7 +1474,7 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast( *next_out += out_bytes; *available_out -= out_bytes; s->total_out_ += out_bytes; - if (total_out) *total_out = s->total_out_; + SetTotalOut(s, total_out); } else { size_t out_bytes = storage_ix >> 3; s->next_out_ = storage; @@ -1817,6 +1543,7 @@ static BROTLI_BOOL ProcessMetadata( memcpy(*next_out, *next_in, copy); *next_in += copy; *available_in -= copy; + s->total_in_ += copy; /* not actually data input, though */ s->remaining_metadata_bytes_ -= copy; *next_out += copy; *available_out -= copy; @@ -1827,6 +1554,7 @@ static BROTLI_BOOL ProcessMetadata( memcpy(s->next_out_, *next_in, copy); *next_in += copy; *available_in -= copy; + s->total_in_ += copy; /* not actually data input, though */ s->remaining_metadata_bytes_ -= copy; s->available_out_ = copy; } @@ -1854,7 +1582,7 @@ static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) { BROTLI_BOOL BrotliEncoderCompressStream( BrotliEncoderState* s, BrotliEncoderOperation op, size_t* 
available_in, - const uint8_t** next_in, size_t* available_out,uint8_t** next_out, + const uint8_t** next_in, size_t* available_out, uint8_t** next_out, size_t* total_out) { if (!EnsureInitialized(s)) return BROTLI_FALSE; @@ -1896,6 +1624,7 @@ BROTLI_BOOL BrotliEncoderCompressStream( CopyInputToRingBuffer(s, copy_input_size, *next_in); *next_in += copy_input_size; *available_in -= copy_input_size; + s->total_in_ += copy_input_size; if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size); continue; } @@ -2021,7 +1750,7 @@ void BrotliEncoderDestroyPreparedDictionary( } if (dict->dictionary == NULL) { /* This should never ever happen. */ - } else if (*dict->dictionary == kPreparedDictionaryMagic) { + } else if (*dict->dictionary == kLeanPreparedDictionaryMagic) { DestroyPreparedDictionary( &dict->memory_manager_, (PreparedDictionary*)dict->dictionary); } else if (*dict->dictionary == kSharedDictionaryMagic) { @@ -2029,7 +1758,8 @@ void BrotliEncoderDestroyPreparedDictionary( (SharedEncoderDictionary*)dict->dictionary); BrotliFree(&dict->memory_manager_, dict->dictionary); } else { - /* This should never ever happen. */ + /* There is also kPreparedDictionaryMagic, but such instances should be + * constructed and destroyed by different means. 
*/ } dict->dictionary = NULL; BrotliDestroyManagedDictionary(dict); @@ -2048,7 +1778,8 @@ BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(BrotliEncoderState* state, dict = (BrotliEncoderPreparedDictionary*)managed_dictionary->dictionary; } current = &state->params.dictionary; - if (magic == kPreparedDictionaryMagic) { + if (magic == kPreparedDictionaryMagic || + magic == kLeanPreparedDictionaryMagic) { const PreparedDictionary* prepared = (const PreparedDictionary*)dict; if (!AttachPreparedDictionary(&current->compound, prepared)) { return BROTLI_FALSE; @@ -2176,7 +1907,15 @@ size_t BrotliEncoderGetPreparedDictionarySize( return sizeof(PreparedDictionary) + dictionary->source_size + (sizeof(uint32_t) << dictionary->slot_bits) + (sizeof(uint16_t) << dictionary->bucket_bits) + - (sizeof(uint32_t) * dictionary->source_offset) + overhead; + (sizeof(uint32_t) * dictionary->num_items) + overhead; + } else if (magic == kLeanPreparedDictionaryMagic) { + const PreparedDictionary* dictionary = + (const PreparedDictionary*)prepared; + /* Keep in sync with step 3 of CreatePreparedDictionary */ + return sizeof(PreparedDictionary) + sizeof(uint8_t*) + + (sizeof(uint32_t) << dictionary->slot_bits) + + (sizeof(uint16_t) << dictionary->bucket_bits) + + (sizeof(uint32_t) * dictionary->num_items) + overhead; } else if (magic == kSharedDictionaryMagic) { const SharedEncoderDictionary* dictionary = (const SharedEncoderDictionary*)prepared; diff --git a/c/enc/encoder_dict.h b/c/enc/encoder_dict.h index b5b591d..b291f98 100644 --- a/c/enc/encoder_dict.h +++ b/c/enc/encoder_dict.h @@ -7,10 +7,11 @@ #ifndef BROTLI_ENC_ENCODER_DICT_H_ #define BROTLI_ENC_ENCODER_DICT_H_ -#include "../common/dictionary.h" -#include "../common/platform.h" #include #include + +#include "../common/dictionary.h" +#include "../common/platform.h" #include "compound_dictionary.h" #include "memory.h" #include "static_dict_lut.h" @@ -103,9 +104,6 @@ typedef struct ContextualEncoderDictionary { BrotliEncoderDictionary*
instances_; } ContextualEncoderDictionary; -static const uint32_t kSharedDictionaryMagic = 0xDEBCEDE1; -static const uint32_t kManagedDictionaryMagic = 0xDEBCEDE2; - typedef struct SharedEncoderDictionary { /* Magic value to distinguish this struct from PreparedDictionary for certain external usages. */ diff --git a/c/enc/entropy_encode.c b/c/enc/entropy_encode.c index b2dcbbd..9aed43b 100644 --- a/c/enc/entropy_encode.c +++ b/c/enc/entropy_encode.c @@ -10,9 +10,10 @@ #include /* memset */ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #if defined(__cplusplus) || defined(c_plusplus) extern "C" { diff --git a/c/enc/entropy_encode.h b/c/enc/entropy_encode.h index 9618e1d..e1c779c 100644 --- a/c/enc/entropy_encode.h +++ b/c/enc/entropy_encode.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_ENTROPY_ENCODE_H_ #define BROTLI_ENC_ENTROPY_ENCODE_H_ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/c/enc/entropy_encode_static.h b/c/enc/entropy_encode_static.h index 2be1c6d..ecff1fe 100644 --- a/c/enc/entropy_encode_static.h +++ b/c/enc/entropy_encode_static.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ #define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "write_bits.h" #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/fast_log.h b/c/enc/fast_log.h index 2094f13..f82f4cf 100644 --- a/c/enc/fast_log.h +++ b/c/enc/fast_log.h @@ -11,9 +11,10 @@ #include -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/c/enc/find_match_length.h b/c/enc/find_match_length.h index f8853a7..dee0414 100644 --- a/c/enc/find_match_length.h +++ b/c/enc/find_match_length.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_ #define 
BROTLI_ENC_FIND_MATCH_LENGTH_H_ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/c/enc/hash.h b/c/enc/hash.h index 9ead9e6..fc6e334 100644 --- a/c/enc/hash.h +++ b/c/enc/hash.h @@ -13,10 +13,12 @@ #include /* exit */ #include /* memcmp, memset */ +#include + #include "../common/constants.h" #include "../common/dictionary.h" #include "../common/platform.h" -#include +#include "compound_dictionary.h" #include "encoder_dict.h" #include "fast_log.h" #include "find_match_length.h" @@ -511,7 +513,6 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch( const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, const size_t max_length, const size_t distance_offset, const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) { - const uint32_t source_offset = self->source_offset; const uint32_t source_size = self->source_size; const size_t boundary = distance_offset - source_size; const uint32_t hash_bits = self->hash_bits; @@ -525,7 +526,7 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch( const uint32_t* slot_offsets = (uint32_t*)(&self[1]); const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]); const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]); - const uint8_t* source = (uint8_t*)(&items[source_offset]); + const uint8_t* source = NULL; const size_t cur_ix_masked = cur_ix & ring_buffer_mask; score_t best_score = out->score; @@ -539,6 +540,15 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch( const uint32_t head = heads[key]; const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head]; uint32_t item = (head == 0xFFFF) ? 
1 : 0; + + const void* tail = (void*)&items[self->num_items]; + if (self->magic == kPreparedDictionaryMagic) { + source = (const uint8_t*)tail; + } else { + /* kLeanPreparedDictionaryMagic */ + source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail); + } + for (i = 0; i < 4; ++i) { const size_t distance = (size_t)distance_cache[i]; size_t offset; @@ -608,7 +618,6 @@ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches( const size_t ring_buffer_mask, const size_t cur_ix, const size_t min_length, const size_t max_length, const size_t distance_offset, const size_t max_distance, BackwardMatch* matches, size_t match_limit) { - const uint32_t source_offset = self->source_offset; const uint32_t source_size = self->source_size; const uint32_t hash_bits = self->hash_bits; const uint32_t bucket_bits = self->bucket_bits; @@ -621,7 +630,7 @@ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches( const uint32_t* slot_offsets = (uint32_t*)(&self[1]); const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]); const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]); - const uint8_t* source = (uint8_t*)(&items[source_offset]); + const uint8_t* source = NULL; const size_t cur_ix_masked = cur_ix & ring_buffer_mask; size_t best_len = min_length; @@ -634,6 +643,15 @@ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches( const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head]; uint32_t item = (head == 0xFFFF) ? 
1 : 0; size_t found = 0; + + const void* tail = (void*)&items[self->num_items]; + if (self->magic == kPreparedDictionaryMagic) { + source = (const uint8_t*)tail; + } else { + /* kLeanPreparedDictionaryMagic */ + source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail); + } + while (item == 0) { size_t offset; size_t distance; diff --git a/c/enc/histogram.h b/c/enc/histogram.h index b213a8b..d1abd97 100644 --- a/c/enc/histogram.h +++ b/c/enc/histogram.h @@ -11,10 +11,11 @@ #include /* memset */ +#include + #include "../common/constants.h" #include "../common/context.h" #include "../common/platform.h" -#include #include "block_splitter.h" #include "command.h" diff --git a/c/enc/literal_cost.c b/c/enc/literal_cost.c index 4e5068e..2ac847f 100644 --- a/c/enc/literal_cost.c +++ b/c/enc/literal_cost.c @@ -11,8 +11,9 @@ #include /* memset */ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "fast_log.h" #include "utf8_util.h" diff --git a/c/enc/literal_cost.h b/c/enc/literal_cost.h index efc8e17..284a8e5 100644 --- a/c/enc/literal_cost.h +++ b/c/enc/literal_cost.h @@ -10,9 +10,10 @@ #ifndef BROTLI_ENC_LITERAL_COST_H_ #define BROTLI_ENC_LITERAL_COST_H_ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/c/enc/memory.c b/c/enc/memory.c index f3afebc..51e1b7f 100644 --- a/c/enc/memory.c +++ b/c/enc/memory.c @@ -12,9 +12,10 @@ #include /* exit, free, malloc */ #include /* memcpy */ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/c/enc/memory.h b/c/enc/memory.h index 13b23d4..cbe4e30 100644 --- a/c/enc/memory.h +++ b/c/enc/memory.h @@ -11,9 +11,10 @@ #include /* memcpy */ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { 
#endif diff --git a/c/enc/metablock.c b/c/enc/metablock.c index 47b577b..0c5c078 100644 --- a/c/enc/metablock.c +++ b/c/enc/metablock.c @@ -9,10 +9,11 @@ #include "metablock.h" +#include + #include "../common/constants.h" #include "../common/context.h" #include "../common/platform.h" -#include #include "bit_cost.h" #include "block_splitter.h" #include "cluster.h" diff --git a/c/enc/metablock.h b/c/enc/metablock.h index 50bd294..db38f8f 100644 --- a/c/enc/metablock.h +++ b/c/enc/metablock.h @@ -10,9 +10,10 @@ #ifndef BROTLI_ENC_METABLOCK_H_ #define BROTLI_ENC_METABLOCK_H_ +#include + #include "../common/context.h" #include "../common/platform.h" -#include #include "block_splitter.h" #include "command.h" #include "histogram.h" diff --git a/c/enc/params.h b/c/enc/params.h index cc74279..baeb319 100644 --- a/c/enc/params.h +++ b/c/enc/params.h @@ -10,6 +10,7 @@ #define BROTLI_ENC_PARAMS_H_ #include + #include "encoder_dict.h" typedef struct BrotliHasherParams { diff --git a/c/enc/prefix.h b/c/enc/prefix.h index b58d50b..0f006f1 100644 --- a/c/enc/prefix.h +++ b/c/enc/prefix.h @@ -10,9 +10,10 @@ #ifndef BROTLI_ENC_PREFIX_H_ #define BROTLI_ENC_PREFIX_H_ +#include + #include "../common/constants.h" #include "../common/platform.h" -#include #include "fast_log.h" #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/quality.h b/c/enc/quality.h index 392ab00..99891b4 100644 --- a/c/enc/quality.h +++ b/c/enc/quality.h @@ -10,8 +10,9 @@ #ifndef BROTLI_ENC_QUALITY_H_ #define BROTLI_ENC_QUALITY_H_ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "params.h" #define FAST_ONE_PASS_COMPRESSION_QUALITY 0 diff --git a/c/enc/ringbuffer.h b/c/enc/ringbuffer.h index 0db88cf..27245b7 100644 --- a/c/enc/ringbuffer.h +++ b/c/enc/ringbuffer.h @@ -11,8 +11,9 @@ #include /* memcpy */ -#include "../common/platform.h" #include + +#include "../common/platform.h" #include "memory.h" #include "quality.h" diff --git a/c/enc/state.h b/c/enc/state.h 
new file mode 100644 index 0000000..cb82987 --- /dev/null +++ b/c/enc/state.h @@ -0,0 +1,104 @@ +/* Copyright 2022 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +/* Encoder state. */ + +#ifndef BROTLI_ENC_STATE_H_ +#define BROTLI_ENC_STATE_H_ + +#include + +#include "command.h" +#include "compress_fragment.h" +#include "compress_fragment_two_pass.h" +#include "hash.h" +#include "memory.h" +#include "params.h" +#include "ringbuffer.h" + +typedef enum BrotliEncoderStreamState { + /* Default state. */ + BROTLI_STREAM_PROCESSING = 0, + /* Intermediate state; after next block is emitted, byte-padding should be + performed before getting back to default state. */ + BROTLI_STREAM_FLUSH_REQUESTED = 1, + /* Last metablock was produced; no more input is acceptable. */ + BROTLI_STREAM_FINISHED = 2, + /* Flushing compressed block and writing meta-data block header. */ + BROTLI_STREAM_METADATA_HEAD = 3, + /* Writing metadata block body. */ + BROTLI_STREAM_METADATA_BODY = 4 +} BrotliEncoderStreamState; + +typedef enum BrotliEncoderFlintState { + BROTLI_FLINT_NEEDS_2_BYTES = 2, + BROTLI_FLINT_NEEDS_1_BYTE = 1, + BROTLI_FLINT_WAITING_FOR_PROCESSING = 0, + BROTLI_FLINT_WAITING_FOR_FLUSHING = -1, + BROTLI_FLINT_DONE = -2 +} BrotliEncoderFlintState; + +typedef struct BrotliEncoderStateStruct { + BrotliEncoderParams params; + + MemoryManager memory_manager_; + + uint64_t input_pos_; + RingBuffer ringbuffer_; + size_t cmd_alloc_size_; + Command* commands_; + size_t num_commands_; + size_t num_literals_; + size_t last_insert_len_; + uint64_t last_flush_pos_; + uint64_t last_processed_pos_; + int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES]; + int saved_dist_cache_[4]; + uint16_t last_bytes_; + uint8_t last_bytes_bits_; + /* "Flint" is a tiny uncompressed block emitted before the continuation + block to unwire literal context from previous data. 
Despite being int8_t, + field is actually BrotliEncoderFlintState enum. */ + int8_t flint_; + uint8_t prev_byte_; + uint8_t prev_byte2_; + size_t storage_size_; + uint8_t* storage_; + + Hasher hasher_; + + /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */ + int small_table_[1 << 10]; /* 4KiB */ + int* large_table_; /* Allocated only when needed */ + size_t large_table_size_; + + BrotliOnePassArena* one_pass_arena_; + BrotliTwoPassArena* two_pass_arena_; + + /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */ + uint32_t* command_buf_; + uint8_t* literal_buf_; + + uint64_t total_in_; + uint8_t* next_out_; + size_t available_out_; + uint64_t total_out_; + /* Temporary buffer for padding flush bits or metadata block header / body. */ + union { + uint64_t u64[2]; + uint8_t u8[16]; + } tiny_buf_; + uint32_t remaining_metadata_bytes_; + BrotliEncoderStreamState stream_state_; + + BROTLI_BOOL is_last_block_emitted_; + BROTLI_BOOL is_initialized_; +} BrotliEncoderStateStruct; + +typedef struct BrotliEncoderStateStruct BrotliEncoderStateInternal; +#define BrotliEncoderState BrotliEncoderStateInternal + +#endif // BROTLI_ENC_STATE_H_ diff --git a/c/enc/static_dict.h b/c/enc/static_dict.h index f572bc6..ab83220 100644 --- a/c/enc/static_dict.h +++ b/c/enc/static_dict.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_STATIC_DICT_H_ #define BROTLI_ENC_STATIC_DICT_H_ +#include + #include "../common/dictionary.h" #include "../common/platform.h" -#include #include "encoder_dict.h" #if defined(__cplusplus) || defined(c_plusplus) diff --git a/c/enc/utf8_util.h b/c/enc/utf8_util.h index 8fda80c..a38a953 100644 --- a/c/enc/utf8_util.h +++ b/c/enc/utf8_util.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_UTF8_UTIL_H_ #define BROTLI_ENC_UTF8_UTIL_H_ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/c/enc/write_bits.h b/c/enc/write_bits.h index f6f88b4..242754b 100644 --- 
a/c/enc/write_bits.h +++ b/c/enc/write_bits.h @@ -9,9 +9,10 @@ #ifndef BROTLI_ENC_WRITE_BITS_H_ #define BROTLI_ENC_WRITE_BITS_H_ -#include "../common/platform.h" #include +#include "../common/platform.h" + #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif diff --git a/c/include/brotli/port.h b/c/include/brotli/port.h index a681ac4..0d50019 100644 --- a/c/include/brotli/port.h +++ b/c/include/brotli/port.h @@ -224,14 +224,6 @@ #define BROTLI_HAS_FEATURE(feature) (0) #endif -#if defined(ADDRESS_SANITIZER) || BROTLI_HAS_FEATURE(address_sanitizer) || \ - defined(THREAD_SANITIZER) || BROTLI_HAS_FEATURE(thread_sanitizer) || \ - defined(MEMORY_SANITIZER) || BROTLI_HAS_FEATURE(memory_sanitizer) -#define BROTLI_SANITIZED 1 -#else -#define BROTLI_SANITIZED 0 -#endif - #if defined(_WIN32) || defined(__CYGWIN__) #define BROTLI_PUBLIC #elif BROTLI_GNUC_VERSION_CHECK(3, 3, 0) || \ diff --git a/c/tools/brotli.c b/c/tools/brotli.c index 0ea45d3..80ead72 100644 --- a/c/tools/brotli.c +++ b/c/tools/brotli.c @@ -20,11 +20,12 @@ #include #include -#include "../common/constants.h" -#include "../common/version.h" #include #include +#include "../common/constants.h" +#include "../common/version.h" + #if !defined(_WIN32) #include #include diff --git a/c/tools/brotli.md b/c/tools/brotli.md index 895c955..cb6d6f3 100644 --- a/c/tools/brotli.md +++ b/c/tools/brotli.md @@ -1,15 +1,15 @@ -brotli(1) -- brotli, unbrotli - compress or decompress files -================================================================ +# NAME -SYNOPSIS --------- +brotli(1) -- brotli, unbrotli - compress or decompress files + +# SYNOPSIS `brotli` [*OPTION|FILE*]... 
`unbrotli` is equivalent to `brotli --decompress` -DESCRIPTION ------------ +# DESCRIPTION + `brotli` is a generic-purpose lossless compression algorithm that compresses data using a combination of a modern variant of the **LZ77** algorithm, Huffman coding and 2-nd order context modeling, with a compression ratio comparable to @@ -52,8 +52,7 @@ Default suffix is `.br`, but it could be specified with `--suffix` option. Conflicting or duplicate _options_ are not allowed. -OPTIONS -------- +# OPTIONS * `-#`: compression level (0-9); bigger values cause denser, but slower compression @@ -81,8 +80,8 @@ OPTIONS increase output verbosity * `-w NUM`, `--lgwin=NUM`: set LZ77 window size (0, 10-24) (default: 24); window size is - `(2**NUM - 16)`; 0 lets compressor decide over the optimal value; bigger - windows size improve density; decoder might require up to window size + `(pow(2, NUM) - 16)`; 0 lets compressor decide over the optimal value; + bigger windows size improve density; decoder might require up to window size memory to operate * `-D FILE`, `--dictionary=FILE`: use FILE as raw (LZ77) dictionary; same dictionary MUST be used both for @@ -94,8 +93,7 @@ OPTIONS * `-Z`, `--best`: use best compression level (default); same as "`-q 11`" -SEE ALSO --------- +# SEE ALSO `brotli` file format is defined in [RFC 7932](https://www.ietf.org/rfc/rfc7932.txt). 
@@ -105,6 +103,6 @@ SEE ALSO Mailing list: https://groups.google.com/forum/#!forum/brotli -BUGS ----- +# BUGS + Report bugs at: https://github.com/google/brotli/issues diff --git a/docs/brotli.1 b/docs/brotli.1 index 1970606..7ca1355 100644 --- a/docs/brotli.1 +++ b/docs/brotli.1 @@ -1,136 +1,129 @@ -.TH "BROTLI" "1" "August 2021" "" "User commands" -.SH "NAME" -\fBbrotli\fR \- brotli, unbrotli \- compress or decompress files +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "brotli" "1" "August 14 2021" "brotli 1.0.9" "User Manual" +.hy +.SH NAME +.PP +brotli(1) -- brotli, unbrotli - compress or decompress files .SH SYNOPSIS -.P -\fBbrotli\fP [\fIOPTION|FILE\fR]\.\.\. -.P -\fBunbrotli\fP is equivalent to \fBbrotli \-\-decompress\fP +.PP +\f[B]brotli\f[R] [\f[I]OPTION|FILE\f[R]]\&... +.PP +\f[B]unbrotli\f[R] is equivalent to \f[B]brotli --decompress\f[R] .SH DESCRIPTION -.P -\fBbrotli\fP is a generic\-purpose lossless compression algorithm that compresses -data using a combination of a modern variant of the \fBLZ77\fR algorithm, Huffman -coding and 2\-nd order context modeling, with a compression ratio comparable to -the best currently available general\-purpose compression methods\. It is similar -in speed with deflate but offers more dense compression\. -.P -\fBbrotli\fP command line syntax similar to \fBgzip (1)\fP and \fBzstd (1)\fP\|\. -Unlike \fBgzip (1)\fP, source files are preserved by default\. It is possible to -remove them after processing by using the \fB\-\-rm\fP \fIoption\fR\|\. -.P -Arguments that look like "\fB\-\-name\fP" or "\fB\-\-name=value\fP" are \fIoptions\fR\|\. Every -\fIoption\fR has a short form "\fB\-x\fP" or "\fB\-x value\fP"\. 
Multiple short form \fIoptions\fR -could be coalesced: -.RS 0 -.IP \(bu 2 -"\fB\-\-decompress \-\-stdout \-\-suffix=\.b\fP" works the same as -.IP \(bu 2 -"\fB\-d \-s \-S \.b\fP" and -.IP \(bu 2 -"\fB\-dsS \.b\fP" - -.RE -.P -\fBbrotli\fP has 3 operation modes: -.RS 0 -.IP \(bu 2 +.PP +\f[B]brotli\f[R] is a generic-purpose lossless compression algorithm +that compresses data using a combination of a modern variant of the +\f[B]LZ77\f[R] algorithm, Huffman coding and 2-nd order context +modeling, with a compression ratio comparable to the best currently +available general-purpose compression methods. +It is similar in speed with deflate but offers more dense compression. +.PP +\f[B]brotli\f[R] command line syntax similar to \f[B]gzip (1)\f[R] and +\f[B]zstd (1)\f[R]. +Unlike \f[B]gzip (1)\f[R], source files are preserved by default. +It is possible to remove them after processing by using the +\f[B]--rm\f[R] \f[I]option\f[R]. +.PP +Arguments that look like \[lq]\f[B]--name\f[R]\[rq] or +\[lq]\f[B]--name=value\f[R]\[rq] are \f[I]options\f[R]. +Every \f[I]option\f[R] has a short form \[lq]\f[B]-x\f[R]\[rq] or +\[lq]\f[B]-x value\f[R]\[rq]. +Multiple short form \f[I]options\f[R] could be coalesced: +.IP \[bu] 2 +\[lq]\f[B]--decompress --stdout --suffix=.b\f[R]\[rq] works the same as +.IP \[bu] 2 +\[lq]\f[B]-d -s -S .b\f[R]\[rq] and +.IP \[bu] 2 +\[lq]\f[B]-dsS .b\f[R]\[rq] +.PP +\f[B]brotli\f[R] has 3 operation modes: +.IP \[bu] 2 default mode is compression; -.IP \(bu 2 -\fB\-\-decompress\fP option activates decompression mode; -.IP \(bu 2 -\fB\-\-test\fP option switches to integrity test mode; this option is equivalent to -"\fB\-\-decompress \-\-stdout\fP" except that the decompressed data is discarded -instead of being written to standard output\. - -.RE -.P -Every non\-option argument is a \fIfile\fR entry\. If no \fIfiles\fR are given or \fIfile\fR -is "\fB\-\fP", \fBbrotli\fP reads from standard input\. All arguments after "\fB\-\-\fP" are -\fIfile\fR entries\. 
-.P -Unless \fB\-\-stdout\fP or \fB\-\-output\fP is specified, \fIfiles\fR are written to a new file -whose name is derived from the source \fIfile\fR name: -.RS 0 -.IP \(bu 2 -when compressing, a suffix is appended to the source filename to -get the target filename -.IP \(bu 2 -when decompressing, a suffix is removed from the source filename to -get the target filename - -.RE -.P -Default suffix is \fB\|\.br\fP, but it could be specified with \fB\-\-suffix\fP option\. -.P -Conflicting or duplicate \fIoptions\fR are not allowed\. +.IP \[bu] 2 +\f[B]--decompress\f[R] option activates decompression mode; +.IP \[bu] 2 +\f[B]--test\f[R] option switches to integrity test mode; this option is +equivalent to \[lq]\f[B]--decompress --stdout\f[R]\[rq] except that the +decompressed data is discarded instead of being written to standard +output. +.PP +Every non-option argument is a \f[I]file\f[R] entry. +If no \f[I]files\f[R] are given or \f[I]file\f[R] is +\[lq]\f[B]-\f[R]\[rq], \f[B]brotli\f[R] reads from standard input. +All arguments after \[lq]\f[B]--\f[R]\[rq] are \f[I]file\f[R] entries. +.PP +Unless \f[B]--stdout\f[R] or \f[B]--output\f[R] is specified, +\f[I]files\f[R] are written to a new file whose name is derived from the +source \f[I]file\f[R] name: +.IP \[bu] 2 +when compressing, a suffix is appended to the source filename to get the +target filename +.IP \[bu] 2 +when decompressing, a suffix is removed from the source filename to get +the target filename +.PP +Default suffix is \f[B].br\f[R], but it could be specified with +\f[B]--suffix\f[R] option. +.PP +Conflicting or duplicate \f[I]options\f[R] are not allowed. 
.SH OPTIONS -.RS 0 -.IP \(bu 2 -\fB\-#\fP: - compression level (0\-9); bigger values cause denser, but slower compression -.IP \(bu 2 -\fB\-c\fP, \fB\-\-stdout\fP: - write on standard output -.IP \(bu 2 -\fB\-d\fP, \fB\-\-decompress\fP: - decompress mode -.IP \(bu 2 -\fB\-f\fP, \fB\-\-force\fP: - force output file overwrite -.IP \(bu 2 -\fB\-h\fP, \fB\-\-help\fP: - display this help and exit -.IP \(bu 2 -\fB\-j\fP, \fB\-\-rm\fP: - remove source file(s); \fBgzip (1)\fP\-like behaviour -.IP \(bu 2 -\fB\-k\fP, \fB\-\-keep\fP: - keep source file(s); \fBzstd (1)\fP\-like behaviour -.IP \(bu 2 -\fB\-n\fP, \fB\-\-no\-copy\-stat\fP: - do not copy source file(s) attributes -.IP \(bu 2 -\fB\-o FILE\fP, \fB\-\-output=FILE\fP - output file; valid only if there is a single input entry -.IP \(bu 2 -\fB\-q NUM\fP, \fB\-\-quality=NUM\fP: - compression level (0\-11); bigger values cause denser, but slower compression -.IP \(bu 2 -\fB\-t\fP, \fB\-\-test\fP: - test file integrity mode -.IP \(bu 2 -\fB\-v\fP, \fB\-\-verbose\fP: - increase output verbosity -.IP \(bu 2 -\fB\-w NUM\fP, \fB\-\-lgwin=NUM\fP: - set LZ77 window size (0, 10\-24) (default: 24); window size is - \fB(2**NUM \- 16)\fP; 0 lets compressor decide over the optimal value; bigger - windows size improve density; decoder might require up to window size - memory to operate -.IP \(bu 2 -\fB\-D FILE\fP, \fB\-\-dictionary=FILE\fP: - use FILE as raw (LZ77) dictionary; same dictionary MUST be used both for - compression and decompression -.IP \(bu 2 -\fB\-S SUF\fP, \fB\-\-suffix=SUF\fP: - output file suffix (default: \fB\|\.br\fP) -.IP \(bu 2 -\fB\-V\fP, \fB\-\-version\fP: - display version and exit -.IP \(bu 2 -\fB\-Z\fP, \fB\-\-best\fP: - use best compression level (default); same as "\fB\-q 11\fP" - -.RE +.IP \[bu] 2 +\f[B]-#\f[R]: compression level (0-9); bigger values cause denser, but +slower compression +.IP \[bu] 2 +\f[B]-c\f[R], \f[B]--stdout\f[R]: write on standard output +.IP \[bu] 2 +\f[B]-d\f[R], 
\f[B]--decompress\f[R]: decompress mode +.IP \[bu] 2 +\f[B]-f\f[R], \f[B]--force\f[R]: force output file overwrite +.IP \[bu] 2 +\f[B]-h\f[R], \f[B]--help\f[R]: display this help and exit +.IP \[bu] 2 +\f[B]-j\f[R], \f[B]--rm\f[R]: remove source file(s); \f[B]gzip +(1)\f[R]-like behaviour +.IP \[bu] 2 +\f[B]-k\f[R], \f[B]--keep\f[R]: keep source file(s); \f[B]zstd +(1)\f[R]-like behaviour +.IP \[bu] 2 +\f[B]-n\f[R], \f[B]--no-copy-stat\f[R]: do not copy source file(s) +attributes +.IP \[bu] 2 +\f[B]-o FILE\f[R], \f[B]--output=FILE\f[R] output file; valid only if +there is a single input entry +.IP \[bu] 2 +\f[B]-q NUM\f[R], \f[B]--quality=NUM\f[R]: compression level (0-11); +bigger values cause denser, but slower compression +.IP \[bu] 2 +\f[B]-t\f[R], \f[B]--test\f[R]: test file integrity mode +.IP \[bu] 2 +\f[B]-v\f[R], \f[B]--verbose\f[R]: increase output verbosity +.IP \[bu] 2 +\f[B]-w NUM\f[R], \f[B]--lgwin=NUM\f[R]: set LZ77 window size (0, 10-24) +(default: 24); window size is \f[B](pow(2, NUM) - 16)\f[R]; 0 lets +compressor decide over the optimal value; bigger windows size improve +density; decoder might require up to window size memory to operate +.IP \[bu] 2 +\f[B]-D FILE\f[R], \f[B]--dictionary=FILE\f[R]: use FILE as raw (LZ77) +dictionary; same dictionary MUST be used both for compression and +decompression +.IP \[bu] 2 +\f[B]-S SUF\f[R], \f[B]--suffix=SUF\f[R]: output file suffix (default: +\f[B].br\f[R]) +.IP \[bu] 2 +\f[B]-V\f[R], \f[B]--version\f[R]: display version and exit +.IP \[bu] 2 +\f[B]-Z\f[R], \f[B]--best\f[R]: use best compression level (default); +same as \[lq]\f[B]-q 11\f[R]\[rq] .SH SEE ALSO -.P -\fBbrotli\fP file format is defined in -RFC 7932 \fIhttps://www\.ietf\.org/rfc/rfc7932\.txt\fR\|\. -.P -\fBbrotli\fP is open\-sourced under the -MIT License \fIhttps://opensource\.org/licenses/MIT\fR\|\. 
-.P -Mailing list: https://groups\.google\.com/forum/#!forum/brotli +.PP +\f[B]brotli\f[R] file format is defined in RFC +7932 (https://www.ietf.org/rfc/rfc7932.txt). +.PP +\f[B]brotli\f[R] is open-sourced under the MIT +License (https://opensource.org/licenses/MIT). +.PP +Mailing list: https://groups.google.com/forum/#!forum/brotli .SH BUGS -.P -Report bugs at: https://github\.com/google/brotli/issues +.PP +Report bugs at: https://github.com/google/brotli/issues diff --git a/go/WORKSPACE b/go/WORKSPACE index 03d38da..570e250 100644 --- a/go/WORKSPACE +++ b/go/WORKSPACE @@ -9,10 +9,10 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") http_archive( name = "io_bazel_rules_go", - sha256 = "69de5c704a05ff37862f7e0f5534d4f479418afc21806c887db544a316f3cb6b", + sha256 = "2b1641428dff9018f9e85c0384f03ec6c10660d935b750e3fa1492a281a53b0f", urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.27.0/rules_go-v0.27.0.tar.gz", - "https://github.com/bazelbuild/rules_go/releases/download/v0.27.0/rules_go-v0.27.0.tar.gz", + "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.29.0/rules_go-v0.29.0.zip", + "https://github.com/bazelbuild/rules_go/releases/download/v0.29.0/rules_go-v0.29.0.zip", ], ) @@ -20,4 +20,17 @@ load("@io_bazel_rules_go//go:deps.bzl", "go_register_toolchains", "go_rules_depe go_rules_dependencies() -go_register_toolchains(version = "1.16") +go_register_toolchains(version = "1.17.1") + +http_archive( + name = "bazel_gazelle", + sha256 = "de69a09dc70417580aabf20a28619bb3ef60d038470c7cf8442fafcf627c21cb", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/bazel-gazelle/releases/download/v0.24.0/bazel-gazelle-v0.24.0.tar.gz", + "https://github.com/bazelbuild/bazel-gazelle/releases/download/v0.24.0/bazel-gazelle-v0.24.0.tar.gz", + ], +) + +load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies") + +gazelle_dependencies() diff --git 
a/java/org/brotli/dec/BrotliInputStream.java b/java/org/brotli/dec/BrotliInputStream.java index 5eca238..7bbe2f6 100644 --- a/java/org/brotli/dec/BrotliInputStream.java +++ b/java/org/brotli/dec/BrotliInputStream.java @@ -18,6 +18,14 @@ public class BrotliInputStream extends InputStream { public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 256; + /** + * Value expected by InputStream contract when stream is over. + * + * In Java it is -1. + * In C# it is 0 (should be patched during transpilation). + */ + private static final int END_OF_STREAM_MARKER = -1; + /** * Internal buffer used for efficient byte-by-byte reading. */ @@ -112,7 +120,8 @@ public class BrotliInputStream extends InputStream { if (bufferOffset >= remainingBufferBytes) { remainingBufferBytes = read(buffer, 0, buffer.length); bufferOffset = 0; - if (remainingBufferBytes == -1) { + if (remainingBufferBytes == END_OF_STREAM_MARKER) { + // Both Java and C# return the same value for EOF on single-byte read. return -1; } } @@ -151,10 +160,9 @@ public class BrotliInputStream extends InputStream { state.outputLength = destLen; state.outputUsed = 0; Decode.decompress(state); - if (state.outputUsed == 0) { - return -1; - } - return state.outputUsed + copyLen; + copyLen += state.outputUsed; + copyLen = (copyLen > 0) ? 
copyLen : END_OF_STREAM_MARKER; + return copyLen; } catch (BrotliRuntimeException ex) { throw new IOException("Brotli stream decoding failed", ex); } diff --git a/java/org/brotli/dec/Decode.java b/java/org/brotli/dec/Decode.java index b139ef7..c386995 100644 --- a/java/org/brotli/dec/Decode.java +++ b/java/org/brotli/dec/Decode.java @@ -919,6 +919,7 @@ final class Decode { private static int writeRingBuffer(State s) { int toWrite = Math.min(s.outputLength - s.outputUsed, s.ringBufferBytesReady - s.ringBufferBytesWritten); + // TODO(eustas): DCHECK(toWrite >= 0) if (toWrite != 0) { System.arraycopy(s.ringBuffer, s.ringBufferBytesWritten, s.output, s.outputOffset + s.outputUsed, toWrite); diff --git a/java/org/brotli/dec/build_defs.bzl b/java/org/brotli/dec/build_defs.bzl index fd23a0d..d4f280b 100644 --- a/java/org/brotli/dec/build_defs.bzl +++ b/java/org/brotli/dec/build_defs.bzl @@ -5,13 +5,20 @@ _TEST_JVM_FLAGS = [ ] def brotli_java_test(name, main_class = None, jvm_flags = None, **kwargs): - """test duplication rule that creates 32/64-bit test pair.""" + """test duplication rule that creates 32/64-bit test pair. + + Args: + name: target name prefix + main_class: override for test_class + jvm_flags: base Java VM options + **kwargs: pass-through + """ if jvm_flags == None: jvm_flags = [] jvm_flags = jvm_flags + _TEST_JVM_FLAGS - test_package = native.package_name().replace("/", ".").replace("javatests.", "") + test_package = native.package_name().replace("/", ".").replace("third_party.brotli.java.", "") if main_class == None: test_class = test_package + "." 
+ name @@ -23,6 +30,7 @@ def brotli_java_test(name, main_class = None, jvm_flags = None, **kwargs): main_class = main_class, test_class = test_class, jvm_flags = jvm_flags + ["-DBROTLI_32_BIT_CPU=true"], + visibility = ["//visibility:private"], **kwargs ) @@ -31,5 +39,6 @@ def brotli_java_test(name, main_class = None, jvm_flags = None, **kwargs): main_class = main_class, test_class = test_class, jvm_flags = jvm_flags + ["-DBROTLI_32_BIT_CPU=false"], + visibility = ["//visibility:private"], **kwargs ) diff --git a/java/org/brotli/wrapper/dec/decoder_jni.cc b/java/org/brotli/wrapper/dec/decoder_jni.cc index 3328a1a..42e6bae 100644 --- a/java/org/brotli/wrapper/dec/decoder_jni.cc +++ b/java/org/brotli/wrapper/dec/decoder_jni.cc @@ -4,12 +4,12 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -#include "decoder_jni.h" - -#include +#include "decoder_jni.h" // NOLINT: build/include #include +#include + namespace { /* A structure used to persist the decoder's state in between calls. 
*/ typedef struct DecoderHandle { diff --git a/java/org/brotli/wrapper/dec/decoder_jni_onload.cc b/java/org/brotli/wrapper/dec/decoder_jni_onload.cc index 2f93de0..b69f954 100644 --- a/java/org/brotli/wrapper/dec/decoder_jni_onload.cc +++ b/java/org/brotli/wrapper/dec/decoder_jni_onload.cc @@ -6,7 +6,7 @@ #include -#include "decoder_jni.h" +#include "decoder_jni.h" // NOLINT: build/include #ifdef __cplusplus extern "C" { @@ -36,7 +36,7 @@ JNIEXPORT jint JNI_OnLoad(JavaVM* vm, void* reserved) { } jclass clazz = - env->FindClass("com/google/compression/brotli/wrapper/dec/DecoderJNI"); + env->FindClass("org/brotli/wrapper/dec/DecoderJNI"); if (clazz == nullptr) { return -1; } diff --git a/java/org/brotli/wrapper/enc/EncoderJNI.java b/java/org/brotli/wrapper/enc/EncoderJNI.java index 3e77207..b8e32d2 100644 --- a/java/org/brotli/wrapper/enc/EncoderJNI.java +++ b/java/org/brotli/wrapper/enc/EncoderJNI.java @@ -30,8 +30,11 @@ class EncoderJNI { private static class PreparedDictionaryImpl implements PreparedDictionary { private ByteBuffer data; + /** Reference to (non-copied) LZ data. 
*/ + private ByteBuffer rawData; - private PreparedDictionaryImpl(ByteBuffer data) { + private PreparedDictionaryImpl(ByteBuffer data, ByteBuffer rawData) { this.data = data; + this.rawData = rawData; } @@ -45,6 +48,7 @@ class EncoderJNI { try { ByteBuffer data = this.data; this.data = null; + this.rawData = null; nativeDestroyDictionary(data); } finally { super.finalize(); @@ -66,7 +70,7 @@ class EncoderJNI { if (dictionaryData == null) { throw new IllegalStateException("OOM"); } - return new PreparedDictionaryImpl(dictionaryData); + return new PreparedDictionaryImpl(dictionaryData, dictionary); } static class Wrapper { diff --git a/java/org/brotli/wrapper/enc/encoder_jni.cc b/java/org/brotli/wrapper/enc/encoder_jni.cc index adcc7bf..796908b 100644 --- a/java/org/brotli/wrapper/enc/encoder_jni.cc +++ b/java/org/brotli/wrapper/enc/encoder_jni.cc @@ -4,12 +4,11 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ +#include #include #include -#include - namespace { /* A structure used to persist the encoder's state in between calls. 
*/ typedef struct EncoderHandle { diff --git a/python/_brotli.cc b/python/_brotli.cc index d4075bd..54c7363 100644 --- a/python/_brotli.cc +++ b/python/_brotli.cc @@ -2,11 +2,12 @@ #include #include #include -#include -#include "../common/version.h" + #include #include +#include + #if PY_MAJOR_VERSION >= 3 #define PyInt_Check PyLong_Check #define PyInt_AsLong PyLong_AsLong @@ -745,8 +746,9 @@ PyMODINIT_FUNC INIT_BROTLI(void) { PyModule_AddIntConstant(m, "MODE_FONT", (int) BROTLI_MODE_FONT); char version[16]; + uint32_t decoderVersion = BrotliDecoderVersion(); snprintf(version, sizeof(version), "%d.%d.%d", - BROTLI_VERSION >> 24, (BROTLI_VERSION >> 12) & 0xFFF, BROTLI_VERSION & 0xFFF); + decoderVersion >> 24, (decoderVersion >> 12) & 0xFFF, decoderVersion & 0xFFF); PyModule_AddStringConstant(m, "__version__", version); RETURN_BROTLI; diff --git a/research/brotli_decoder.c b/research/brotli_decoder.c index 3febcbd..f50fb34 100644 --- a/research/brotli_decoder.c +++ b/research/brotli_decoder.c @@ -7,6 +7,8 @@ #include #include +#include + #if !defined(_WIN32) #include #else @@ -18,8 +20,6 @@ #endif #endif -#include - #define BUFFER_SIZE (1u << 20) typedef struct Context { diff --git a/scripts/sources.lst b/scripts/sources.lst index dd50a45..2848cc5 100644 --- a/scripts/sources.lst +++ b/scripts/sources.lst @@ -97,6 +97,7 @@ BROTLI_ENC_H = \ c/enc/prefix.h \ c/enc/quality.h \ c/enc/ringbuffer.h \ + c/enc/state.h \ c/enc/static_dict.h \ c/enc/static_dict_lut.h \ c/enc/utf8_util.h \