Update
 * make the zopflification aware of `NDIRECT`, `NPOSTFIX`
   (better compression in `font` mode)
 * add small and simple decoder tool
 * fix typo
 * Java: wrapper: make decoder channel more async-friendly

Ramp up version to 1.0.3 / 1.0.3
This commit is contained in:
Eugene Kliuchnikov 2018-03-02 15:49:58 +01:00 committed by GitHub
parent 35e69fc7cf
commit 533843e354
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 150 additions and 64 deletions

View File

@ -1,4 +1,4 @@
/* transforms is a part of ABI, nut not API.
/* transforms is a part of ABI, but not API.
It means that there are some functions that are supposed to be in "common"
library, but header itself is not placed into include/brotli. This way,

View File

@ -14,13 +14,13 @@
BrotliEncoderVersion methods. */
/* Semantic version, calculated as (MAJOR << 24) | (MINOR << 12) | PATCH */
#define BROTLI_VERSION 0x1000002
#define BROTLI_VERSION 0x1000003
/* This macro is used by build system to produce Libtool-friendly soname. See
https://www.gnu.org/software/libtool/manual/html_node/Libtool-versioning.html
*/
/* ABI version, calculated as (CURRENT << 24) | (REVISION << 12) | AGE */
#define BROTLI_ABI_VERSION 0x1002000
#define BROTLI_ABI_VERSION 0x1003000
#endif /* BROTLI_COMMON_VERSION_H_ */

View File

@ -18,6 +18,7 @@
#include "./find_match_length.h"
#include "./literal_cost.h"
#include "./memory.h"
#include "./params.h"
#include "./prefix.h"
#include "./quality.h"
@ -25,9 +26,7 @@
extern "C" {
#endif
#define BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE ( \
BROTLI_NUM_DISTANCE_SHORT_CODES + (2 * BROTLI_LARGE_MAX_DISTANCE_BITS))
/* BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE == 74 */
#define BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE 544
static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */
@ -76,7 +75,8 @@ static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
typedef struct ZopfliCostModel {
/* The insert and copy length symbols. */
float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS];
float cost_dist_[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE];
float* cost_dist_;
uint32_t distance_alphabet_size;
/* Cumulative costs of literals per position in the stream. */
float* literal_costs_;
float min_cost_cmd_;
@ -84,14 +84,18 @@ typedef struct ZopfliCostModel {
} ZopfliCostModel;
static void InitZopfliCostModel(
MemoryManager* m, ZopfliCostModel* self, size_t num_bytes) {
MemoryManager* m, ZopfliCostModel* self, const BrotliDistanceParams* dist,
size_t num_bytes) {
self->num_bytes_ = num_bytes;
self->literal_costs_ = BROTLI_ALLOC(m, float, num_bytes + 2);
self->cost_dist_ = BROTLI_ALLOC(m, float, dist->alphabet_size);
self->distance_alphabet_size = dist->alphabet_size;
if (BROTLI_IS_OOM(m)) return;
}
static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) {
BROTLI_FREE(m, self->literal_costs_);
BROTLI_FREE(m, self->cost_dist_);
}
static void SetCost(const uint32_t* histogram, size_t histogram_size,
@ -135,7 +139,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
size_t last_insert_len) {
uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
uint32_t histogram_dist[BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE];
uint32_t histogram_dist[BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE];
float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
size_t pos = position - last_insert_len;
float min_cost_cmd = kInfinity;
@ -167,7 +171,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
cost_literal);
SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE,
cost_cmd);
SetCost(histogram_dist, BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE, BROTLI_FALSE,
SetCost(histogram_dist, self->distance_alphabet_size, BROTLI_FALSE,
self->cost_dist_);
for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
@ -210,7 +214,7 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
cost_cmd[i] = (float)FastLog2(11 + (uint32_t)i);
}
for (i = 0; i < BROTLI_SIMPLE_DISTANCE_ALPHABET_SIZE; ++i) {
for (i = 0; i < self->distance_alphabet_size; ++i) {
cost_dist[i] = (float)FastLog2(20 + (uint32_t)i);
}
self->min_cost_cmd_ = (float)FastLog2(11);
@ -505,7 +509,9 @@ static size_t UpdateNodes(
uint32_t distnumextra;
float dist_cost;
size_t max_match_len;
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
PrefixEncodeCopyDistance(
dist_code, params->dist.num_direct_distance_codes,
params->dist.distance_postfix_bits, &dist_symbol, &distextra);
distnumextra = dist_symbol >> 10;
dist_cost = base_cost + (float)distnumextra +
ZopfliCostModelGetDistanceCost(model, dist_symbol & 0x3FF);
@ -566,7 +572,6 @@ void BrotliZopfliCreateCommands(const size_t num_bytes,
uint32_t offset = nodes[0].u.next;
size_t i;
size_t gap = 0;
BROTLI_UNUSED(params);
for (i = 0; offset != BROTLI_UINT32_MAX; i++) {
const ZopfliNode* next = &nodes[pos + offset];
size_t copy_length = ZopfliNodeCopyLength(next);
@ -584,7 +589,7 @@ void BrotliZopfliCreateCommands(const size_t num_bytes,
BROTLI_MIN(size_t, block_start + pos, max_backward_limit);
BROTLI_BOOL is_dictionary = TO_BROTLI_BOOL(distance > max_distance + gap);
size_t dist_code = ZopfliNodeDistanceCode(next);
InitCommand(&commands[i], insert_length,
InitCommand(&commands[i], &params->dist, insert_length,
copy_length, (int)len_code - (int)copy_length, dist_code);
if (!is_dictionary && dist_code > 0) {
@ -663,7 +668,7 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
size_t lz_matches_offset = 0;
nodes[0].length = 0;
nodes[0].u.cost = 0;
InitZopfliCostModel(m, &model, num_bytes);
InitZopfliCostModel(m, &model, &params->dist, num_bytes);
if (BROTLI_IS_OOM(m)) return 0;
ZopfliCostModelSetFromLiteralCosts(
&model, position, ringbuffer, ringbuffer_mask);
@ -788,7 +793,7 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
orig_num_commands = *num_commands;
nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
if (BROTLI_IS_OOM(m)) return;
InitZopfliCostModel(m, &model, num_bytes);
InitZopfliCostModel(m, &model, &params->dist, num_bytes);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < 2; i++) {
BrotliInitZopfliNodes(nodes, num_bytes + 1);

View File

@ -89,8 +89,8 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
dist_cache[0] = (int)sr.distance;
FN(PrepareDistanceCache)(hasher, dist_cache);
}
InitCommand(commands++, insert_length, sr.len, sr.len_code_delta,
distance_code);
InitCommand(commands++, &params->dist, insert_length,
sr.len, sr.len_code_delta, distance_code);
}
*num_literals += insert_length;
insert_length = 0;

View File

@ -13,6 +13,7 @@
#include "../common/platform.h"
#include <brotli/types.h>
#include "./fast_log.h"
#include "./params.h"
#include "./prefix.h"
#if defined(__cplusplus) || defined(c_plusplus)
@ -116,7 +117,8 @@ typedef struct Command {
} Command;
/* distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1. */
static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen,
static BROTLI_INLINE void InitCommand(Command* self,
const BrotliDistanceParams* dist, size_t insertlen,
size_t copylen, int copylen_code_delta, size_t distance_code) {
/* Don't rely on signed int representation, use honest casts. */
uint32_t delta = (uint8_t)((int8_t)copylen_code_delta);
@ -126,7 +128,8 @@ static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen,
npostfix and ndirect were 0, they are only recomputed later after the
clustering if needed. */
PrefixEncodeCopyDistance(
distance_code, 0, 0, &self->dist_prefix_, &self->dist_extra_);
distance_code, dist->num_direct_distance_codes,
dist->distance_postfix_bits, &self->dist_prefix_, &self->dist_extra_);
GetLengthCode(
insertlen, (size_t)((int)copylen + copylen_code_delta),
TO_BROTLI_BOOL((self->dist_prefix_ & 0x3FF) == 0), &self->cmd_prefix_);
@ -140,21 +143,24 @@ static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) {
GetLengthCode(insertlen, 4, BROTLI_FALSE, &self->cmd_prefix_);
}
static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(const Command* self) {
if ((self->dist_prefix_ & 0x3FF) < BROTLI_NUM_DISTANCE_SHORT_CODES) {
static BROTLI_INLINE uint32_t CommandRestoreDistanceCode(
const Command* self, const BrotliDistanceParams* dist) {
if ((self->dist_prefix_ & 0x3FF) <
BROTLI_NUM_DISTANCE_SHORT_CODES + dist->num_direct_distance_codes) {
return self->dist_prefix_ & 0x3FF;
} else {
uint32_t dcode = self->dist_prefix_ & 0x3FF;
uint32_t nbits = self->dist_prefix_ >> 10;
uint32_t extra = self->dist_extra_;
/* It is assumed that the distance was first encoded with NPOSTFIX = 0 and
NDIRECT = 0, so the code itself is of this form:
BROTLI_NUM_DISTANCE_SHORT_CODES + 2 * (nbits - 1) + prefix_bit
Therefore, the following expression results in (2 + prefix_bit). */
uint32_t prefix = (self->dist_prefix_ & 0x3FF) + 4u -
BROTLI_NUM_DISTANCE_SHORT_CODES - 2u * nbits;
/* Subtract 4 for offset (Chapter 4.) and
increase by BROTLI_NUM_DISTANCE_SHORT_CODES - 1 */
return (prefix << nbits) + extra + BROTLI_NUM_DISTANCE_SHORT_CODES - 4u;
uint32_t postfix_mask = (1U << dist->distance_postfix_bits) - 1U;
uint32_t hcode = (dcode - dist->num_direct_distance_codes -
BROTLI_NUM_DISTANCE_SHORT_CODES) >>
dist->distance_postfix_bits;
uint32_t lcode = (dcode - dist->num_direct_distance_codes -
BROTLI_NUM_DISTANCE_SHORT_CODES) & postfix_mask;
uint32_t offset = ((2U + (hcode & 1U)) << nbits) - 4U;
return ((offset + extra) << dist->distance_postfix_bits) + lcode +
dist->num_direct_distance_codes + BROTLI_NUM_DISTANCE_SHORT_CODES;
}
}

View File

@ -171,26 +171,6 @@ BROTLI_BOOL BrotliEncoderSetParameter(
}
}
static void RecomputeDistancePrefixes(Command* cmds,
size_t num_commands,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits) {
size_t i;
if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) {
return;
}
for (i = 0; i < num_commands; ++i) {
Command* cmd = &cmds[i];
if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
PrefixEncodeCopyDistance(CommandRestoreDistanceCode(cmd),
num_direct_distance_codes,
distance_postfix_bits,
&cmd->dist_prefix_,
&cmd->dist_extra_);
}
}
}
/* Wraps 64-bit input position to 32-bit ring-buffer position preserving
"not-a-first-lap" feature. */
static uint32_t WrapPosition(uint64_t position) {
@ -587,13 +567,6 @@ static void WriteMetaBlockInternal(MemoryManager* m,
BROTLI_DCHECK(*storage_ix <= 14);
last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
last_bytes_bits = (uint8_t)(*storage_ix);
if (params->dist.num_direct_distance_codes != 0 ||
params->dist.distance_postfix_bits != 0) {
RecomputeDistancePrefixes(commands,
num_commands,
params->dist.num_direct_distance_codes,
params->dist.distance_postfix_bits);
}
if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
bytes, mask, is_last, params,
@ -663,6 +636,8 @@ static void WriteMetaBlockInternal(MemoryManager* m,
}
static void ChooseDistanceParams(BrotliEncoderParams* params) {
/* NDIRECT, NPOSTFIX must be both zero for qualities
up to MIN_QUALITY_FOR_BLOCK_SPLIT == 3. */
uint32_t num_direct_distance_codes = 0;
uint32_t distance_postfix_bits = 0;
uint32_t alphabet_size;
@ -782,9 +757,8 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) {
memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
}
BrotliEncoderState* BrotliEncoderCreateInstance(brotli_alloc_func alloc_func,
brotli_free_func free_func,
void* opaque) {
BrotliEncoderState* BrotliEncoderCreateInstance(
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
BrotliEncoderState* state = 0;
if (!alloc_func && !free_func) {
state = (BrotliEncoderState*)malloc(sizeof(BrotliEncoderState));
@ -912,7 +886,8 @@ static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
uint64_t max_distance = last_processed_pos < max_backward_distance ?
last_processed_pos : max_backward_distance;
uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
uint32_t distance_code = CommandRestoreDistanceCode(last_command);
uint32_t distance_code = CommandRestoreDistanceCode(last_command,
&s->params.dist);
if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
if (cmd_dist <= max_distance) {

View File

@ -58,8 +58,8 @@ public class BrotliDecoderChannel extends Decoder implements ReadableByteChannel
int result = 0;
while (dst.hasRemaining()) {
int outputSize = decode();
if (outputSize == -1) {
return result == 0 ? -1 : result;
if (outputSize <= 0) {
return result == 0 ? outputSize : result;
}
result += consume(dst);
}

View File

@ -35,3 +35,10 @@ cc_binary(
":sieve",
],
)
cc_binary(
name = "brotli_decoder",
srcs = ["brotli_decoder.c"],
linkstatic = 1,
deps = ["//:brotlidec"],
)

93
research/brotli_decoder.c Normal file
View File

@ -0,0 +1,93 @@
/* Copyright 2018 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <brotli/decode.h>
#define BUFFER_SIZE (1u << 20)
typedef struct Context {
FILE* fin;
FILE* fout;
uint8_t* input_buffer;
uint8_t* output_buffer;
BrotliDecoderState* decoder;
} Context;
void init(Context* ctx) {
ctx->fin = 0;
ctx->fout = 0;
ctx->input_buffer = 0;
ctx->output_buffer = 0;
ctx->decoder = 0;
}
void cleanup(Context* ctx) {
if (ctx->decoder) BrotliDecoderDestroyInstance(ctx->decoder);
if (ctx->output_buffer) free(ctx->output_buffer);
if (ctx->input_buffer) free(ctx->input_buffer);
if (ctx->fout) fclose(ctx->fout);
if (ctx->fin) fclose(ctx->fin);
}
void fail(Context* ctx, const char* message) {
fprintf(stderr, "%s\n", message);
exit(1);
}
int main(int argc, char** argv) {
Context ctx;
BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
size_t available_in;
const uint8_t* next_in;
size_t available_out = BUFFER_SIZE;
uint8_t* next_out;
init(&ctx);
ctx.fin = fdopen(STDIN_FILENO, "rb");
if (!ctx.fin) fail(&ctx, "can't open input file");
ctx.fout = fdopen(STDOUT_FILENO, "wb");
if (!ctx.fout) fail(&ctx, "can't open output file");
ctx.input_buffer = (uint8_t*)malloc(BUFFER_SIZE);
if (!ctx.input_buffer) fail(&ctx, "out of memory / input buffer");
ctx.output_buffer = (uint8_t*)malloc(BUFFER_SIZE);
if (!ctx.output_buffer) fail(&ctx, "out of memory / output buffer");
ctx.decoder = BrotliDecoderCreateInstance(0, 0, 0);
if (!ctx.decoder) fail(&ctx, "out of memory / decoder");
BrotliDecoderSetParameter(ctx.decoder, BROTLI_DECODER_PARAM_LARGE_WINDOW, 1);
next_out = ctx.output_buffer;
while (1) {
if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
if (feof(ctx.fin)) break;
available_in = fread(ctx.input_buffer, 1, BUFFER_SIZE, ctx.fin);
next_in = ctx.input_buffer;
if (ferror(ctx.fin)) break;
} else if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
fwrite(ctx.output_buffer, 1, BUFFER_SIZE, ctx.fout);
if (ferror(ctx.fout)) break;
available_out = BUFFER_SIZE;
next_out = ctx.output_buffer;
} else {
break;
}
result = BrotliDecoderDecompressStream(
ctx.decoder, &available_in, &next_in, &available_out, &next_out, 0);
}
if (next_out != ctx.output_buffer) {
fwrite(ctx.output_buffer, 1, next_out - ctx.output_buffer, ctx.fout);
}
if ((result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) || ferror(ctx.fout)) {
fail(&ctx, "failed to write output");
} else if (result != BROTLI_DECODER_RESULT_SUCCESS) {
fail(&ctx, "corrupt input");
}
cleanup(&ctx);
return 0;
}