diff --git a/BUILD b/BUILD index 6265b71..72781d7 100644 --- a/BUILD +++ b/BUILD @@ -83,6 +83,7 @@ STRICT_C_OPTIONS = [ "-Wmissing-declarations", "-Wmissing-prototypes", "-Wno-strict-aliasing", + "-Wno-implicit-fallthrough", "-Wshadow", "-Wsign-compare", ] diff --git a/c/enc/backward_references.c b/c/enc/backward_references.c index 62ecea7..3dd897b 100644 --- a/c/enc/backward_references.c +++ b/c/enc/backward_references.c @@ -49,6 +49,7 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance, #define CAT(a, b) a ## b #define FN(X) EXPAND_CAT(X, HASHER()) #define EXPORT_FN(X) EXPAND_CAT(X, EXPAND_CAT(PREFIX(), HASHER())) + #define PREFIX() N #define HASHER() H2 @@ -97,6 +98,7 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance, #undef HASHER #undef PREFIX + #undef EXPORT_FN #undef FN #undef CAT diff --git a/c/enc/backward_references_hq.c b/c/enc/backward_references_hq.c index 62c6ba2..e7486c4 100644 --- a/c/enc/backward_references_hq.c +++ b/c/enc/backward_references_hq.c @@ -76,7 +76,7 @@ typedef struct ZopfliCostModel { /* The insert and copy length symbols. */ float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS]; float* cost_dist_; - uint32_t distance_alphabet_size; + uint32_t distance_histogram_size; /* Cumulative costs of literals per position in the stream. */ float* literal_costs_; float min_cost_cmd_; @@ -86,10 +86,14 @@ typedef struct ZopfliCostModel { static void InitZopfliCostModel( MemoryManager* m, ZopfliCostModel* self, const BrotliDistanceParams* dist, size_t num_bytes) { + uint32_t distance_histogram_size = dist->alphabet_size; + if (distance_histogram_size > BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE) { + distance_histogram_size = BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE; + } self->num_bytes_ = num_bytes; self->literal_costs_ = BROTLI_ALLOC(m, float, num_bytes + 2); self->cost_dist_ = BROTLI_ALLOC(m, float, dist->alphabet_size); - self->distance_alphabet_size = dist->alphabet_size; + self->distance_histogram_size = distance_histogram_size; if (BROTLI_IS_OOM(m)) return; } @@ -171,7 +175,7 @@ static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self, cost_literal); SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE, cost_cmd); - SetCost(histogram_dist, self->distance_alphabet_size, BROTLI_FALSE, + SetCost(histogram_dist, self->distance_histogram_size, BROTLI_FALSE, self->cost_dist_); for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) { @@ -214,7 +218,7 @@ static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self, for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) { cost_cmd[i] = (float)FastLog2(11 + (uint32_t)i); } - for (i = 0; i < self->distance_alphabet_size; ++i) { + for (i = 0; i < self->distance_histogram_size; ++i) { cost_dist[i] = (float)FastLog2(20 + (uint32_t)i); } self->min_cost_cmd_ = (float)FastLog2(11); diff --git a/c/enc/encode.c b/c/enc/encode.c index 563c827..623b88f 100644 --- a/c/enc/encode.c +++ b/c/enc/encode.c @@ -167,6 +167,14 @@ BROTLI_BOOL BrotliEncoderSetParameter( state->params.large_window = TO_BROTLI_BOOL(!!value); return BROTLI_TRUE; + case BROTLI_PARAM_NPOSTFIX: + state->params.dist.distance_postfix_bits = value; + return BROTLI_TRUE; + + case BROTLI_PARAM_NDIRECT: + state->params.dist.num_direct_distance_codes = value; + return BROTLI_TRUE; + default: return BROTLI_FALSE; } } @@ -636,26 +644,42 @@ static void WriteMetaBlockInternal(MemoryManager* m, } static void ChooseDistanceParams(BrotliEncoderParams* params) { - /* NDIRECT, NPOSTFIX must be both zero for qualities - up to MIN_QUALITY_FOR_BLOCK_SPLIT == 3. */ - uint32_t num_direct_distance_codes = 0; uint32_t distance_postfix_bits = 0; + uint32_t num_direct_distance_codes = 0; uint32_t alphabet_size; size_t max_distance = BROTLI_MAX_DISTANCE; - if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES && - params->mode == BROTLI_MODE_FONT) { - num_direct_distance_codes = 12; - distance_postfix_bits = 1; - max_distance = (1U << 27) + 4; + if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) { + uint32_t ndirect_msb; + if (params->mode == BROTLI_MODE_FONT) { + distance_postfix_bits = 1; + num_direct_distance_codes = 12; + } else { + distance_postfix_bits = params->dist.distance_postfix_bits; + num_direct_distance_codes = params->dist.num_direct_distance_codes; + } + ndirect_msb = (num_direct_distance_codes >> distance_postfix_bits) & 0x0F; + if (distance_postfix_bits > BROTLI_MAX_NPOSTFIX || + num_direct_distance_codes > BROTLI_MAX_NDIRECT || + (ndirect_msb << distance_postfix_bits) != num_direct_distance_codes) { + distance_postfix_bits = 0; + num_direct_distance_codes = 0; + } + max_distance = num_direct_distance_codes + + (1U << (BROTLI_MAX_DISTANCE_BITS + distance_postfix_bits + 2)) - + (1U << (distance_postfix_bits + 2)); } alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE( num_direct_distance_codes, distance_postfix_bits, BROTLI_MAX_DISTANCE_BITS); if (params->large_window) { - max_distance = BROTLI_MAX_ALLOWED_DISTANCE; - if (num_direct_distance_codes != 0 || distance_postfix_bits != 0) { + /* The maximum distance is set so that no distance symbol used can encode + a distance larger than BROTLI_MAX_ALLOWED_DISTANCE with all + its extra bits set. */ + const uint32_t bound_ndirect[BROTLI_MAX_NDIRECT + 1] = {1, 6, 16, 36}; + max_distance = (1U << 31) - 32; + if (num_direct_distance_codes >= bound_ndirect[distance_postfix_bits]) { max_distance = (3U << 29) - 4; } alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE( @@ -663,8 +687,8 @@ static void ChooseDistanceParams(BrotliEncoderParams* params) { BROTLI_LARGE_MAX_DISTANCE_BITS); } - params->dist.num_direct_distance_codes = num_direct_distance_codes; params->dist.distance_postfix_bits = distance_postfix_bits; + params->dist.num_direct_distance_codes = num_direct_distance_codes; params->dist.alphabet_size = alphabet_size; params->dist.max_distance = max_distance; } @@ -710,8 +734,8 @@ static void BrotliEncoderInitParams(BrotliEncoderParams* params) { params->size_hint = 0; params->disable_literal_context_modeling = BROTLI_FALSE; BrotliInitEncoderDictionary(¶ms->dictionary); - params->dist.num_direct_distance_codes = 0; params->dist.distance_postfix_bits = 0; + params->dist.num_direct_distance_codes = 0; params->dist.alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS); params->dist.max_distance = BROTLI_MAX_DISTANCE; diff --git a/c/enc/memory.h b/c/enc/memory.h index 2d56e97..ab928d0 100644 --- a/c/enc/memory.h +++ b/c/enc/memory.h @@ -80,6 +80,21 @@ R: requested size } \ } +/* +Appends value and dynamically grows array capacity when needed +M: MemoryManager +T: data type +A: array +C: array capacity +S: array size +V: value to append +*/ +#define BROTLI_ENSURE_CAPACITY_APPEND(M, T, A, C, S, V) { \ + (S)++; \ + BROTLI_ENSURE_CAPACITY(M, T, A, C, S); \ + A[(S) - 1] = (V); \ +} + #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif diff --git a/c/enc/params.h b/c/enc/params.h index 9bcf236..6ecf1d3 100755 --- a/c/enc/params.h +++ b/c/enc/params.h @@ -21,8 +21,8 @@ typedef struct BrotliHasherParams { } BrotliHasherParams; typedef struct BrotliDistanceParams { - uint32_t num_direct_distance_codes; uint32_t distance_postfix_bits; + uint32_t num_direct_distance_codes; uint32_t alphabet_size; size_t max_distance; } BrotliDistanceParams; diff --git a/c/enc/quality.h b/c/enc/quality.h index f9b1111..aa7ba0d 100644 --- a/c/enc/quality.h +++ b/c/enc/quality.h @@ -21,13 +21,12 @@ #define MAX_QUALITY_FOR_STATIC_ENTROPY_CODES 2 #define MIN_QUALITY_FOR_BLOCK_SPLIT 4 +#define MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS 4 #define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS 4 #define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH 5 #define MIN_QUALITY_FOR_CONTEXT_MODELING 5 #define MIN_QUALITY_FOR_HQ_CONTEXT_MODELING 7 #define MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING 10 -/* Only for "font" mode. */ -#define MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES 10 /* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting, so we buffer at most this much literals and commands. */ diff --git a/c/include/brotli/encode.h b/c/include/brotli/encode.h index 0b5c8c7..0ced7e5 100644 --- a/c/include/brotli/encode.h +++ b/c/include/brotli/encode.h @@ -185,7 +185,23 @@ typedef enum BrotliEncoderParameter { /** * Flag that determines if "Large Window Brotli" is used. */ - BROTLI_PARAM_LARGE_WINDOW = 6 + BROTLI_PARAM_LARGE_WINDOW = 6, + /** + * Recommended number of postfix bits (NPOSTFIX). + * + * Encoder may change this value. + * + * Range is from 0 to ::BROTLI_MAX_NPOSTFIX. + */ + BROTLI_PARAM_NPOSTFIX = 7, + /** + * Recommended number of direct distance codes (NDIRECT). + * + * Encoder may change this value. + * + * Range is from 0 to (15 << NPOSTFIX) in steps of (1 << NPOSTFIX). + */ + BROTLI_PARAM_NDIRECT = 8 } BrotliEncoderParameter; /** diff --git a/c/tools/brotli.c b/c/tools/brotli.c index 2abfc27..17a3bb6 100644 --- a/c/tools/brotli.c +++ b/c/tools/brotli.c @@ -195,6 +195,7 @@ static Command ParseParams(Context* params) { This check is an additional guard that is never triggered, but provides a guard for future changes. */ if (next_option_index > (MAX_OPTIONS - 2)) { + fprintf(stderr, "too many options passed\n"); return COMMAND_INVALID; } @@ -220,80 +221,135 @@ static Command ParseParams(Context* params) { for (j = 1; j < arg_len; ++j) { char c = arg[j]; if (c >= '0' && c <= '9') { - if (quality_set) return COMMAND_INVALID; + if (quality_set) { + fprintf(stderr, "quality already set\n"); + return COMMAND_INVALID; + } quality_set = BROTLI_TRUE; params->quality = c - '0'; continue; } else if (c == 'c') { - if (output_set) return COMMAND_INVALID; + if (output_set) { + fprintf(stderr, "write to standard output already set\n"); + return COMMAND_INVALID; + } output_set = BROTLI_TRUE; params->write_to_stdout = BROTLI_TRUE; continue; } else if (c == 'd') { - if (command_set) return COMMAND_INVALID; + if (command_set) { + fprintf(stderr, "command already set when parsing -d\n"); + return COMMAND_INVALID; + } command_set = BROTLI_TRUE; command = COMMAND_DECOMPRESS; continue; } else if (c == 'f') { - if (params->force_overwrite) return COMMAND_INVALID; + if (params->force_overwrite) { + fprintf(stderr, "force output overwrite already set\n"); + return COMMAND_INVALID; + } params->force_overwrite = BROTLI_TRUE; continue; } else if (c == 'h') { /* Don't parse further. */ return COMMAND_HELP; } else if (c == 'j' || c == 'k') { - if (keep_set) return COMMAND_INVALID; + if (keep_set) { + fprintf(stderr, "argument --rm / -j or --keep / -n already set\n"); + return COMMAND_INVALID; + } keep_set = BROTLI_TRUE; params->junk_source = TO_BROTLI_BOOL(c == 'j'); continue; } else if (c == 'n') { - if (!params->copy_stat) return COMMAND_INVALID; + if (!params->copy_stat) { + fprintf(stderr, "argument --no-copy-stat / -n already set\n"); + return COMMAND_INVALID; + } params->copy_stat = BROTLI_FALSE; continue; } else if (c == 't') { - if (command_set) return COMMAND_INVALID; + if (command_set) { + fprintf(stderr, "command already set when parsing -t\n"); + return COMMAND_INVALID; + } command_set = BROTLI_TRUE; command = COMMAND_TEST_INTEGRITY; continue; } else if (c == 'v') { - if (params->verbose) return COMMAND_INVALID; + if (params->verbose) { + fprintf(stderr, "argument --verbose / -v already set\n"); + return COMMAND_INVALID; + } params->verbose = BROTLI_TRUE; continue; } else if (c == 'V') { /* Don't parse further. */ return COMMAND_VERSION; } else if (c == 'Z') { - if (quality_set) return COMMAND_INVALID; + if (quality_set) { + fprintf(stderr, "quality already set\n"); + return COMMAND_INVALID; + } quality_set = BROTLI_TRUE; params->quality = 11; continue; } /* o/q/w/D/S with parameter is expected */ if (c != 'o' && c != 'q' && c != 'w' && c != 'D' && c != 'S') { + fprintf(stderr, "invalid argument -%c\n", c); + return COMMAND_INVALID; + } + if (j + 1 != arg_len) { + fprintf(stderr, "expected parameter for argument -%c\n", c); return COMMAND_INVALID; } - if (j + 1 != arg_len) return COMMAND_INVALID; i++; - if (i == argc || !argv[i] || argv[i][0] == 0) return COMMAND_INVALID; + if (i == argc || !argv[i] || argv[i][0] == 0) { + fprintf(stderr, "expected parameter for argument -%c\n", c); + return COMMAND_INVALID; + } params->not_input_indices[next_option_index++] = i; if (c == 'o') { - if (output_set) return COMMAND_INVALID; + if (output_set) { + fprintf(stderr, "write to standard output already set (-o)\n"); + return COMMAND_INVALID; + } params->output_path = argv[i]; } else if (c == 'q') { - if (quality_set) return COMMAND_INVALID; + if (quality_set) { + fprintf(stderr, "quality already set\n"); + return COMMAND_INVALID; + } quality_set = ParseInt(argv[i], BROTLI_MIN_QUALITY, BROTLI_MAX_QUALITY, ¶ms->quality); - if (!quality_set) return COMMAND_INVALID; + if (!quality_set) { + fprintf(stderr, "error parsing quality value [%s]\n", argv[i]); + return COMMAND_INVALID; + } } else if (c == 'w') { - if (lgwin_set) return COMMAND_INVALID; + if (lgwin_set) { + fprintf(stderr, "lgwin parameter already set\n"); + return COMMAND_INVALID; + } lgwin_set = ParseInt(argv[i], 0, BROTLI_MAX_WINDOW_BITS, ¶ms->lgwin); - if (!lgwin_set) return COMMAND_INVALID; + if (!lgwin_set) { + fprintf(stderr, "error parsing lgwin value [%s]\n", argv[i]); + return COMMAND_INVALID; + } if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) { + fprintf(stderr, + "lgwin parameter (%d) smaller than the minimum (%d)\n", + params->lgwin, BROTLI_MIN_WINDOW_BITS); return COMMAND_INVALID; } } else if (c == 'S') { - if (suffix_set) return COMMAND_INVALID; + if (suffix_set) { + fprintf(stderr, "suffix already set\n"); + return COMMAND_INVALID; + } suffix_set = BROTLI_TRUE; params->suffix = argv[i]; } @@ -301,40 +357,67 @@ static Command ParseParams(Context* params) { } else { /* Double-dash. */ arg = &arg[2]; if (strcmp("best", arg) == 0) { - if (quality_set) return COMMAND_INVALID; + if (quality_set) { + fprintf(stderr, "quality already set\n"); + return COMMAND_INVALID; + } quality_set = BROTLI_TRUE; params->quality = 11; } else if (strcmp("decompress", arg) == 0) { - if (command_set) return COMMAND_INVALID; + if (command_set) { + fprintf(stderr, "command already set when parsing --decompress\n"); + return COMMAND_INVALID; + } command_set = BROTLI_TRUE; command = COMMAND_DECOMPRESS; } else if (strcmp("force", arg) == 0) { - if (params->force_overwrite) return COMMAND_INVALID; + if (params->force_overwrite) { + fprintf(stderr, "force output overwrite already set\n"); + return COMMAND_INVALID; + } params->force_overwrite = BROTLI_TRUE; } else if (strcmp("help", arg) == 0) { /* Don't parse further. */ return COMMAND_HELP; } else if (strcmp("keep", arg) == 0) { - if (keep_set) return COMMAND_INVALID; + if (keep_set) { + fprintf(stderr, "argument --rm / -j or --keep / -n already set\n"); + return COMMAND_INVALID; + } keep_set = BROTLI_TRUE; params->junk_source = BROTLI_FALSE; } else if (strcmp("no-copy-stat", arg) == 0) { - if (!params->copy_stat) return COMMAND_INVALID; + if (!params->copy_stat) { + fprintf(stderr, "argument --no-copy-stat / -n already set\n"); + return COMMAND_INVALID; + } params->copy_stat = BROTLI_FALSE; } else if (strcmp("rm", arg) == 0) { - if (keep_set) return COMMAND_INVALID; + if (keep_set) { + fprintf(stderr, "argument --rm / -j or --keep / -n already set\n"); + return COMMAND_INVALID; + } keep_set = BROTLI_TRUE; params->junk_source = BROTLI_TRUE; } else if (strcmp("stdout", arg) == 0) { - if (output_set) return COMMAND_INVALID; + if (output_set) { + fprintf(stderr, "write to standard output already set\n"); + return COMMAND_INVALID; + } output_set = BROTLI_TRUE; params->write_to_stdout = BROTLI_TRUE; } else if (strcmp("test", arg) == 0) { - if (command_set) return COMMAND_INVALID; + if (command_set) { + fprintf(stderr, "command already set when parsing --test\n"); + return COMMAND_INVALID; + } command_set = BROTLI_TRUE; command = COMMAND_TEST_INTEGRITY; } else if (strcmp("verbose", arg) == 0) { - if (params->verbose) return COMMAND_INVALID; + if (params->verbose) { + fprintf(stderr, "argument --verbose / -v already set\n"); + return COMMAND_INVALID; + } params->verbose = BROTLI_TRUE; } else if (strcmp("version", arg) == 0) { /* Don't parse further. */ @@ -343,30 +426,56 @@ static Command ParseParams(Context* params) { /* key=value */ const char* value = strrchr(arg, '='); size_t key_len; - if (!value || value[1] == 0) return COMMAND_INVALID; + if (!value || value[1] == 0) { + fprintf(stderr, "must pass the parameter as --%s=value\n", arg); + return COMMAND_INVALID; + } key_len = (size_t)(value - arg); value++; if (strncmp("lgwin", arg, key_len) == 0) { - if (lgwin_set) return COMMAND_INVALID; + if (lgwin_set) { + fprintf(stderr, "lgwin parameter already set\n"); + return COMMAND_INVALID; + } lgwin_set = ParseInt(value, 0, BROTLI_MAX_WINDOW_BITS, ¶ms->lgwin); - if (!lgwin_set) return COMMAND_INVALID; + if (!lgwin_set) { + fprintf(stderr, "error parsing lgwin value [%s]\n", value); + return COMMAND_INVALID; + } if (params->lgwin != 0 && params->lgwin < BROTLI_MIN_WINDOW_BITS) { + fprintf(stderr, + "lgwin parameter (%d) smaller than the minimum (%d)\n", + params->lgwin, BROTLI_MIN_WINDOW_BITS); return COMMAND_INVALID; } } else if (strncmp("output", arg, key_len) == 0) { - if (output_set) return COMMAND_INVALID; + if (output_set) { + fprintf(stderr, + "write to standard output already set (--output)\n"); + return COMMAND_INVALID; + } params->output_path = value; } else if (strncmp("quality", arg, key_len) == 0) { - if (quality_set) return COMMAND_INVALID; + if (quality_set) { + fprintf(stderr, "quality already set\n"); + return COMMAND_INVALID; + } quality_set = ParseInt(value, BROTLI_MIN_QUALITY, BROTLI_MAX_QUALITY, ¶ms->quality); - if (!quality_set) return COMMAND_INVALID; + if (!quality_set) { + fprintf(stderr, "error parsing quality value [%s]\n", value); + return COMMAND_INVALID; + } } else if (strncmp("suffix", arg, key_len) == 0) { - if (suffix_set) return COMMAND_INVALID; + if (suffix_set) { + fprintf(stderr, "suffix already set\n"); + return COMMAND_INVALID; + } suffix_set = BROTLI_TRUE; params->suffix = value; } else { + fprintf(stderr, "invalid parameter: [%s]\n", arg); return COMMAND_INVALID; } } diff --git a/docs/encode.h.3 b/docs/encode.h.3 index 906ce07..eff57bd 100644 --- a/docs/encode.h.3 +++ b/docs/encode.h.3 @@ -294,6 +294,16 @@ Estimated total input size for all \fBBrotliEncoderCompressStream\fP calls\&. Th .TP \fB\fIBROTLI_PARAM_LARGE_WINDOW \fP\fP Flag that determines if 'Large Window Brotli' is used\&. +.TP +\fB\fIBROTLI_PARAM_NPOSTFIX \fP\fP +Recommended number of postfix bits (NPOSTFIX)\&. Encoder may change this value\&. +.PP +Range is from 0 to ::BROTLI_MAX_NPOSTFIX\&. +.TP +\fB\fIBROTLI_PARAM_NDIRECT \fP\fP +Recommended number of direct distance codes (NDIRECT)\&. Encoder may change this value\&. +.PP +Range is from 0 to (15 << NPOSTFIX) in steps of (1 << NPOSTFIX)\&. .SH "Function Documentation" .PP .SS "\fBBROTLI_BOOL\fP BrotliEncoderCompress (int quality, int lgwin, \fBBrotliEncoderMode\fP mode, size_t input_size, const uint8_t input_buffer[input_size], size_t * encoded_size, uint8_t encoded_buffer[*encoded_size])" diff --git a/research/durchschlag.cc b/research/durchschlag.cc index cc4ed68..2fbf41b 100755 --- a/research/durchschlag.cc +++ b/research/durchschlag.cc @@ -75,6 +75,8 @@ static std::string createDictionary( return output; } +/* precondition: span > 0 + precondition: end + span == len(shortcut) */ static Score buildCandidatesList(std::vector* candidates, std::vector* map, TextIdx span, const TextIdx* shortcut, TextIdx end) { @@ -87,7 +89,10 @@ static Score buildCandidatesList(std::vector* candidates, } Score score = 0; - for (size_t j = 0; j < span; ++j) { + /* Consider the whole span, except one last item. The following loop will + add the last item to the end of the "chain", evaluate it, and cut one + "link" form the beginning. */ + for (size_t j = 0; j < span - 1; ++j) { MetaSlot& item = slots[shortcut[j]]; if (item.mark == 0) { score += item.score; @@ -99,7 +104,8 @@ static Score buildCandidatesList(std::vector* candidates, TextIdx limit = std::min(end, CANDIDATE_BUNDLE_SIZE); Score maxScore = 0; for (; i < limit; ++i) { - MetaSlot& pick = slots[shortcut[i + span]]; + TextIdx slice = shortcut[i + span - 1]; + MetaSlot& pick = slots[slice]; if (pick.mark == 0) { score += pick.score; } @@ -120,7 +126,8 @@ static Score buildCandidatesList(std::vector* candidates, std::make_heap(candidates->begin(), candidates->end(), greaterScore()); Score minScore = candidates->at(0).score; for (; i < end; ++i) { - MetaSlot& pick = slots[shortcut[i + span]]; + TextIdx slice = shortcut[i + span - 1]; + MetaSlot& pick = slots[slice]; if (pick.mark == 0) { score += pick.score; } @@ -156,6 +163,8 @@ static Score buildCandidatesList(std::vector* candidates, return minScore; } +/* precondition: span > 0 + precondition: end + span == len(shortcut) */ static Score rebuildCandidatesList(std::vector* candidates, std::vector* map, TextIdx span, const TextIdx* shortcut, TextIdx end, TextIdx* next) { @@ -172,7 +181,10 @@ static Score rebuildCandidatesList(std::vector* candidates, } Score score = 0; - for (TextIdx i = 0; i < span; ++i) { + /* Consider the whole span, except one last item. The following loop will + add the last item to the end of the "chain", evaluate it, and cut one + "link" form the beginning. */ + for (TextIdx i = 0; i < span - 1; ++i) { MetaSlot& item = slots[shortcut[i]]; if (item.mark == 0) { score += item.score; @@ -182,7 +194,7 @@ static Score rebuildCandidatesList(std::vector* candidates, Score maxScore = 0; for (TextIdx i = 0; i < end; ++i) { - MetaSlot& pick = slots[shortcut[i + span]]; + MetaSlot& pick = slots[shortcut[i + span - 1]]; if (pick.mark == 0) { score += pick.score; } @@ -460,10 +472,10 @@ static std::string durchschlagGenerateExclusive( } TextIdx end = total - sliceLen + 1; ScoreSlices(offsets, map, shortcut, end); - end = total - blockLen + 1; + TextIdx span = blockLen - sliceLen + 1; + end = static_cast(context.sliceMap.size()) - span; std::vector candidates; std::vector next(end); - TextIdx span = blockLen - sliceLen + 1; Score maxScore = rebuildCandidatesList( &candidates, &map, span, shortcut, end, next.data()); @@ -499,7 +511,7 @@ static std::string durchschlagGenerateExclusive( numTries++; numCandidates++; Score score = 0; - for (size_t j = candidate; j <= candidate + span; ++j) { + for (size_t j = candidate; j < candidate + span; ++j) { MetaSlot& item = map[shortcut[j]]; if (item.mark != mark) { score += item.score; @@ -522,7 +534,7 @@ static std::string durchschlagGenerateExclusive( fprintf(stderr, "Broken invariant\n"); return ""; } - for (TextIdx j = candidate; j <= candidate + span; ++j) { + for (TextIdx j = candidate; j < candidate + span; ++j) { MetaSlot& item = map[shortcut[j]]; item.score = 0; } @@ -566,10 +578,10 @@ static std::string durchschlagGenerateCollaborative( } TextIdx end = total - sliceLen + 1; ScoreSlices(offsets, map, shortcut, end); - end = total - blockLen + 1; + TextIdx span = blockLen - sliceLen + 1; + end = static_cast(context.sliceMap.size()) - span; std::vector candidates; candidates.reserve(CANDIDATE_BUNDLE_SIZE + 1024); - TextIdx span = blockLen - sliceLen + 1; Score minScore = buildCandidatesList(&candidates, &map, span, shortcut, end); /* Block selection */ @@ -598,7 +610,7 @@ static std::string durchschlagGenerateCollaborative( candidates.pop_back(); mark++; Score score = 0; - for (TextIdx j = candidate; j <= candidate + span; ++j) { + for (TextIdx j = candidate; j < candidate + span; ++j) { MetaSlot& item = map[shortcut[j]]; if (item.mark != mark) { score += item.score; @@ -614,7 +626,7 @@ static std::string durchschlagGenerateCollaborative( } else if (score > expectedScore) { fatal("Broken invariant"); } - for (TextIdx j = candidate; j <= candidate + span; ++j) { + for (TextIdx j = candidate; j < candidate + span; ++j) { MetaSlot& item = map[shortcut[j]]; item.score = 0; } diff --git a/scripts/appveyor.yml b/scripts/appveyor.yml index f5f0c13..c7881ad 100644 --- a/scripts/appveyor.yml +++ b/scripts/appveyor.yml @@ -42,7 +42,7 @@ install: ) ) - IF "%BUILD_SYSTEM%"=="bazel" ( - appveyor DownloadFile https://github.com/bazelbuild/bazel/releases/download/0.10.0/bazel-0.10.0-windows-x86_64.exe -FileName bazel.exe + appveyor DownloadFile https://github.com/bazelbuild/bazel/releases/download/0.11.1/bazel-0.11.1-windows-x86_64.exe -FileName bazel.exe ) before_build: diff --git a/scripts/sources.lst b/scripts/sources.lst index cdddb37..4ca22bc 100644 --- a/scripts/sources.lst +++ b/scripts/sources.lst @@ -81,6 +81,7 @@ BROTLI_ENC_H = \ c/enc/memory.h \ c/enc/metablock.h \ c/enc/metablock_inc.h \ + c/enc/params.h \ c/enc/prefix.h \ c/enc/quality.h \ c/enc/ringbuffer.h \ diff --git a/setup.py b/setup.py index d8478b3..7535fb2 100644 --- a/setup.py +++ b/setup.py @@ -249,6 +249,7 @@ EXT_MODULES = [ 'c/enc/memory.h', 'c/enc/metablock.h', 'c/enc/metablock_inc.h', + 'c/enc/params.h', 'c/enc/prefix.h', 'c/enc/quality.h', 'c/enc/ringbuffer.h',