* add transpiled JS decoder
 * make PY wrapper accept memview
 * fix dictionary generator
 * speedup compression of RLEish data
This commit is contained in:
Eugene Kliuchnikov 2017-08-28 11:31:29 +02:00 committed by GitHub
parent 6535435413
commit a629289e32
10 changed files with 1954 additions and 22 deletions

View File

@ -14,6 +14,13 @@ git_repository(
tag = "0.4.4",
)
http_archive(
name = "io_bazel_rules_closure",
strip_prefix = "rules_closure-0.4.1",
sha256 = "ba5e2e10cdc4027702f96e9bdc536c6595decafa94847d08ae28c6cb48225124",
url = "http://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/0.4.1.tar.gz",
)
new_http_archive(
name = "openjdk_linux",
url = "https://bazel-mirror.storage.googleapis.com/openjdk/azul-zulu-8.20.0.5-jdk8.0.121/zulu8.20.0.5-jdk8.0.121-linux_x64.tar.gz",
@ -48,3 +55,6 @@ filegroup(
load("@io_bazel_rules_go//go:def.bzl", "go_repositories")
go_repositories()
load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories")
closure_repositories()

View File

@ -96,9 +96,18 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
insert_length = 0;
/* Put the hash keys into the table, if there are enough bytes left.
Depending on the hasher implementation, it can push all positions
in the given range or only a subset of them. */
FN(StoreRange)(hasher, ringbuffer, ringbuffer_mask, position + 2,
BROTLI_MIN(size_t, position + sr.len, store_end));
in the given range or only a subset of them.
Avoid hash poisoning with RLE data. */
{
size_t range_start = position + 2;
size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
if (sr.distance < (sr.len >> 2)) {
range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
range_start, position + sr.len - (sr.distance << 2)));
}
FN(StoreRange)(hasher, ringbuffer, ringbuffer_mask, range_start,
range_end);
}
position += sr.len;
} else {
++insert_length;

37
js/BUILD Executable file
View File

@ -0,0 +1,37 @@
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"]) # MIT
load("@io_bazel_rules_closure//closure:defs.bzl", "closure_js_library")
# Not a real polyfill. Do NOT use for anything but tests.
closure_js_library(
name = "polyfill",
srcs = ["polyfill.js"],
language = "ECMASCRIPT6_STRICT",
suppress = ["JSC_MISSING_JSDOC"],
)
# Do NOT use this artifact; it is for test purposes only.
closure_js_library(
name = "decode",
srcs = ["decode.js"],
language = "ECMASCRIPT6_STRICT",
suppress = ["JSC_USELESS_BLOCK"],
deps = [":polyfill"],
)
load("@io_bazel_rules_closure//closure:defs.bzl", "closure_js_test")
closure_js_test(
name = "all_tests",
srcs = ["decode_test.js"],
language = "ECMASCRIPT6_STRICT",
deps = [
":decode",
":polyfill",
"@io_bazel_rules_closure//closure/library:testing",
],
)

1713
js/decode.js Executable file

File diff suppressed because one or more lines are too long

1
js/decode.min.js vendored Executable file

File diff suppressed because one or more lines are too long

72
js/decode_test.js Executable file
View File

@ -0,0 +1,72 @@
goog.require('goog.testing.asserts');
goog.require('goog.testing.jsunit');
/**
 * Turns decoded bytes into a string, one UTF-16 code unit per byte.
 *
 * Only meaningful for ASCII payloads: the bytes are widened through a
 * Uint16Array, so a negative Int8Array element (a byte >= 0x80) wraps
 * modulo 2^16 (-1 becomes 0xFFFF) instead of mapping to its unsigned value.
 *
 * @param {!Int8Array} bytes bytes to convert; callers in this file pass the
 *     result of BrotliDecode.
 * @return {string}
 */
function bytesToString(bytes) {
  var codeUnits = new Uint16Array(bytes);
  return String.fromCharCode.apply(null, codeUnits);
}
/**
 * Each of these streams is expected to decode to an empty string.
 */
function testEmpty() {
  var emptyStreams = [
    [6],
    [0x81, 1],
    [1, 11, 0, 42, 3]
  ];
  for (var i = 0; i < emptyStreams.length; i++) {
    var decoded = BrotliDecode(Int8Array.from(emptyStreams[i]));
    assertEquals("", bytesToString(decoded));
  }
}
/**
 * Expects the stream below to decode to the word "time".
 * NOTE(review): going by the test name, the word presumably comes from the
 * built-in dictionary - confirm against the decoder.
 */
function testBaseDictWord() {
  var compressed = [
    0x1b, 0x03, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3, 0xb4, 0x0d, 0x00, 0x00,
    0x07, 0x5b, 0x26, 0x31, 0x40, 0x02, 0x00, 0xe0, 0x4e, 0x1b, 0x41, 0x02
  ];
  var decoded = BrotliDecode(Int8Array.from(compressed));
  assertEquals("time", bytesToString(decoded));
}
/**
 * Expects the stream below to decode to "aabbaaaaabab".
 */
function testBlockCountMessage() {
  var compressed = [
    0x1b, 0x0b, 0x00, 0x11, 0x01, 0x8c, 0xc1, 0xc5, 0x0d, 0x08, 0x00, 0x22,
    0x65, 0xe1, 0xfc, 0xfd, 0x22, 0x2c, 0xc4, 0x00, 0x00, 0x38, 0xd8, 0x32,
    0x89, 0x01, 0x12, 0x00, 0x00, 0x77, 0xda, 0x04, 0x10, 0x42, 0x00, 0x00, 0x00
  ];
  var decoded = BrotliDecode(Int8Array.from(compressed));
  assertEquals("aabbaaaaabab", bytesToString(decoded));
}
/**
 * Decodes the stream below and expects a long run of 'a' characters
 * followed by a short run of 'b' characters.
 * NOTE(review): going by the test name, the stream presumably mixes
 * compressed and uncompressed sections under a small window - confirm
 * against the decoder before relying on that.
 */
function testCompressedUncompressedShortCompressedSmallWindow() {
var input = Int8Array.from([
0x21, 0xf4, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x1c, 0xa7, 0x6d, 0x00, 0x00,
0x38, 0xd8, 0x32, 0x89, 0x01, 0x12, 0x00, 0x00, 0x77, 0xda, 0x34, 0x7b,
0xdb, 0x50, 0x80, 0x02, 0x80, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x31,
0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x4e, 0xdb, 0x00, 0x00, 0x70, 0xb0,
0x65, 0x12, 0x03, 0x24, 0x00, 0x00, 0xee, 0xb4, 0x11, 0x24, 0x00
]);
var output = BrotliDecode(input);
// The expected text is split across concatenated literals only to keep
// line lengths manageable.
assertEquals(
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaabbbbbbbbbb", bytesToString(output));
}
/**
 * Expects the stream below to decode to "himselfself".
 */
function testIntactDistanceRingBuffer0() {
  var compressed = [
    0x1b, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3, 0xb4, 0x0d, 0x00, 0x00,
    0x07, 0x5b, 0x26, 0x31, 0x40, 0x02, 0x00, 0xe0, 0x4e, 0x1b, 0xa1, 0x80,
    0x20, 0x00
  ];
  var decoded = BrotliDecode(Int8Array.from(compressed));
  assertEquals("himselfself", bytesToString(decoded));
}

65
js/polyfill.js Executable file
View File

@ -0,0 +1,65 @@
// Test-only stand-in for the static `from` shared by all typed-array
// constructors (installed on the prototype object of Int32Array itself).
// It copies an array-like element by element; no mapping callback is supported.
if (!Int32Array.__proto__.from) {
  Object.defineProperty(Int32Array.__proto__, 'from', {
    value: function(obj) {
      var source = Object(obj);
      // Missing or zero length: hand back an empty array of the calling type.
      if (!source['length']) {
        return new this(0);
      }
      var result = new this(source.length);
      var index = 0;
      while (index < result.length) {
        result[index] = source[index];
        index++;
      }
      return result;
    }
  });
}
// Test-only stand-in for ES2015 Array.prototype.copyWithin.
// Copies the elements in [start, end) onto the positions beginning at
// `target`, in place, and returns the receiver. The length never changes.
if (!Array.prototype.copyWithin) {
  Array.prototype.copyWithin = function(target, start, end) {
    var O = Object(this);
    var len = O.length >>> 0;
    // Negative indices count back from the end; results are clamped to [0, len].
    var clamp = function(index) {
      return index < 0 ? Math.max(len + index, 0) : Math.min(index, len);
    };
    var to = clamp(target | 0);
    var from = clamp(start | 0);
    // An omitted end means "through the last element"; coercing undefined
    // with `| 0` would turn it into 0 and copy nothing.
    var last = clamp(end === undefined ? len : end | 0);
    var count = Math.min(last - from, len - to);
    var direction = 1;
    // When the destination overlaps the tail of the source, copy backwards
    // so elements are read before they are overwritten.
    if (from < to && to < (from + count)) {
      direction = -1;
      from += count - 1;
      to += count - 1;
    }
    while (count > 0) {
      O[to] = O[from];
      from += direction;
      to += direction;
      count--;
    }
    return O;
  };
}
// Test-only stand-in for ES2015 Array.prototype.fill.
// Writes `value` into every position in [start, end), in place, and returns
// the receiver. The length never changes.
if (!Array.prototype.fill) {
  Object.defineProperty(Array.prototype, 'fill', {
    value: function(value, start, end) {
      var O = Object(this);
      var len = O.length >>> 0;
      // Negative indices count back from the end; results are clamped to [0, len].
      var clamp = function(index) {
        return index < 0 ? Math.max(len + index, 0) : Math.min(index, len);
      };
      // An omitted start coerces to 0 via `| 0`, which is already the right default.
      var k = clamp(start | 0);
      // An omitted end means "through the last element"; coercing undefined
      // with `| 0` would turn it into 0 and make fill(value) a no-op.
      var last = clamp(end === undefined ? len : end | 0);
      while (k < last) {
        O[k] = value;
        k++;
      }
      return O;
    }
  });
}
// Typed arrays that lack their own copyWithin/fill borrow whatever is
// currently installed on Array.prototype.
(function() {
  var generic = Array.prototype;
  var int8 = Int8Array.prototype;
  var int32 = Int32Array.prototype;
  if (!int8.copyWithin) {
    int8.copyWithin = generic.copyWithin;
  }
  if (!int8.fill) {
    int8.fill = generic.fill;
  }
  if (!int32.fill) {
    int32.fill = generic.fill;
  }
})();

View File

@ -1,6 +1,8 @@
This directory contains the code for the Python `brotli` module,
`bro.py` tool, and roundtrip tests.
Only Python 2.7+ is supported.
We provide a `Makefile` to simplify common development commands.
### Installation

View File

@ -88,7 +88,8 @@ static int lgblock_convertor(PyObject *o, int *lgblock) {
}
static BROTLI_BOOL compress_stream(BrotliEncoderState* enc, BrotliEncoderOperation op,
std::vector<uint8_t>* output, uint8_t* input, size_t input_length) {
std::vector<uint8_t>* output,
uint8_t* input, size_t input_length) {
BROTLI_BOOL ok = BROTLI_TRUE;
Py_BEGIN_ALLOW_THREADS
@ -222,11 +223,15 @@ PyDoc_STRVAR(brotli_Compressor_process_doc,
static PyObject* brotli_Compressor_process(brotli_Compressor *self, PyObject *args) {
PyObject* ret = NULL;
std::vector<uint8_t> output;
uint8_t* input;
size_t input_length;
Py_buffer input;
BROTLI_BOOL ok = BROTLI_TRUE;
ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "s#:process", &input, &input_length);
#if PY_MAJOR_VERSION >= 3
ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "y*:process", &input);
#else
ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "s*:process", &input);
#endif
if (!ok)
return NULL;
@ -236,9 +241,10 @@ static PyObject* brotli_Compressor_process(brotli_Compressor *self, PyObject *ar
}
ok = compress_stream(self->enc, BROTLI_OPERATION_PROCESS,
&output, input, input_length);
&output, static_cast<uint8_t*>(input.buf), input.len);
end:
PyBuffer_Release(&input);
if (ok) {
ret = PyBytes_FromStringAndSize((char*)(output.size() ? &output[0] : NULL), output.size());
} else {
@ -387,7 +393,8 @@ static PyTypeObject brotli_CompressorType = {
};
static BROTLI_BOOL decompress_stream(BrotliDecoderState* dec,
std::vector<uint8_t>* output, uint8_t* input, size_t input_length) {
std::vector<uint8_t>* output,
uint8_t* input, size_t input_length) {
BROTLI_BOOL ok = BROTLI_TRUE;
Py_BEGIN_ALLOW_THREADS
@ -485,11 +492,15 @@ PyDoc_STRVAR(brotli_Decompressor_process_doc,
static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args) {
PyObject* ret = NULL;
std::vector<uint8_t> output;
uint8_t* input;
size_t input_length;
Py_buffer input;
BROTLI_BOOL ok = BROTLI_TRUE;
ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "s#:process", &input, &input_length);
#if PY_MAJOR_VERSION >= 3
ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "y*:process", &input);
#else
ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "s*:process", &input);
#endif
if (!ok)
return NULL;
@ -498,10 +509,10 @@ static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject
goto end;
}
ok = decompress_stream(self->dec,
&output, input, input_length);
ok = decompress_stream(self->dec, &output, static_cast<uint8_t*>(input.buf), input.len);
end:
PyBuffer_Release(&input);
if (ok) {
ret = PyBytes_FromStringAndSize((char*)(output.empty() ? NULL : &output[0]), output.size());
} else {
@ -625,15 +636,21 @@ PyDoc_STRVAR(brotli_decompress__doc__,
static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *keywds) {
PyObject *ret = NULL;
const uint8_t *input;
size_t length;
Py_buffer input;
const uint8_t* next_in;
size_t available_in;
int ok;
static const char *kwlist[] = {"string", NULL};
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|:decompress",
const_cast<char **>(kwlist),
&input, &length);
#if PY_MAJOR_VERSION >= 3
ok = PyArg_ParseTupleAndKeywords(args, keywds, "y*|:decompress",
const_cast<char **>(kwlist), &input);
#else
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s*|:decompress",
const_cast<char **>(kwlist), &input);
#endif
if (!ok)
return NULL;
@ -645,9 +662,11 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *key
BrotliDecoderState* state = BrotliDecoderCreateInstance(0, 0, 0);
BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
next_in = static_cast<uint8_t*>(input.buf);
available_in = input.len;
while (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
size_t available_out = 0;
result = BrotliDecoderDecompressStream(state, &length, &input,
result = BrotliDecoderDecompressStream(state, &available_in, &next_in,
&available_out, 0, 0);
const uint8_t* next_out = BrotliDecoderTakeOutput(state, &available_out);
if (available_out != 0)
@ -659,6 +678,7 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *key
Py_END_ALLOW_THREADS
/* <<< Pure C block end. Python GIL reacquired. */
PyBuffer_Release(&input);
if (ok) {
ret = PyBytes_FromStringAndSize((char*)(output.size() ? &output[0] : NULL), output.size());
} else {

View File

@ -252,6 +252,7 @@ retry:
if (best_cost == 0 || best_isle.lcp < MIN_MATCH) {
if (min_count >= 8) {
min_count = (min_count * 7) / 8;
fprintf(stderr, "Retry: min_count=%d\n", min_count);
goto retry;
}
break;
@ -261,8 +262,10 @@ retry:
fprintf(stderr,
"Savings: %zu+%zu, dictionary: %zu+%d\n",
total_cost, best_cost, total, best_isle.lcp);
memcpy(
dictionary + total, full_text.data() + sa[best_isle.l], best_isle.lcp);
for (size_t i = 0; i < best_isle.lcp; ++i) {
dictionary[total + i] =
static_cast<uint8_t>(full_text[sa[best_isle.l] + i]);
}
total += best_isle.lcp;
total_cost += best_cost;
cutMatch(&data, best_isle.l, best_isle.lcp, &sa, &lcp,