mirror of
https://github.com/google/brotli.git
synced 2024-11-24 04:20:16 +00:00
Update (#590)
* add transpiled JS decoder * make PY wrapper accept memview * fix dictionary generator * speedup compression of RLEish data
This commit is contained in:
parent
6535435413
commit
a629289e32
10
WORKSPACE
10
WORKSPACE
@ -14,6 +14,13 @@ git_repository(
|
||||
tag = "0.4.4",
|
||||
)
|
||||
|
||||
http_archive(
|
||||
name = "io_bazel_rules_closure",
|
||||
strip_prefix = "rules_closure-0.4.1",
|
||||
sha256 = "ba5e2e10cdc4027702f96e9bdc536c6595decafa94847d08ae28c6cb48225124",
|
||||
url = "http://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/0.4.1.tar.gz",
|
||||
)
|
||||
|
||||
new_http_archive(
|
||||
name = "openjdk_linux",
|
||||
url = "https://bazel-mirror.storage.googleapis.com/openjdk/azul-zulu-8.20.0.5-jdk8.0.121/zulu8.20.0.5-jdk8.0.121-linux_x64.tar.gz",
|
||||
@ -48,3 +55,6 @@ filegroup(
|
||||
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_repositories")
|
||||
go_repositories()
|
||||
|
||||
load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories")
|
||||
closure_repositories()
|
||||
|
@ -96,9 +96,18 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
|
||||
insert_length = 0;
|
||||
/* Put the hash keys into the table, if there are enough bytes left.
|
||||
Depending on the hasher implementation, it can push all positions
|
||||
in the given range or only a subset of them. */
|
||||
FN(StoreRange)(hasher, ringbuffer, ringbuffer_mask, position + 2,
|
||||
BROTLI_MIN(size_t, position + sr.len, store_end));
|
||||
in the given range or only a subset of them.
|
||||
Avoid hash poisoning with RLE data. */
|
||||
{
|
||||
size_t range_start = position + 2;
|
||||
size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
|
||||
if (sr.distance < (sr.len >> 2)) {
|
||||
range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
|
||||
range_start, position + sr.len - (sr.distance << 2)));
|
||||
}
|
||||
FN(StoreRange)(hasher, ringbuffer, ringbuffer_mask, range_start,
|
||||
range_end);
|
||||
}
|
||||
position += sr.len;
|
||||
} else {
|
||||
++insert_length;
|
||||
|
37
js/BUILD
Executable file
37
js/BUILD
Executable file
@ -0,0 +1,37 @@
|
||||
# Build rules for the JavaScript Brotli decoder and its tests.
load(
    "@io_bazel_rules_closure//closure:defs.bzl",
    "closure_js_library",
    "closure_js_test",
)

package(default_visibility = ["//visibility:public"])

licenses(["notice"])  # MIT

# Test-only stand-in for missing built-ins; it is not a real polyfill and must
# not be used for anything except tests.
closure_js_library(
    name = "polyfill",
    srcs = ["polyfill.js"],
    language = "ECMASCRIPT6_STRICT",
    suppress = ["JSC_MISSING_JSDOC"],
)

# Test-only artifact; do NOT depend on it from anything else.
closure_js_library(
    name = "decode",
    srcs = ["decode.js"],
    language = "ECMASCRIPT6_STRICT",
    suppress = ["JSC_USELESS_BLOCK"],
    deps = [":polyfill"],
)

closure_js_test(
    name = "all_tests",
    srcs = ["decode_test.js"],
    language = "ECMASCRIPT6_STRICT",
    deps = [
        ":decode",
        ":polyfill",
        "@io_bazel_rules_closure//closure/library:testing",
    ],
)
|
1713
js/decode.js
Executable file
1713
js/decode.js
Executable file
File diff suppressed because one or more lines are too long
1
js/decode.min.js
vendored
Executable file
1
js/decode.min.js
vendored
Executable file
File diff suppressed because one or more lines are too long
72
js/decode_test.js
Executable file
72
js/decode_test.js
Executable file
@ -0,0 +1,72 @@
|
||||
goog.require('goog.testing.asserts');
|
||||
goog.require('goog.testing.jsunit');
|
||||
|
||||
/**
 * Converts decoded bytes to a string, one character per byte.
 *
 * Each element is masked to its unsigned 8-bit value first, so a byte stored
 * as a negative number in a signed array (for example -23 for 0xE9) maps to
 * U+00E9 rather than to a code unit in the U+FF80..U+FFFF range.
 *
 * NOTE(review): the parameter type assumes BrotliDecode returns an Int8Array,
 * mirroring the Int8Array inputs used by the tests in this file - confirm
 * against the decoder's declared return type.
 *
 * @param {!Int8Array} bytes decoded output to convert
 * @return {string} string whose i-th character has code `bytes[i] & 0xFF`
 */
function bytesToString(bytes) {
  var codes = new Uint16Array(bytes.length);
  for (var i = 0; i < bytes.length; i++) {
    codes[i] = bytes[i] & 0xFF;
  }
  return String.fromCharCode.apply(null, codes);
}
|
||||
|
||||
/** Each of these streams must decode to the empty string. */
function testEmpty() {
  var emptyStreams = [
    [6],
    [0x81, 1],
    [1, 11, 0, 42, 3]
  ];
  for (var i = 0; i < emptyStreams.length; i++) {
    var decoded = BrotliDecode(Int8Array.from(emptyStreams[i]));
    assertEquals("", bytesToString(decoded));
  }
}
|
||||
|
||||
/** Decodes a fixed 24-byte stream and expects the text "time". */
function testBaseDictWord() {
  var compressed = [
    0x1b, 0x03, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3,
    0xb4, 0x0d, 0x00, 0x00, 0x07, 0x5b, 0x26, 0x31,
    0x40, 0x02, 0x00, 0xe0, 0x4e, 0x1b, 0x41, 0x02
  ];
  var decoded = bytesToString(BrotliDecode(Int8Array.from(compressed)));
  assertEquals("time", decoded);
}
|
||||
|
||||
/** Decodes a fixed 37-byte stream and expects the text "aabbaaaaabab". */
function testBlockCountMessage() {
  var compressed = [
    0x1b, 0x0b, 0x00, 0x11, 0x01, 0x8c, 0xc1, 0xc5,
    0x0d, 0x08, 0x00, 0x22, 0x65, 0xe1, 0xfc, 0xfd,
    0x22, 0x2c, 0xc4, 0x00, 0x00, 0x38, 0xd8, 0x32,
    0x89, 0x01, 0x12, 0x00, 0x00, 0x77, 0xda, 0x04,
    0x10, 0x42, 0x00, 0x00, 0x00
  ];
  var decoded = bytesToString(BrotliDecode(Int8Array.from(compressed)));
  assertEquals("aabbaaaaabab", decoded);
}
|
||||
|
||||
/**
 * Decodes a fixed 59-byte stream and expects a long run of "a" followed by
 * ten "b" characters.
 */
function testCompressedUncompressedShortCompressedSmallWindow() {
  var compressed = [
    0x21, 0xf4, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x1c,
    0xa7, 0x6d, 0x00, 0x00, 0x38, 0xd8, 0x32, 0x89,
    0x01, 0x12, 0x00, 0x00, 0x77, 0xda, 0x34, 0x7b,
    0xdb, 0x50, 0x80, 0x02, 0x80, 0x62, 0x62, 0x62,
    0x62, 0x62, 0x62, 0x31, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x38, 0x4e, 0xdb, 0x00, 0x00, 0x70, 0xb0,
    0x65, 0x12, 0x03, 0x24, 0x00, 0x00, 0xee, 0xb4,
    0x11, 0x24, 0x00
  ];
  var expected =
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
    "aaaaaaaaaaaaaabbbbbbbbbb";
  var decoded = bytesToString(BrotliDecode(Int8Array.from(compressed)));
  assertEquals(expected, decoded);
}
|
||||
|
||||
/** Decodes a fixed 26-byte stream and expects the text "himselfself". */
function testIntactDistanceRingBuffer0() {
  var compressed = [
    0x1b, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3,
    0xb4, 0x0d, 0x00, 0x00, 0x07, 0x5b, 0x26, 0x31,
    0x40, 0x02, 0x00, 0xe0, 0x4e, 0x1b, 0xa1, 0x80,
    0x20, 0x00
  ];
  var decoded = bytesToString(BrotliDecode(Int8Array.from(compressed)));
  assertEquals("himselfself", decoded);
}
|
65
js/polyfill.js
Executable file
65
js/polyfill.js
Executable file
@ -0,0 +1,65 @@
|
||||
// Fallback for a missing static `from` on the object that Int32Array inherits
// from. Only plain element copying is provided.
if (!Int32Array.__proto__.from) {
  Object.defineProperty(Int32Array.__proto__, 'from', {
    value: function(obj) {
      obj = Object(obj);
      // A missing or zero length yields an empty array of the receiver's type.
      if (!obj['length']) {
        return new this(0);
      }
      var result = new this(obj.length);
      var index = 0;
      while (index < result.length) {
        result[index] = obj[index];
        index++;
      }
      return result;
    }
  });
}
|
||||
|
||||
if (!Array.prototype.copyWithin) {
|
||||
Array.prototype.copyWithin = function(target, start, end) {
|
||||
var O = Object(this);
|
||||
var len = O.length >>> 0;
|
||||
var to = target | 0;
|
||||
var from = start | 0;
|
||||
var count = Math.min(Math.min(end | 0, len) - from, len - to);
|
||||
var direction = 1;
|
||||
if (from < to && to < (from + count)) {
|
||||
direction = -1;
|
||||
from += count - 1;
|
||||
to += count - 1;
|
||||
}
|
||||
while (count > 0) {
|
||||
O[to] = O[from];
|
||||
from += direction;
|
||||
to += direction;
|
||||
count--;
|
||||
}
|
||||
return O;
|
||||
};
|
||||
}
|
||||
|
||||
// Fallback for a missing Array.prototype.fill.
//
// Sets every element in [start, end) to `value` and returns the array. An
// omitted `start` means 0 and an omitted `end` means the length; negative
// indices count back from the length; both are clamped to [0, length], so the
// array is never extended.
if (!Array.prototype.fill) {
  Object.defineProperty(Array.prototype, 'fill', {
    value: function(value, start, end) {
      var O = Object(this);
      var len = O.length >>> 0;

      var relStart = start | 0;
      var k = relStart < 0 ?
          Math.max(len + relStart, 0) : Math.min(relStart, len);

      // `undefined | 0` is 0, so an omitted end must be handled explicitly;
      // otherwise fill(value) would not touch any element.
      var relEnd = end === undefined ? len : end | 0;
      var last = relEnd < 0 ?
          Math.max(len + relEnd, 0) : Math.min(relEnd, len);

      while (k < last) {
        O[k] = value;
        k++;
      }
      return O;
    }
  });
}
|
||||
|
||||
// Typed arrays that lack copyWithin / fill borrow whatever Array.prototype
// currently provides for that method.
(function() {
  var arrayProto = Array.prototype;
  var int8Proto = Int8Array.prototype;
  var int32Proto = Int32Array.prototype;

  if (!int8Proto.copyWithin) {
    int8Proto.copyWithin = arrayProto.copyWithin;
  }
  if (!int8Proto.fill) {
    int8Proto.fill = arrayProto.fill;
  }
  if (!int32Proto.fill) {
    int32Proto.fill = arrayProto.fill;
  }
})();
|
@ -1,6 +1,8 @@
|
||||
This directory contains the code for the Python `brotli` module,
|
||||
`bro.py` tool, and roundtrip tests.
|
||||
|
||||
Only Python 2.7+ is supported.
|
||||
|
||||
We provide a `Makefile` to simplify common development commands.
|
||||
|
||||
### Installation
|
||||
|
@ -88,7 +88,8 @@ static int lgblock_convertor(PyObject *o, int *lgblock) {
|
||||
}
|
||||
|
||||
static BROTLI_BOOL compress_stream(BrotliEncoderState* enc, BrotliEncoderOperation op,
|
||||
std::vector<uint8_t>* output, uint8_t* input, size_t input_length) {
|
||||
std::vector<uint8_t>* output,
|
||||
uint8_t* input, size_t input_length) {
|
||||
BROTLI_BOOL ok = BROTLI_TRUE;
|
||||
Py_BEGIN_ALLOW_THREADS
|
||||
|
||||
@ -222,11 +223,15 @@ PyDoc_STRVAR(brotli_Compressor_process_doc,
|
||||
static PyObject* brotli_Compressor_process(brotli_Compressor *self, PyObject *args) {
|
||||
PyObject* ret = NULL;
|
||||
std::vector<uint8_t> output;
|
||||
uint8_t* input;
|
||||
size_t input_length;
|
||||
Py_buffer input;
|
||||
BROTLI_BOOL ok = BROTLI_TRUE;
|
||||
|
||||
ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "s#:process", &input, &input_length);
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "y*:process", &input);
|
||||
#else
|
||||
ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "s*:process", &input);
|
||||
#endif
|
||||
|
||||
if (!ok)
|
||||
return NULL;
|
||||
|
||||
@ -236,9 +241,10 @@ static PyObject* brotli_Compressor_process(brotli_Compressor *self, PyObject *ar
|
||||
}
|
||||
|
||||
ok = compress_stream(self->enc, BROTLI_OPERATION_PROCESS,
|
||||
&output, input, input_length);
|
||||
&output, static_cast<uint8_t*>(input.buf), input.len);
|
||||
|
||||
end:
|
||||
PyBuffer_Release(&input);
|
||||
if (ok) {
|
||||
ret = PyBytes_FromStringAndSize((char*)(output.size() ? &output[0] : NULL), output.size());
|
||||
} else {
|
||||
@ -387,7 +393,8 @@ static PyTypeObject brotli_CompressorType = {
|
||||
};
|
||||
|
||||
static BROTLI_BOOL decompress_stream(BrotliDecoderState* dec,
|
||||
std::vector<uint8_t>* output, uint8_t* input, size_t input_length) {
|
||||
std::vector<uint8_t>* output,
|
||||
uint8_t* input, size_t input_length) {
|
||||
BROTLI_BOOL ok = BROTLI_TRUE;
|
||||
Py_BEGIN_ALLOW_THREADS
|
||||
|
||||
@ -485,11 +492,15 @@ PyDoc_STRVAR(brotli_Decompressor_process_doc,
|
||||
static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args) {
|
||||
PyObject* ret = NULL;
|
||||
std::vector<uint8_t> output;
|
||||
uint8_t* input;
|
||||
size_t input_length;
|
||||
Py_buffer input;
|
||||
BROTLI_BOOL ok = BROTLI_TRUE;
|
||||
|
||||
ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "s#:process", &input, &input_length);
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "y*:process", &input);
|
||||
#else
|
||||
ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "s*:process", &input);
|
||||
#endif
|
||||
|
||||
if (!ok)
|
||||
return NULL;
|
||||
|
||||
@ -498,10 +509,10 @@ static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject
|
||||
goto end;
|
||||
}
|
||||
|
||||
ok = decompress_stream(self->dec,
|
||||
&output, input, input_length);
|
||||
ok = decompress_stream(self->dec, &output, static_cast<uint8_t*>(input.buf), input.len);
|
||||
|
||||
end:
|
||||
PyBuffer_Release(&input);
|
||||
if (ok) {
|
||||
ret = PyBytes_FromStringAndSize((char*)(output.empty() ? NULL : &output[0]), output.size());
|
||||
} else {
|
||||
@ -625,15 +636,21 @@ PyDoc_STRVAR(brotli_decompress__doc__,
|
||||
|
||||
static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *keywds) {
|
||||
PyObject *ret = NULL;
|
||||
const uint8_t *input;
|
||||
size_t length;
|
||||
Py_buffer input;
|
||||
const uint8_t* next_in;
|
||||
size_t available_in;
|
||||
int ok;
|
||||
|
||||
static const char *kwlist[] = {"string", NULL};
|
||||
|
||||
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|:decompress",
|
||||
const_cast<char **>(kwlist),
|
||||
&input, &length);
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
ok = PyArg_ParseTupleAndKeywords(args, keywds, "y*|:decompress",
|
||||
const_cast<char **>(kwlist), &input);
|
||||
#else
|
||||
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s*|:decompress",
|
||||
const_cast<char **>(kwlist), &input);
|
||||
#endif
|
||||
|
||||
if (!ok)
|
||||
return NULL;
|
||||
|
||||
@ -645,9 +662,11 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *key
|
||||
BrotliDecoderState* state = BrotliDecoderCreateInstance(0, 0, 0);
|
||||
|
||||
BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
|
||||
next_in = static_cast<uint8_t*>(input.buf);
|
||||
available_in = input.len;
|
||||
while (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
|
||||
size_t available_out = 0;
|
||||
result = BrotliDecoderDecompressStream(state, &length, &input,
|
||||
result = BrotliDecoderDecompressStream(state, &available_in, &next_in,
|
||||
&available_out, 0, 0);
|
||||
const uint8_t* next_out = BrotliDecoderTakeOutput(state, &available_out);
|
||||
if (available_out != 0)
|
||||
@ -659,6 +678,7 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *key
|
||||
Py_END_ALLOW_THREADS
|
||||
/* <<< Pure C block end. Python GIL reacquired. */
|
||||
|
||||
PyBuffer_Release(&input);
|
||||
if (ok) {
|
||||
ret = PyBytes_FromStringAndSize((char*)(output.size() ? &output[0] : NULL), output.size());
|
||||
} else {
|
||||
|
@ -252,6 +252,7 @@ retry:
|
||||
if (best_cost == 0 || best_isle.lcp < MIN_MATCH) {
|
||||
if (min_count >= 8) {
|
||||
min_count = (min_count * 7) / 8;
|
||||
fprintf(stderr, "Retry: min_count=%d\n", min_count);
|
||||
goto retry;
|
||||
}
|
||||
break;
|
||||
@ -261,8 +262,10 @@ retry:
|
||||
fprintf(stderr,
|
||||
"Savings: %zu+%zu, dictionary: %zu+%d\n",
|
||||
total_cost, best_cost, total, best_isle.lcp);
|
||||
memcpy(
|
||||
dictionary + total, full_text.data() + sa[best_isle.l], best_isle.lcp);
|
||||
for (size_t i = 0; i < best_isle.lcp; ++i) {
|
||||
dictionary[total + i] =
|
||||
static_cast<uint8_t>(full_text[sa[best_isle.l] + i]);
|
||||
}
|
||||
total += best_isle.lcp;
|
||||
total_cost += best_cost;
|
||||
cutMatch(&data, best_isle.l, best_isle.lcp, &sa, &lcp,
|
||||
|
Loading…
Reference in New Issue
Block a user