mirror of
https://github.com/google/brotli.git
synced 2024-11-25 21:10:05 +00:00
Merge pull request #321 from eustas/master
Add custom dictionary feature binding
This commit is contained in:
commit
7e5bbd5f9b
@ -79,6 +79,8 @@ def main(args=None):
|
|||||||
help='Base 2 logarithm of the maximum input block size. '
|
help='Base 2 logarithm of the maximum input block size. '
|
||||||
'Range is 16 to 24. If set to 0, the value will be set based '
|
'Range is 16 to 24. If set to 0, the value will be set based '
|
||||||
'on the quality. Defaults to 0.')
|
'on the quality. Defaults to 0.')
|
||||||
|
params.add_argument('--custom-dictionary', metavar="FILE", type=str, dest='dictfile',
|
||||||
|
help='Custom dictionary file.', default = None)
|
||||||
# set default values using global DEFAULT_PARAMS dictionary
|
# set default values using global DEFAULT_PARAMS dictionary
|
||||||
parser.set_defaults(**DEFAULT_PARAMS)
|
parser.set_defaults(**DEFAULT_PARAMS)
|
||||||
|
|
||||||
@ -103,13 +105,22 @@ def main(args=None):
|
|||||||
else:
|
else:
|
||||||
outfile = get_binary_stdio('stdout')
|
outfile = get_binary_stdio('stdout')
|
||||||
|
|
||||||
|
if options.dictfile:
|
||||||
|
if not os.path.isfile(options.dictfile):
|
||||||
|
parser.error('file "%s" not found' % options.dictfile)
|
||||||
|
with open(options.dictfile, "rb") as dictfile:
|
||||||
|
custom_dictionary = dictfile.read()
|
||||||
|
else:
|
||||||
|
custom_dictionary = ''
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if options.decompress:
|
if options.decompress:
|
||||||
data = brotli.decompress(data)
|
data = brotli.decompress(data, dictionary=custom_dictionary)
|
||||||
else:
|
else:
|
||||||
data = brotli.compress(
|
data = brotli.compress(
|
||||||
data, mode=options.mode, quality=options.quality,
|
data, mode=options.mode, quality=options.quality,
|
||||||
lgwin=options.lgwin, lgblock=options.lgblock)
|
lgwin=options.lgwin, lgblock=options.lgblock, dictionary=custom_dictionary)
|
||||||
except brotli.error as e:
|
except brotli.error as e:
|
||||||
parser.exit(1,'bro: error: %s: %s' % (e, options.infile or 'sys.stdin'))
|
parser.exit(1,'bro: error: %s: %s' % (e, options.infile or 'sys.stdin'))
|
||||||
|
|
||||||
|
@ -91,7 +91,7 @@ PyDoc_STRVAR(compress__doc__,
|
|||||||
"Compress a byte string.\n"
|
"Compress a byte string.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"Signature:\n"
|
"Signature:\n"
|
||||||
" compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0)\n"
|
" compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0, dictionary='')\n"
|
||||||
"\n"
|
"\n"
|
||||||
"Args:\n"
|
"Args:\n"
|
||||||
" string (bytes): The input data.\n"
|
" string (bytes): The input data.\n"
|
||||||
@ -105,6 +105,8 @@ PyDoc_STRVAR(compress__doc__,
|
|||||||
" lgblock (int, optional): Base 2 logarithm of the maximum input block size.\n"
|
" lgblock (int, optional): Base 2 logarithm of the maximum input block size.\n"
|
||||||
" Range is 16 to 24. If set to 0, the value will be set based on the\n"
|
" Range is 16 to 24. If set to 0, the value will be set based on the\n"
|
||||||
" quality. Defaults to 0.\n"
|
" quality. Defaults to 0.\n"
|
||||||
|
" dictionary (bytes, optional): Custom dictionary. Only last sliding window\n"
|
||||||
|
" size bytes will be used.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"Returns:\n"
|
"Returns:\n"
|
||||||
" The compressed byte string.\n"
|
" The compressed byte string.\n"
|
||||||
@ -114,24 +116,28 @@ PyDoc_STRVAR(compress__doc__,
|
|||||||
|
|
||||||
static PyObject* brotli_compress(PyObject *self, PyObject *args, PyObject *keywds) {
|
static PyObject* brotli_compress(PyObject *self, PyObject *args, PyObject *keywds) {
|
||||||
PyObject *ret = NULL;
|
PyObject *ret = NULL;
|
||||||
uint8_t *input, *output;
|
uint8_t *input, *output, *custom_dictionary;
|
||||||
size_t length, output_length;
|
size_t length, output_length, custom_dictionary_length;
|
||||||
BrotliParams::Mode mode = (BrotliParams::Mode) -1;
|
BrotliParams::Mode mode = (BrotliParams::Mode) -1;
|
||||||
int quality = -1;
|
int quality = -1;
|
||||||
int lgwin = -1;
|
int lgwin = -1;
|
||||||
int lgblock = -1;
|
int lgblock = -1;
|
||||||
int ok;
|
int ok;
|
||||||
|
|
||||||
static const char *kwlist[] = {"string", "mode", "quality", "lgwin", "lgblock", NULL};
|
static const char *kwlist[] = {
|
||||||
|
"string", "mode", "quality", "lgwin", "lgblock", "dictionary", NULL};
|
||||||
|
|
||||||
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|O&O&O&O&:compress",
|
custom_dictionary = NULL;
|
||||||
|
custom_dictionary_length = 0;
|
||||||
|
|
||||||
|
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|O&O&O&O&s#:compress",
|
||||||
const_cast<char **>(kwlist),
|
const_cast<char **>(kwlist),
|
||||||
&input, &length,
|
&input, &length,
|
||||||
&mode_convertor, &mode,
|
&mode_convertor, &mode,
|
||||||
&quality_convertor, &quality,
|
&quality_convertor, &quality,
|
||||||
&lgwin_convertor, &lgwin,
|
&lgwin_convertor, &lgwin,
|
||||||
&lgblock_convertor, &lgblock);
|
&lgblock_convertor, &lgblock,
|
||||||
|
&custom_dictionary, &custom_dictionary_length);
|
||||||
if (!ok)
|
if (!ok)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
@ -148,8 +154,23 @@ static PyObject* brotli_compress(PyObject *self, PyObject *args, PyObject *keywd
|
|||||||
if (lgblock != -1)
|
if (lgblock != -1)
|
||||||
params.lgblock = lgblock;
|
params.lgblock = lgblock;
|
||||||
|
|
||||||
ok = BrotliCompressBuffer(params, length, input,
|
if (custom_dictionary_length == 0) {
|
||||||
&output_length, output);
|
ok = BrotliCompressBuffer(params, length, input,
|
||||||
|
&output_length, output);
|
||||||
|
} else {
|
||||||
|
uint8_t *custom_dictionary_start = custom_dictionary;
|
||||||
|
BrotliMemIn in(input, length);
|
||||||
|
BrotliMemOut out(output, output_length);
|
||||||
|
size_t sliding_window_size = ((size_t)1) << params.lgwin;
|
||||||
|
if (custom_dictionary_length > sliding_window_size) {
|
||||||
|
custom_dictionary_start += custom_dictionary_length - sliding_window_size;
|
||||||
|
custom_dictionary_length = sliding_window_size;
|
||||||
|
}
|
||||||
|
ok = BrotliCompressWithCustomDictionary(custom_dictionary_length,
|
||||||
|
custom_dictionary_start, params, &in, &out);
|
||||||
|
output_length = out.position();
|
||||||
|
}
|
||||||
|
|
||||||
if (ok) {
|
if (ok) {
|
||||||
ret = PyBytes_FromStringAndSize((char*)output, output_length);
|
ret = PyBytes_FromStringAndSize((char*)output, output_length);
|
||||||
} else {
|
} else {
|
||||||
@ -169,6 +190,8 @@ PyDoc_STRVAR(decompress__doc__,
|
|||||||
"\n"
|
"\n"
|
||||||
"Args:\n"
|
"Args:\n"
|
||||||
" string (bytes): The compressed input data.\n"
|
" string (bytes): The compressed input data.\n"
|
||||||
|
" dictionary (bytes, optional): Custom dictionary. MUST be the same data\n"
|
||||||
|
" as passed to compress method.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"Returns:\n"
|
"Returns:\n"
|
||||||
" The decompressed byte string.\n"
|
" The decompressed byte string.\n"
|
||||||
@ -176,13 +199,21 @@ PyDoc_STRVAR(decompress__doc__,
|
|||||||
"Raises:\n"
|
"Raises:\n"
|
||||||
" brotli.error: If decompressor fails.\n");
|
" brotli.error: If decompressor fails.\n");
|
||||||
|
|
||||||
static PyObject* brotli_decompress(PyObject *self, PyObject *args) {
|
static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *keywds) {
|
||||||
PyObject *ret = NULL;
|
PyObject *ret = NULL;
|
||||||
const uint8_t *input;
|
const uint8_t *input, *custom_dictionary;
|
||||||
size_t length;
|
size_t length, custom_dictionary_length;
|
||||||
int ok;
|
int ok;
|
||||||
|
|
||||||
ok = PyArg_ParseTuple(args, "s#:decompress", &input, &length);
|
static const char *kwlist[] = {"string", "dictionary", NULL};
|
||||||
|
|
||||||
|
custom_dictionary = NULL;
|
||||||
|
custom_dictionary_length = 0;
|
||||||
|
|
||||||
|
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|s#:decompress",
|
||||||
|
const_cast<char **>(kwlist),
|
||||||
|
&input, &length,
|
||||||
|
&custom_dictionary, &custom_dictionary_length);
|
||||||
if (!ok)
|
if (!ok)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
@ -191,7 +222,10 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args) {
|
|||||||
uint8_t* buffer = new uint8_t[kBufferSize];
|
uint8_t* buffer = new uint8_t[kBufferSize];
|
||||||
BrotliState state;
|
BrotliState state;
|
||||||
BrotliStateInit(&state);
|
BrotliStateInit(&state);
|
||||||
|
if (custom_dictionary_length != 0) {
|
||||||
|
BrotliSetCustomDictionary(custom_dictionary_length, custom_dictionary, &state);
|
||||||
|
}
|
||||||
|
|
||||||
BrotliResult result = BROTLI_RESULT_NEEDS_MORE_OUTPUT;
|
BrotliResult result = BROTLI_RESULT_NEEDS_MORE_OUTPUT;
|
||||||
while (result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) {
|
while (result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) {
|
||||||
size_t available_out = kBufferSize;
|
size_t available_out = kBufferSize;
|
||||||
@ -219,7 +253,7 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args) {
|
|||||||
|
|
||||||
static PyMethodDef brotli_methods[] = {
|
static PyMethodDef brotli_methods[] = {
|
||||||
{"compress", (PyCFunction)brotli_compress, METH_VARARGS | METH_KEYWORDS, compress__doc__},
|
{"compress", (PyCFunction)brotli_compress, METH_VARARGS | METH_KEYWORDS, compress__doc__},
|
||||||
{"decompress", brotli_decompress, METH_VARARGS, decompress__doc__},
|
{"decompress", (PyCFunction)brotli_decompress, METH_VARARGS | METH_KEYWORDS, decompress__doc__},
|
||||||
{NULL, NULL, 0, NULL}
|
{NULL, NULL, 0, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
36
python/tests/custom_dictionary_test.py
Normal file
36
python/tests/custom_dictionary_test.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
from __future__ import print_function
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from subprocess import check_call, Popen, PIPE
|
||||||
|
|
||||||
|
from test_utils import PYTHON, BRO, TEST_ENV, diff_q
|
||||||
|
|
||||||
|
|
||||||
|
INPUTS = """\
|
||||||
|
testdata/alice29.txt
|
||||||
|
testdata/asyoulik.txt
|
||||||
|
testdata/lcet10.txt
|
||||||
|
testdata/plrabn12.txt
|
||||||
|
../enc/encode.cc
|
||||||
|
../enc/dictionary.h
|
||||||
|
../dec/decode.c
|
||||||
|
%s
|
||||||
|
""" % BRO
|
||||||
|
|
||||||
|
os.chdir(os.path.abspath("../../tests"))
|
||||||
|
for filename in INPUTS.splitlines():
|
||||||
|
for quality in (1, 6, 9, 11):
|
||||||
|
for lgwin in (10, 15, 20, 24):
|
||||||
|
filename = os.path.abspath(filename)
|
||||||
|
print('Roundtrip testing file "%s" at quality %d with lg(win)=%d and auto-custom-dictionary' %
|
||||||
|
(os.path.basename(filename), quality, lgwin))
|
||||||
|
compressed = os.path.splitext(filename)[0] + ".custom_bro"
|
||||||
|
uncompressed = os.path.splitext(filename)[0] + ".custom_unbro"
|
||||||
|
check_call([PYTHON, BRO, "-f", "-q", str(quality), "-i", filename,
|
||||||
|
"-o", compressed, "--lgwin", str(lgwin),
|
||||||
|
"--custom-dictionary", filename], env=TEST_ENV)
|
||||||
|
check_call([PYTHON, BRO, "-f", "-d", "-i", compressed, "-o",
|
||||||
|
uncompressed, "--custom-dictionary", filename], env=TEST_ENV)
|
||||||
|
if diff_q(filename, uncompressed) != 0:
|
||||||
|
sys.exit(1)
|
Loading…
Reference in New Issue
Block a user