Merge pull request #321 from eustas/master

Add custom dictionary feature binding
This commit is contained in:
eustas 2016-02-24 16:54:54 +01:00
commit 7e5bbd5f9b
3 changed files with 98 additions and 17 deletions

View File

@ -79,6 +79,8 @@ def main(args=None):
help='Base 2 logarithm of the maximum input block size. ' help='Base 2 logarithm of the maximum input block size. '
'Range is 16 to 24. If set to 0, the value will be set based ' 'Range is 16 to 24. If set to 0, the value will be set based '
'on the quality. Defaults to 0.') 'on the quality. Defaults to 0.')
params.add_argument('--custom-dictionary', metavar="FILE", type=str, dest='dictfile',
help='Custom dictionary file.', default = None)
# set default values using global DEFAULT_PARAMS dictionary # set default values using global DEFAULT_PARAMS dictionary
parser.set_defaults(**DEFAULT_PARAMS) parser.set_defaults(**DEFAULT_PARAMS)
@ -103,13 +105,22 @@ def main(args=None):
else: else:
outfile = get_binary_stdio('stdout') outfile = get_binary_stdio('stdout')
if options.dictfile:
if not os.path.isfile(options.dictfile):
parser.error('file "%s" not found' % options.dictfile)
with open(options.dictfile, "rb") as dictfile:
custom_dictionary = dictfile.read()
else:
custom_dictionary = ''
try: try:
if options.decompress: if options.decompress:
data = brotli.decompress(data) data = brotli.decompress(data, dictionary=custom_dictionary)
else: else:
data = brotli.compress( data = brotli.compress(
data, mode=options.mode, quality=options.quality, data, mode=options.mode, quality=options.quality,
lgwin=options.lgwin, lgblock=options.lgblock) lgwin=options.lgwin, lgblock=options.lgblock, dictionary=custom_dictionary)
except brotli.error as e: except brotli.error as e:
parser.exit(1,'bro: error: %s: %s' % (e, options.infile or 'sys.stdin')) parser.exit(1,'bro: error: %s: %s' % (e, options.infile or 'sys.stdin'))

View File

@ -91,7 +91,7 @@ PyDoc_STRVAR(compress__doc__,
"Compress a byte string.\n" "Compress a byte string.\n"
"\n" "\n"
"Signature:\n" "Signature:\n"
" compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0)\n" " compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0, dictionary='')\n"
"\n" "\n"
"Args:\n" "Args:\n"
" string (bytes): The input data.\n" " string (bytes): The input data.\n"
@ -105,6 +105,8 @@ PyDoc_STRVAR(compress__doc__,
" lgblock (int, optional): Base 2 logarithm of the maximum input block size.\n" " lgblock (int, optional): Base 2 logarithm of the maximum input block size.\n"
" Range is 16 to 24. If set to 0, the value will be set based on the\n" " Range is 16 to 24. If set to 0, the value will be set based on the\n"
" quality. Defaults to 0.\n" " quality. Defaults to 0.\n"
" dictionary (bytes, optional): Custom dictionary. Only last sliding window\n"
" size bytes will be used.\n"
"\n" "\n"
"Returns:\n" "Returns:\n"
" The compressed byte string.\n" " The compressed byte string.\n"
@ -114,24 +116,28 @@ PyDoc_STRVAR(compress__doc__,
static PyObject* brotli_compress(PyObject *self, PyObject *args, PyObject *keywds) { static PyObject* brotli_compress(PyObject *self, PyObject *args, PyObject *keywds) {
PyObject *ret = NULL; PyObject *ret = NULL;
uint8_t *input, *output; uint8_t *input, *output, *custom_dictionary;
size_t length, output_length; size_t length, output_length, custom_dictionary_length;
BrotliParams::Mode mode = (BrotliParams::Mode) -1; BrotliParams::Mode mode = (BrotliParams::Mode) -1;
int quality = -1; int quality = -1;
int lgwin = -1; int lgwin = -1;
int lgblock = -1; int lgblock = -1;
int ok; int ok;
static const char *kwlist[] = {"string", "mode", "quality", "lgwin", "lgblock", NULL}; static const char *kwlist[] = {
"string", "mode", "quality", "lgwin", "lgblock", "dictionary", NULL};
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|O&O&O&O&:compress", custom_dictionary = NULL;
custom_dictionary_length = 0;
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|O&O&O&O&s#:compress",
const_cast<char **>(kwlist), const_cast<char **>(kwlist),
&input, &length, &input, &length,
&mode_convertor, &mode, &mode_convertor, &mode,
&quality_convertor, &quality, &quality_convertor, &quality,
&lgwin_convertor, &lgwin, &lgwin_convertor, &lgwin,
&lgblock_convertor, &lgblock); &lgblock_convertor, &lgblock,
&custom_dictionary, &custom_dictionary_length);
if (!ok) if (!ok)
return NULL; return NULL;
@ -148,8 +154,23 @@ static PyObject* brotli_compress(PyObject *self, PyObject *args, PyObject *keywd
if (lgblock != -1) if (lgblock != -1)
params.lgblock = lgblock; params.lgblock = lgblock;
ok = BrotliCompressBuffer(params, length, input, if (custom_dictionary_length == 0) {
&output_length, output); ok = BrotliCompressBuffer(params, length, input,
&output_length, output);
} else {
uint8_t *custom_dictionary_start = custom_dictionary;
BrotliMemIn in(input, length);
BrotliMemOut out(output, output_length);
size_t sliding_window_size = ((size_t)1) << params.lgwin;
if (custom_dictionary_length > sliding_window_size) {
custom_dictionary_start += custom_dictionary_length - sliding_window_size;
custom_dictionary_length = sliding_window_size;
}
ok = BrotliCompressWithCustomDictionary(custom_dictionary_length,
custom_dictionary_start, params, &in, &out);
output_length = out.position();
}
if (ok) { if (ok) {
ret = PyBytes_FromStringAndSize((char*)output, output_length); ret = PyBytes_FromStringAndSize((char*)output, output_length);
} else { } else {
@ -169,6 +190,8 @@ PyDoc_STRVAR(decompress__doc__,
"\n" "\n"
"Args:\n" "Args:\n"
" string (bytes): The compressed input data.\n" " string (bytes): The compressed input data.\n"
" dictionary (bytes, optional): Custom dictionary. MUST be the same data\n"
" as passed to compress method.\n"
"\n" "\n"
"Returns:\n" "Returns:\n"
" The decompressed byte string.\n" " The decompressed byte string.\n"
@ -176,13 +199,21 @@ PyDoc_STRVAR(decompress__doc__,
"Raises:\n" "Raises:\n"
" brotli.error: If decompressor fails.\n"); " brotli.error: If decompressor fails.\n");
static PyObject* brotli_decompress(PyObject *self, PyObject *args) { static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *keywds) {
PyObject *ret = NULL; PyObject *ret = NULL;
const uint8_t *input; const uint8_t *input, *custom_dictionary;
size_t length; size_t length, custom_dictionary_length;
int ok; int ok;
ok = PyArg_ParseTuple(args, "s#:decompress", &input, &length); static const char *kwlist[] = {"string", "dictionary", NULL};
custom_dictionary = NULL;
custom_dictionary_length = 0;
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|s#:decompress",
const_cast<char **>(kwlist),
&input, &length,
&custom_dictionary, &custom_dictionary_length);
if (!ok) if (!ok)
return NULL; return NULL;
@ -191,7 +222,10 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args) {
uint8_t* buffer = new uint8_t[kBufferSize]; uint8_t* buffer = new uint8_t[kBufferSize];
BrotliState state; BrotliState state;
BrotliStateInit(&state); BrotliStateInit(&state);
if (custom_dictionary_length != 0) {
BrotliSetCustomDictionary(custom_dictionary_length, custom_dictionary, &state);
}
BrotliResult result = BROTLI_RESULT_NEEDS_MORE_OUTPUT; BrotliResult result = BROTLI_RESULT_NEEDS_MORE_OUTPUT;
while (result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) { while (result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) {
size_t available_out = kBufferSize; size_t available_out = kBufferSize;
@ -219,7 +253,7 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args) {
static PyMethodDef brotli_methods[] = { static PyMethodDef brotli_methods[] = {
{"compress", (PyCFunction)brotli_compress, METH_VARARGS | METH_KEYWORDS, compress__doc__}, {"compress", (PyCFunction)brotli_compress, METH_VARARGS | METH_KEYWORDS, compress__doc__},
{"decompress", brotli_decompress, METH_VARARGS, decompress__doc__}, {"decompress", (PyCFunction)brotli_decompress, METH_VARARGS | METH_KEYWORDS, decompress__doc__},
{NULL, NULL, 0, NULL} {NULL, NULL, 0, NULL}
}; };

View File

@ -0,0 +1,36 @@
#!/usr/bin/env python
from __future__ import print_function
import sys
import os
from subprocess import check_call, Popen, PIPE
from test_utils import PYTHON, BRO, TEST_ENV, diff_q
# Files to push through the compress/decompress roundtrip, one path per line.
# The last entry is whatever test_utils exposes as BRO.
INPUTS = """\
testdata/alice29.txt
testdata/asyoulik.txt
testdata/lcet10.txt
testdata/plrabn12.txt
../enc/encode.cc
../enc/dictionary.h
../dec/decode.c
%s
""" % BRO

# The relative paths in INPUTS are resolved against this directory.
os.chdir(os.path.abspath("../../tests"))

for entry in INPUTS.splitlines():
    # Everything derived from the path alone is computed once per file.
    source = os.path.abspath(entry)
    label = os.path.basename(source)
    stem = os.path.splitext(source)[0]
    compressed = stem + ".custom_bro"
    uncompressed = stem + ".custom_unbro"

    # Each file is passed as its own custom dictionary, in both directions.
    decompress_cmd = [PYTHON, BRO, "-f", "-d", "-i", compressed, "-o",
                      uncompressed, "--custom-dictionary", source]

    for quality in (1, 6, 9, 11):
        for lgwin in (10, 15, 20, 24):
            print('Roundtrip testing file "%s" at quality %d with lg(win)=%d and auto-custom-dictionary' %
                  (label, quality, lgwin))
            compress_cmd = [PYTHON, BRO, "-f", "-q", str(quality), "-i", source,
                            "-o", compressed, "--lgwin", str(lgwin),
                            "--custom-dictionary", source]
            # check_call raises if either subprocess exits with a non-zero status.
            check_call(compress_cmd, env=TEST_ENV)
            check_call(decompress_cmd, env=TEST_ENV)
            # Stop with exit status 1 on the first non-zero diff_q result.
            if diff_q(source, uncompressed) != 0:
                sys.exit(1)