mirror of
https://github.com/google/brotli.git
synced 2024-11-25 21:10:05 +00:00
Merge pull request #321 from eustas/master
Add custom dictionary feature binding
This commit is contained in:
commit
7e5bbd5f9b
@ -79,6 +79,8 @@ def main(args=None):
|
||||
help='Base 2 logarithm of the maximum input block size. '
|
||||
'Range is 16 to 24. If set to 0, the value will be set based '
|
||||
'on the quality. Defaults to 0.')
|
||||
params.add_argument('--custom-dictionary', metavar="FILE", type=str, dest='dictfile',
|
||||
help='Custom dictionary file.', default = None)
|
||||
# set default values using global DEFAULT_PARAMS dictionary
|
||||
parser.set_defaults(**DEFAULT_PARAMS)
|
||||
|
||||
@ -103,13 +105,22 @@ def main(args=None):
|
||||
else:
|
||||
outfile = get_binary_stdio('stdout')
|
||||
|
||||
if options.dictfile:
|
||||
if not os.path.isfile(options.dictfile):
|
||||
parser.error('file "%s" not found' % options.dictfile)
|
||||
with open(options.dictfile, "rb") as dictfile:
|
||||
custom_dictionary = dictfile.read()
|
||||
else:
|
||||
custom_dictionary = ''
|
||||
|
||||
|
||||
try:
|
||||
if options.decompress:
|
||||
data = brotli.decompress(data)
|
||||
data = brotli.decompress(data, dictionary=custom_dictionary)
|
||||
else:
|
||||
data = brotli.compress(
|
||||
data, mode=options.mode, quality=options.quality,
|
||||
lgwin=options.lgwin, lgblock=options.lgblock)
|
||||
lgwin=options.lgwin, lgblock=options.lgblock, dictionary=custom_dictionary)
|
||||
except brotli.error as e:
|
||||
parser.exit(1,'bro: error: %s: %s' % (e, options.infile or 'sys.stdin'))
|
||||
|
||||
|
@ -91,7 +91,7 @@ PyDoc_STRVAR(compress__doc__,
|
||||
"Compress a byte string.\n"
|
||||
"\n"
|
||||
"Signature:\n"
|
||||
" compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0)\n"
|
||||
" compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0, dictionary='')\n"
|
||||
"\n"
|
||||
"Args:\n"
|
||||
" string (bytes): The input data.\n"
|
||||
@ -105,6 +105,8 @@ PyDoc_STRVAR(compress__doc__,
|
||||
" lgblock (int, optional): Base 2 logarithm of the maximum input block size.\n"
|
||||
" Range is 16 to 24. If set to 0, the value will be set based on the\n"
|
||||
" quality. Defaults to 0.\n"
|
||||
" dictionary (bytes, optional): Custom dictionary. Only last sliding window\n"
|
||||
" size bytes will be used.\n"
|
||||
"\n"
|
||||
"Returns:\n"
|
||||
" The compressed byte string.\n"
|
||||
@ -114,24 +116,28 @@ PyDoc_STRVAR(compress__doc__,
|
||||
|
||||
static PyObject* brotli_compress(PyObject *self, PyObject *args, PyObject *keywds) {
|
||||
PyObject *ret = NULL;
|
||||
uint8_t *input, *output;
|
||||
size_t length, output_length;
|
||||
uint8_t *input, *output, *custom_dictionary;
|
||||
size_t length, output_length, custom_dictionary_length;
|
||||
BrotliParams::Mode mode = (BrotliParams::Mode) -1;
|
||||
int quality = -1;
|
||||
int lgwin = -1;
|
||||
int lgblock = -1;
|
||||
int ok;
|
||||
|
||||
static const char *kwlist[] = {"string", "mode", "quality", "lgwin", "lgblock", NULL};
|
||||
static const char *kwlist[] = {
|
||||
"string", "mode", "quality", "lgwin", "lgblock", "dictionary", NULL};
|
||||
|
||||
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|O&O&O&O&:compress",
|
||||
custom_dictionary = NULL;
|
||||
custom_dictionary_length = 0;
|
||||
|
||||
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|O&O&O&O&s#:compress",
|
||||
const_cast<char **>(kwlist),
|
||||
&input, &length,
|
||||
&mode_convertor, &mode,
|
||||
&quality_convertor, &quality,
|
||||
&lgwin_convertor, &lgwin,
|
||||
&lgblock_convertor, &lgblock);
|
||||
|
||||
&lgblock_convertor, &lgblock,
|
||||
&custom_dictionary, &custom_dictionary_length);
|
||||
if (!ok)
|
||||
return NULL;
|
||||
|
||||
@ -148,8 +154,23 @@ static PyObject* brotli_compress(PyObject *self, PyObject *args, PyObject *keywd
|
||||
if (lgblock != -1)
|
||||
params.lgblock = lgblock;
|
||||
|
||||
ok = BrotliCompressBuffer(params, length, input,
|
||||
&output_length, output);
|
||||
if (custom_dictionary_length == 0) {
|
||||
ok = BrotliCompressBuffer(params, length, input,
|
||||
&output_length, output);
|
||||
} else {
|
||||
uint8_t *custom_dictionary_start = custom_dictionary;
|
||||
BrotliMemIn in(input, length);
|
||||
BrotliMemOut out(output, output_length);
|
||||
size_t sliding_window_size = ((size_t)1) << params.lgwin;
|
||||
if (custom_dictionary_length > sliding_window_size) {
|
||||
custom_dictionary_start += custom_dictionary_length - sliding_window_size;
|
||||
custom_dictionary_length = sliding_window_size;
|
||||
}
|
||||
ok = BrotliCompressWithCustomDictionary(custom_dictionary_length,
|
||||
custom_dictionary_start, params, &in, &out);
|
||||
output_length = out.position();
|
||||
}
|
||||
|
||||
if (ok) {
|
||||
ret = PyBytes_FromStringAndSize((char*)output, output_length);
|
||||
} else {
|
||||
@ -169,6 +190,8 @@ PyDoc_STRVAR(decompress__doc__,
|
||||
"\n"
|
||||
"Args:\n"
|
||||
" string (bytes): The compressed input data.\n"
|
||||
" dictionary (bytes, optional): Custom dictionary. MUST be the same data\n"
|
||||
" as passed to compress method.\n"
|
||||
"\n"
|
||||
"Returns:\n"
|
||||
" The decompressed byte string.\n"
|
||||
@ -176,13 +199,21 @@ PyDoc_STRVAR(decompress__doc__,
|
||||
"Raises:\n"
|
||||
" brotli.error: If decompressor fails.\n");
|
||||
|
||||
static PyObject* brotli_decompress(PyObject *self, PyObject *args) {
|
||||
static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *keywds) {
|
||||
PyObject *ret = NULL;
|
||||
const uint8_t *input;
|
||||
size_t length;
|
||||
const uint8_t *input, *custom_dictionary;
|
||||
size_t length, custom_dictionary_length;
|
||||
int ok;
|
||||
|
||||
ok = PyArg_ParseTuple(args, "s#:decompress", &input, &length);
|
||||
static const char *kwlist[] = {"string", "dictionary", NULL};
|
||||
|
||||
custom_dictionary = NULL;
|
||||
custom_dictionary_length = 0;
|
||||
|
||||
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s#|s#:decompress",
|
||||
const_cast<char **>(kwlist),
|
||||
&input, &length,
|
||||
&custom_dictionary, &custom_dictionary_length);
|
||||
if (!ok)
|
||||
return NULL;
|
||||
|
||||
@ -191,7 +222,10 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args) {
|
||||
uint8_t* buffer = new uint8_t[kBufferSize];
|
||||
BrotliState state;
|
||||
BrotliStateInit(&state);
|
||||
|
||||
if (custom_dictionary_length != 0) {
|
||||
BrotliSetCustomDictionary(custom_dictionary_length, custom_dictionary, &state);
|
||||
}
|
||||
|
||||
BrotliResult result = BROTLI_RESULT_NEEDS_MORE_OUTPUT;
|
||||
while (result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) {
|
||||
size_t available_out = kBufferSize;
|
||||
@ -219,7 +253,7 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args) {
|
||||
|
||||
static PyMethodDef brotli_methods[] = {
|
||||
{"compress", (PyCFunction)brotli_compress, METH_VARARGS | METH_KEYWORDS, compress__doc__},
|
||||
{"decompress", brotli_decompress, METH_VARARGS, decompress__doc__},
|
||||
{"decompress", (PyCFunction)brotli_decompress, METH_VARARGS | METH_KEYWORDS, decompress__doc__},
|
||||
{NULL, NULL, 0, NULL}
|
||||
};
|
||||
|
||||
|
36
python/tests/custom_dictionary_test.py
Normal file
36
python/tests/custom_dictionary_test.py
Normal file
@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import os
|
||||
from subprocess import check_call, Popen, PIPE
|
||||
|
||||
from test_utils import PYTHON, BRO, TEST_ENV, diff_q
|
||||
|
||||
|
||||
INPUTS = """\
|
||||
testdata/alice29.txt
|
||||
testdata/asyoulik.txt
|
||||
testdata/lcet10.txt
|
||||
testdata/plrabn12.txt
|
||||
../enc/encode.cc
|
||||
../enc/dictionary.h
|
||||
../dec/decode.c
|
||||
%s
|
||||
""" % BRO
|
||||
|
||||
os.chdir(os.path.abspath("../../tests"))
|
||||
for filename in INPUTS.splitlines():
|
||||
for quality in (1, 6, 9, 11):
|
||||
for lgwin in (10, 15, 20, 24):
|
||||
filename = os.path.abspath(filename)
|
||||
print('Roundtrip testing file "%s" at quality %d with lg(win)=%d and auto-custom-dictionary' %
|
||||
(os.path.basename(filename), quality, lgwin))
|
||||
compressed = os.path.splitext(filename)[0] + ".custom_bro"
|
||||
uncompressed = os.path.splitext(filename)[0] + ".custom_unbro"
|
||||
check_call([PYTHON, BRO, "-f", "-q", str(quality), "-i", filename,
|
||||
"-o", compressed, "--lgwin", str(lgwin),
|
||||
"--custom-dictionary", filename], env=TEST_ENV)
|
||||
check_call([PYTHON, BRO, "-f", "-d", "-i", compressed, "-o",
|
||||
uncompressed, "--custom-dictionary", filename], env=TEST_ENV)
|
||||
if diff_q(filename, uncompressed) != 0:
|
||||
sys.exit(1)
|
Loading…
Reference in New Issue
Block a user