mirror of
https://github.com/google/brotli.git
synced 2024-11-08 13:20:05 +00:00
Python: Support streamed compression with the Compressor object (#448)
This adds `flush` and `finish` methods to the `Compressor` object in the extension module, renames the `compress` method to `process`, and updates that method to only process data. Now, one or more `process` calls followed by a `finish` call will be equivalent to a module-level `compress` call. Note: To maximize the compression efficiency (and match underlying Brotli behavior, the `Compressor` object `process` method does not guarantee all input is immediately written to output. To ensure immediate output, call `flush` to manually flush the compression buffer. Extraneous flushing can increase the size, but may be required when processing streaming data. Progress on #191
This commit is contained in:
parent
678f8627d3
commit
5632315d35
@ -2,7 +2,6 @@
|
||||
#include <Python.h>
|
||||
#include <bytesobject.h>
|
||||
#include <structmember.h>
|
||||
#include <cstdio>
|
||||
#include <vector>
|
||||
#include "../common/version.h"
|
||||
#include <brotli/decode.h>
|
||||
@ -88,6 +87,38 @@ static int lgblock_convertor(PyObject *o, int *lgblock) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
static BROTLI_BOOL compress_stream(BrotliEncoderState* enc, BrotliEncoderOperation op,
|
||||
std::vector<uint8_t>* output, uint8_t* input, size_t input_length) {
|
||||
BROTLI_BOOL ok = BROTLI_TRUE;
|
||||
|
||||
size_t available_in = input_length;
|
||||
const uint8_t* next_in = input;
|
||||
size_t available_out = 0;
|
||||
uint8_t* next_out = NULL;
|
||||
|
||||
while (ok) {
|
||||
ok = BrotliEncoderCompressStream(enc, op,
|
||||
&available_in, &next_in,
|
||||
&available_out, &next_out, NULL);
|
||||
if (!ok)
|
||||
break;
|
||||
|
||||
size_t buffer_length = 0; // Request all available output.
|
||||
const uint8_t* buffer = BrotliEncoderTakeOutput(enc, &buffer_length);
|
||||
if (buffer_length) {
|
||||
(*output).insert((*output).end(), buffer, buffer + buffer_length);
|
||||
}
|
||||
|
||||
if (available_in || BrotliEncoderHasMoreOutput(enc)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
return ok;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(brotli_Compressor_doc,
|
||||
"An object to compress a byte string.\n"
|
||||
"\n"
|
||||
@ -177,63 +208,132 @@ static int brotli_Compressor_init(brotli_Compressor *self, PyObject *args, PyObj
|
||||
return 0;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(brotli_Compressor_compress_doc,
|
||||
"Compress a byte string.\n"
|
||||
PyDoc_STRVAR(brotli_Compressor_process_doc,
|
||||
"Process \"string\" for compression, returning a string that contains \n"
|
||||
"compressed output data. This data should be concatenated to the output \n"
|
||||
"produced by any preceding calls to the \"process()\" or flush()\" methods. \n"
|
||||
"Some or all of the input may be kept in internal buffers for later \n"
|
||||
"processing, and the compressed output data may be empty until enough input \n"
|
||||
"has been accumulated.\n"
|
||||
"\n"
|
||||
"Signature:\n"
|
||||
" compress(string)\n"
|
||||
"\n"
|
||||
"Args:\n"
|
||||
" string (bytes): The input data.\n"
|
||||
" string (bytes): The input data\n"
|
||||
"\n"
|
||||
"Returns:\n"
|
||||
" The compressed byte string.\n"
|
||||
" The compressed output data (bytes)\n"
|
||||
"\n"
|
||||
"Raises:\n"
|
||||
" brotli.error: If compression fails.\n");
|
||||
" brotli.error: If compression fails\n");
|
||||
|
||||
static PyObject* brotli_Compressor_compress(brotli_Compressor *self, PyObject *args) {
|
||||
static PyObject* brotli_Compressor_process(brotli_Compressor *self, PyObject *args) {
|
||||
PyObject* ret = NULL;
|
||||
std::vector<uint8_t> output;
|
||||
uint8_t* input;
|
||||
uint8_t* output = NULL;
|
||||
uint8_t* next_out;
|
||||
const uint8_t *next_in;
|
||||
size_t input_length;
|
||||
size_t output_length;
|
||||
size_t available_in;
|
||||
size_t available_out;
|
||||
int ok;
|
||||
BROTLI_BOOL ok = BROTLI_TRUE;
|
||||
|
||||
ok = PyArg_ParseTuple(args, "s#:compress", &input, &input_length);
|
||||
ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "s#:process", &input, &input_length);
|
||||
if (!ok)
|
||||
return NULL;
|
||||
|
||||
output_length = input_length + (input_length >> 2) + 10240;
|
||||
|
||||
if (!self->enc) {
|
||||
ok = false;
|
||||
ok = BROTLI_FALSE;
|
||||
goto end;
|
||||
}
|
||||
|
||||
output = new uint8_t[output_length];
|
||||
available_out = output_length;
|
||||
next_out = output;
|
||||
available_in = input_length;
|
||||
next_in = input;
|
||||
|
||||
BrotliEncoderCompressStream(self->enc, BROTLI_OPERATION_FINISH,
|
||||
&available_in, &next_in,
|
||||
&available_out, &next_out, 0);
|
||||
ok = BrotliEncoderIsFinished(self->enc);
|
||||
ok = compress_stream(self->enc, BROTLI_OPERATION_PROCESS,
|
||||
&output, input, input_length);
|
||||
|
||||
end:
|
||||
if (ok) {
|
||||
ret = PyBytes_FromStringAndSize((char*)output, output_length - available_out);
|
||||
ret = PyBytes_FromStringAndSize((char*)(output.size() ? &output[0] : NULL), output.size());
|
||||
} else {
|
||||
PyErr_SetString(BrotliError, "BrotliCompressBuffer failed");
|
||||
PyErr_SetString(BrotliError, "BrotliEncoderCompressStream failed while processing the stream");
|
||||
}
|
||||
|
||||
delete[] output;
|
||||
return ret;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(brotli_Compressor_flush_doc,
|
||||
"Process all pending input, returning a string containing the remaining\n"
|
||||
"compressed data. This data should be concatenated to the output produced by\n"
|
||||
"any preceding calls to the \"process()\" or \"flush()\" methods.\n"
|
||||
"\n"
|
||||
"Signature:\n"
|
||||
" flush()\n"
|
||||
"\n"
|
||||
"Returns:\n"
|
||||
" The compressed output data (bytes)\n"
|
||||
"\n"
|
||||
"Raises:\n"
|
||||
" brotli.error: If compression fails\n");
|
||||
|
||||
static PyObject* brotli_Compressor_flush(brotli_Compressor *self) {
|
||||
PyObject *ret = NULL;
|
||||
std::vector<uint8_t> output;
|
||||
BROTLI_BOOL ok = BROTLI_TRUE;
|
||||
|
||||
if (!self->enc) {
|
||||
ok = BROTLI_FALSE;
|
||||
goto end;
|
||||
}
|
||||
|
||||
ok = compress_stream(self->enc, BROTLI_OPERATION_FLUSH,
|
||||
&output, NULL, 0);
|
||||
|
||||
end:
|
||||
if (ok) {
|
||||
ret = PyBytes_FromStringAndSize((char*)(output.size() ? &output[0] : NULL), output.size());
|
||||
} else {
|
||||
PyErr_SetString(BrotliError, "BrotliEncoderCompressStream failed while flushing the stream");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(brotli_Compressor_finish_doc,
|
||||
"Process all pending input and complete all compression, returning a string\n"
|
||||
"containing the remaining compressed data. This data should be concatenated\n"
|
||||
"to the output produced by any preceding calls to the \"process()\" or\n"
|
||||
"\"flush()\" methods.\n"
|
||||
"After calling \"finish()\", the \"process()\" and \"flush()\" methods\n"
|
||||
"cannot be called again, and a new \"Compressor\" object should be created.\n"
|
||||
"\n"
|
||||
"Signature:\n"
|
||||
" finish(string)\n"
|
||||
"\n"
|
||||
"Returns:\n"
|
||||
" The compressed output data (bytes)\n"
|
||||
"\n"
|
||||
"Raises:\n"
|
||||
" brotli.error: If compression fails\n");
|
||||
|
||||
static PyObject* brotli_Compressor_finish(brotli_Compressor *self) {
|
||||
PyObject *ret = NULL;
|
||||
std::vector<uint8_t> output;
|
||||
BROTLI_BOOL ok = BROTLI_TRUE;
|
||||
|
||||
if (!self->enc) {
|
||||
ok = BROTLI_FALSE;
|
||||
goto end;
|
||||
}
|
||||
|
||||
ok = compress_stream(self->enc, BROTLI_OPERATION_FINISH,
|
||||
&output, NULL, 0);
|
||||
|
||||
if (ok) {
|
||||
ok = BrotliEncoderIsFinished(self->enc);
|
||||
}
|
||||
|
||||
end:
|
||||
if (ok) {
|
||||
ret = PyBytes_FromStringAndSize((char*)(output.size() ? &output[0] : NULL), output.size());
|
||||
} else {
|
||||
PyErr_SetString(BrotliError, "BrotliEncoderCompressStream failed while finishing the stream");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -243,7 +343,9 @@ static PyMemberDef brotli_Compressor_members[] = {
|
||||
};
|
||||
|
||||
static PyMethodDef brotli_Compressor_methods[] = {
|
||||
{"compress", (PyCFunction)brotli_Compressor_compress, METH_VARARGS, brotli_Compressor_compress_doc},
|
||||
{"process", (PyCFunction)brotli_Compressor_process, METH_VARARGS, brotli_Compressor_process_doc},
|
||||
{"flush", (PyCFunction)brotli_Compressor_flush, METH_NOARGS, brotli_Compressor_flush_doc},
|
||||
{"finish", (PyCFunction)brotli_Compressor_finish, METH_NOARGS, brotli_Compressor_finish_doc},
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
|
@ -34,7 +34,7 @@ def compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0,
|
||||
Range is 16 to 24. If set to 0, the value will be set based on the
|
||||
quality. Defaults to 0.
|
||||
dictionary (bytes, optional): Custom dictionary. Only last sliding window
|
||||
size bytes will be used.
|
||||
size bytes will be used.
|
||||
|
||||
Returns:
|
||||
The compressed byte string.
|
||||
@ -44,7 +44,7 @@ def compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0,
|
||||
"""
|
||||
compressor = _brotli.Compressor(mode=mode, quality=quality, lgwin=lgwin,
|
||||
lgblock=lgblock, dictionary=dictionary)
|
||||
return compressor.compress(string)
|
||||
return compressor.process(string) + compressor.finish()
|
||||
|
||||
# Decompress a compressed byte string.
|
||||
decompress = _brotli.decompress
|
||||
|
Loading…
Reference in New Issue
Block a user