mirror of
https://github.com/google/brotli.git
synced 2024-11-21 19:20:09 +00:00
add max_length to Python streaming decompression
TODO: tests
This commit is contained in:
parent
f1bdfaa803
commit
762ae626ef
189
python/_brotli.c
189
python/_brotli.c
@ -606,57 +606,6 @@ static PyTypeObject brotli_CompressorType = {
|
||||
brotli_Compressor_new, /* tp_new */
|
||||
};
|
||||
|
||||
/*
 * Runs `input_length` bytes starting at `input` through the decoder `dec`
 * and returns whatever BlocksOutputBuffer_Finish() builds from the output
 * collected so far.
 *
 * Returns NULL when the output buffer cannot be set up or grown, when the
 * decoder reports an error, or when the decoder stops while input bytes are
 * still left over. On every NULL return the output buffer is handed to
 * BlocksOutputBuffer_OnError().
 */
static PyObject* decompress_stream(BrotliDecoderState* dec,
                                   uint8_t* input, size_t input_length) {
  BlocksOutputBuffer buffer = {.list=NULL};
  const uint8_t* src = input;
  size_t src_left = input_length;
  uint8_t* dst;
  size_t dst_left;
  BrotliDecoderResult status;
  PyObject* output;

  if (BlocksOutputBuffer_InitAndGrow(&buffer, PY_SSIZE_T_MAX, &dst_left, &dst) < 0) {
    goto fail;
  }

  for (;;) {
    /* The decoder call touches no Python objects, so the GIL is released. */
    Py_BEGIN_ALLOW_THREADS
    status = BrotliDecoderDecompressStream(dec,
                                           &src_left, &src,
                                           &dst_left, &dst, NULL);
    Py_END_ALLOW_THREADS

    if (status != BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
      break;
    }
    if (dst_left != 0) {
      /* Room is still available in the current block: just call again. */
      continue;
    }
    if (BlocksOutputBuffer_Grow(&buffer, &dst_left, &dst) < 0) {
      goto fail;
    }
  }

  if (status != BROTLI_DECODER_RESULT_ERROR && src_left == 0) {
    output = BlocksOutputBuffer_Finish(&buffer, dst_left);
    if (output != NULL) {
      return output;
    }
  }

fail:
  BlocksOutputBuffer_OnError(&buffer);
  return NULL;
}
|
||||
|
||||
PyDoc_STRVAR(brotli_Decompressor_doc,
|
||||
"An object to decompress a byte string.\n"
|
||||
"\n"
|
||||
@ -669,10 +618,14 @@ PyDoc_STRVAR(brotli_Decompressor_doc,
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
BrotliDecoderState* dec;
|
||||
uint8_t* unconsumed_data;
|
||||
size_t unconsumed_data_length;
|
||||
} brotli_Decompressor;
|
||||
|
||||
static void brotli_Decompressor_dealloc(brotli_Decompressor* self) {
|
||||
BrotliDecoderDestroyInstance(self->dec);
|
||||
if (self->unconsumed_data)
|
||||
free(self->unconsumed_data);
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
Py_TYPE(self)->tp_free((PyObject*)self);
|
||||
#else
|
||||
@ -688,6 +641,9 @@ static PyObject* brotli_Decompressor_new(PyTypeObject *type, PyObject *args, PyO
|
||||
self->dec = BrotliDecoderCreateInstance(0, 0, 0);
|
||||
}
|
||||
|
||||
self->unconsumed_data = NULL;
|
||||
self->unconsumed_data_length = 0;
|
||||
|
||||
return (PyObject *)self;
|
||||
}
|
||||
|
||||
@ -706,6 +662,75 @@ static int brotli_Decompressor_init(brotli_Decompressor *self, PyObject *args, P
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Feeds `input_length` bytes starting at `input` to the decoder owned by
 * `self` and collects the decoded output, stopping once the output buffer
 * has grown to `max_output_length` bytes.
 *
 * `input` may be `self->unconsumed_data` itself (a resumed call); the old
 * stash is only released after any remaining bytes have been copied out.
 *
 * On a successful decode pass the stash is brought in sync with the decoder:
 *   - input bytes the decoder has not consumed (possible only when the
 *     output limit was hit) are copied into `self->unconsumed_data` and
 *     `self->unconsumed_data_length` is set to their count;
 *   - otherwise the stash is released and the length reset to 0, so a
 *     finished resume does not leave stale state behind.
 *
 * Returns the object built by BlocksOutputBuffer_Finish(), or NULL on
 * failure. Failures are: buffer setup/growth failure, the output buffer
 * reaching PY_SSIZE_T_MAX (MemoryError is set), a decoder error, leftover
 * input without the limit being hit, or failure to allocate the stash
 * (MemoryError is set). On decode failures the stash is left untouched.
 */
static PyObject* decompress_stream(brotli_Decompressor* self,
                                   uint8_t* input, size_t input_length,
                                   Py_ssize_t max_output_length) {
  BrotliDecoderResult result;

  size_t available_in = input_length;
  const uint8_t* next_in = input;

  size_t available_out;
  uint8_t* next_out;
  uint8_t* new_tail = NULL;
  BlocksOutputBuffer buffer = {.list=NULL};
  PyObject *ret;

  if (BlocksOutputBuffer_InitAndGrow(&buffer, max_output_length, &available_out, &next_out) < 0) {
    goto error;
  }

  while (1) {
    /* The decoder call touches no Python objects, so the GIL is released. */
    Py_BEGIN_ALLOW_THREADS
    result = BrotliDecoderDecompressStream(self->dec,
                                           &available_in, &next_in,
                                           &available_out, &next_out, NULL);
    Py_END_ALLOW_THREADS

    if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
      if (available_out == 0) {
        if (buffer.allocated == PY_SSIZE_T_MAX) {
          PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
          goto error;
        }
        if (buffer.allocated == max_output_length) {
          /* Output limit reached: stop here and stash the rest below. */
          break;
        }
        if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) {
          goto error;
        }
      }
      continue;
    }

    if (result == BROTLI_DECODER_RESULT_ERROR || available_in != 0) {
      goto error;
    }

    break;
  }

  /* Copy the unconsumed input first: next_in may point into the old stash. */
  if (available_in > 0) {
    new_tail = malloc(available_in);
    if (new_tail == NULL) {
      PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
      goto error;
    }
    memcpy(new_tail, next_in, available_in);
  }
  free(self->unconsumed_data);
  self->unconsumed_data = new_tail;
  /* Keep the length in step with the buffer; process() and
     can_accept_more_data() decide based on this field. */
  self->unconsumed_data_length = available_in;

  ret = BlocksOutputBuffer_Finish(&buffer, available_out);
  if (ret != NULL) {
    goto finally;
  }

error:
  BlocksOutputBuffer_OnError(&buffer);
  ret = NULL;

finally:
  return ret;
}
|
||||
|
||||
|
||||
PyDoc_STRVAR(brotli_Decompressor_process_doc,
|
||||
"Process \"string\" for decompression, returning a string that contains \n"
|
||||
"decompressed output data. This data should be concatenated to the output \n"
|
||||
@ -713,28 +738,39 @@ PyDoc_STRVAR(brotli_Decompressor_process_doc,
|
||||
"Some or all of the input may be kept in internal buffers for later \n"
|
||||
"processing, and the decompressed output data may be empty until enough input \n"
|
||||
"has been accumulated.\n"
|
||||
"If max_output_length is set, no more than max_output_length bytes will be\n"
|
||||
"returned. If the limit is reached (or an implicit limit of the largest\n"
|
||||
"constructible string is reached), further calls to process (potentially with\n"
|
||||
"empty input) will continue to yield more data. If, after returning a string of\n"
|
||||
"the length equal to limit, can_accept_more_data() returns False, process()\n"
|
||||
"must only be called with empty input until can_accept_more_data() once again\n"
|
||||
"returns True.\n"
|
||||
"\n"
|
||||
"Signature:\n"
|
||||
" decompress(string)\n"
|
||||
" decompress(string, max_output_length=int)\n"
|
||||
"\n"
|
||||
"Args:\n"
|
||||
" string (bytes): The input data\n"
|
||||
"\n"
|
||||
"Returns:\n"
|
||||
"\n""Returns:\n"
|
||||
" The decompressed output data (bytes)\n"
|
||||
"\n"
|
||||
"Raises:\n"
|
||||
" brotli.error: If decompression fails\n");
|
||||
|
||||
static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args) {
|
||||
static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args, PyObject* keywds) {
|
||||
PyObject* ret;
|
||||
Py_buffer input;
|
||||
int ok;
|
||||
Py_ssize_t max_output_length = PY_SSIZE_T_MAX;
|
||||
uint8_t* data;
|
||||
size_t data_length;
|
||||
|
||||
static char* kwlist[] = { "", "max_output_length", NULL };
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
ok = PyArg_ParseTuple(args, "y*:process", &input);
|
||||
ok = PyArg_ParseTupleAndKeywords(args, keywds, "y*|n:process", kwlist, &input, &max_output_length);
|
||||
#else
|
||||
ok = PyArg_ParseTuple(args, "s*:process", &input);
|
||||
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s*|n:process", kwlist, &input, &max_output_length);
|
||||
#endif
|
||||
|
||||
if (!ok) {
|
||||
@ -745,7 +781,20 @@ static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = decompress_stream(self->dec, (uint8_t*) input.buf, input.len);
|
||||
if (self->unconsumed_data_length > 0) {
|
||||
if (input.len > 0) {
|
||||
PyErr_SetString(BrotliError, "process called with data when accept_more_data is False");
|
||||
ret = NULL;
|
||||
goto finally;
|
||||
}
|
||||
data = self->unconsumed_data;
|
||||
data_length = self->unconsumed_data_length;
|
||||
} else {
|
||||
data = (uint8_t*)input.buf;
|
||||
data_length = input.len;
|
||||
}
|
||||
|
||||
ret = decompress_stream(self, data, data_length, max_output_length);
|
||||
if (ret != NULL) {
|
||||
goto finally;
|
||||
}
|
||||
@ -787,13 +836,35 @@ static PyObject* brotli_Decompressor_is_finished(brotli_Decompressor *self) {
|
||||
}
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(brotli_Decompressor_can_accept_more_data_doc,
|
||||
"Checks if the decoder instance can accept more compressed data. If the decompress()\n"
|
||||
"method on this instance of decompressor was never called with max_length,\n"
|
||||
"this method will always return True.\n"
|
||||
"\n"
|
||||
"Signature:"
|
||||
" can_accept_more_data()\n"
|
||||
"\n"
|
||||
"Returns:\n"
|
||||
" True if the decoder is ready to accept more compressed data via decompress()\n"
|
||||
" False if the decoder needs to output some data via decompress(b'') before\n"
|
||||
" being provided any more compressed data\n");
|
||||
|
||||
/* Returns True when no unconsumed input is stashed on `self`
   (unconsumed_data_length is zero), and False otherwise. */
static PyObject* brotli_Decompressor_can_accept_more_data(brotli_Decompressor* self) {
  if (self->unconsumed_data_length == 0) {
    Py_RETURN_TRUE;
  }
  Py_RETURN_FALSE;
}
|
||||
|
||||
static PyMemberDef brotli_Decompressor_members[] = {
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
static PyMethodDef brotli_Decompressor_methods[] = {
|
||||
{"process", (PyCFunction)brotli_Decompressor_process, METH_VARARGS, brotli_Decompressor_process_doc},
|
||||
{"process", (PyCFunction)brotli_Decompressor_process, METH_VARARGS | METH_KEYWORDS, brotli_Decompressor_process_doc},
|
||||
{"is_finished", (PyCFunction)brotli_Decompressor_is_finished, METH_NOARGS, brotli_Decompressor_is_finished_doc},
|
||||
{"can_accept_more_data", (PyCFunction)brotli_Decompressor_can_accept_more_data, METH_NOARGS, brotli_Decompressor_can_accept_more_data_doc},
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user