Kaydet (Commit) 26795baa authored tarafından Antoine Pitrou's avatar Antoine Pitrou

Issue #15955: Add an option to limit output size when decompressing LZMA data.

Patch by Nikolaus Rath and Martin Panter.
üst e262074e
...@@ -221,13 +221,32 @@ Compressing and decompressing data in memory ...@@ -221,13 +221,32 @@ Compressing and decompressing data in memory
decompress a multi-stream input with :class:`LZMADecompressor`, you must decompress a multi-stream input with :class:`LZMADecompressor`, you must
create a new decompressor for each stream. create a new decompressor for each stream.
.. method:: decompress(data) .. method:: decompress(data, max_length=-1)
Decompress *data* (a :class:`bytes` object), returning a :class:`bytes` Decompress *data* (a :term:`bytes-like object`), returning
object containing the decompressed data for at least part of the input. uncompressed data as bytes. Some of *data* may be buffered
Some of *data* may be buffered internally, for use in later calls to internally, for use in later calls to :meth:`decompress`. The
:meth:`decompress`. The returned data should be concatenated with the returned data should be concatenated with the output of any
output of any previous calls to :meth:`decompress`. previous calls to :meth:`decompress`.
If *max_length* is nonnegative, returns at most *max_length*
bytes of decompressed data. If this limit is reached and further
output can be produced, the :attr:`~.needs_input` attribute will
be set to ``False``. In this case, the next call to
:meth:`~.decompress` may provide *data* as ``b''`` to obtain
more of the output.
If all of the input data was decompressed and returned (either
because this was less than *max_length* bytes, or because
*max_length* was negative), the :attr:`~.needs_input` attribute
will be set to ``True``.
Attempting to decompress data after the end of stream is reached
raises an :exc:`EOFError`. Any data found after the end of the
stream is ignored and saved in the :attr:`~.unused_data` attribute.
.. versionchanged:: 3.5
Added the *max_length* parameter.
.. attribute:: check .. attribute:: check
...@@ -245,6 +264,12 @@ Compressing and decompressing data in memory ...@@ -245,6 +264,12 @@ Compressing and decompressing data in memory
Before the end of the stream is reached, this will be ``b""``. Before the end of the stream is reached, this will be ``b""``.
.. attribute:: needs_input
``False`` if the :meth:`.decompress` method can provide more
decompressed data before requiring new compressed input.
.. versionadded:: 3.5
.. function:: compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None) .. function:: compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None)
......
...@@ -135,6 +135,97 @@ class CompressorDecompressorTestCase(unittest.TestCase): ...@@ -135,6 +135,97 @@ class CompressorDecompressorTestCase(unittest.TestCase):
self.assertTrue(lzd.eof) self.assertTrue(lzd.eof)
self.assertEqual(lzd.unused_data, b"") self.assertEqual(lzd.unused_data, b"")
def test_decompressor_chunks_maxsize(self):
    """Decompress COMPRESSED_XZ in pieces capped by ``max_length``.

    Checks that each returned piece respects the cap, that ``needs_input``
    is false while capped output is still pending, and that the
    concatenated pieces equal INPUT.
    """
    lzd = LZMADecompressor()
    limit = 100
    half = len(COMPRESSED_XZ) // 2
    pieces = []

    # Feed the first half of the input. The cap is expected to be hit,
    # so more output must be available without new input.
    piece = lzd.decompress(COMPRESSED_XZ[:half], max_length=limit)
    pieces.append(piece)
    self.assertFalse(lzd.needs_input)
    self.assertEqual(len(piece), limit)

    # Ask for more output without supplying any further input.
    piece = lzd.decompress(b'', max_length=limit)
    pieces.append(piece)
    self.assertFalse(lzd.needs_input)
    self.assertEqual(len(piece), limit)

    # Supply the second half of the input while still capping the output.
    piece = lzd.decompress(COMPRESSED_XZ[half:], max_length=limit)
    pieces.append(piece)
    self.assertLessEqual(len(piece), limit)

    # Drain the remaining decompressed data, one capped piece at a time.
    while not lzd.eof:
        piece = lzd.decompress(b'', max_length=limit)
        pieces.append(piece)
        self.assertLessEqual(len(piece), limit)

    self.assertEqual(b"".join(pieces), INPUT)
    self.assertEqual(lzd.check, lzma.CHECK_CRC64)
    self.assertEqual(lzd.unused_data, b"")
def test_decompressor_inputbuf_1(self):
    """Reuse the input buffer after existing contents move to its start.

    The exact slice sizes and output caps are what steer the decompressor
    into this scenario, so they are kept unchanged.
    """
    lzd = LZMADecompressor()

    # Create the input buffer and fill it; max_length=0 means no output
    # may be returned yet.
    primed = lzd.decompress(COMPRESSED_XZ[:100], max_length=0)
    self.assertEqual(primed, b'')

    pieces = [
        # Retrieve some results, freeing capacity at the beginning of
        # the input buffer.
        lzd.decompress(b'', 2),
        # Add more data that fits into the input buffer once the existing
        # data has been moved to the beginning.
        lzd.decompress(COMPRESSED_XZ[100:105], 15),
        # Decompress the rest of the data without any cap.
        lzd.decompress(COMPRESSED_XZ[105:]),
    ]
    self.assertEqual(b''.join(pieces), INPUT)
def test_decompressor_inputbuf_2(self):
    """Reuse the input buffer by appending new data at its end right away.

    The exact slice sizes and output caps are what steer the decompressor
    into this scenario, so they are kept unchanged.
    """
    lzd = LZMADecompressor()

    # Create the input buffer; max_length=0 means no output may be
    # returned yet.
    primed = lzd.decompress(COMPRESSED_XZ[:200], max_length=0)
    self.assertEqual(primed, b'')

    pieces = [
        # Empty the input buffer by decompressing with no cap.
        lzd.decompress(b''),
        # Fill the buffer with new data.
        lzd.decompress(COMPRESSED_XZ[200:280], 2),
        # Append some more data, not enough to require a resize.
        lzd.decompress(COMPRESSED_XZ[280:300], 2),
        # Decompress the rest of the data without any cap.
        lzd.decompress(COMPRESSED_XZ[300:]),
    ]
    self.assertEqual(b''.join(pieces), INPUT)
def test_decompressor_inputbuf_3(self):
    """Reuse the input buffer after it has been extended.

    The exact slice sizes and output caps are what steer the decompressor
    into this scenario, so they are kept unchanged.
    """
    lzd = LZMADecompressor()
    pieces = [
        # Create an almost full input buffer.
        lzd.decompress(COMPRESSED_XZ[:200], 5),
        # Add even more data to it, requiring a resize.
        lzd.decompress(COMPRESSED_XZ[200:300], 5),
        # Decompress the rest of the data without any cap.
        lzd.decompress(COMPRESSED_XZ[300:]),
    ]
    self.assertEqual(b''.join(pieces), INPUT)
def test_decompressor_unused_data(self): def test_decompressor_unused_data(self):
lzd = LZMADecompressor() lzd = LZMADecompressor()
extra = b"fooblibar" extra = b"fooblibar"
......
...@@ -203,6 +203,9 @@ Core and Builtins ...@@ -203,6 +203,9 @@ Core and Builtins
Library Library
------- -------
- Issue #15955: Add an option to limit output size when decompressing LZMA
data. Patch by Nikolaus Rath and Martin Panter.
- Issue #23250: In the http.cookies module, capitalize "HttpOnly" and "Secure" - Issue #23250: In the http.cookies module, capitalize "HttpOnly" and "Secure"
as they are written in the standard. as they are written in the standard.
......
This diff is collapsed.
...@@ -62,34 +62,43 @@ _lzma_LZMACompressor_flush(Compressor *self, PyObject *Py_UNUSED(ignored)) ...@@ -62,34 +62,43 @@ _lzma_LZMACompressor_flush(Compressor *self, PyObject *Py_UNUSED(ignored))
} }
PyDoc_STRVAR(_lzma_LZMADecompressor_decompress__doc__, PyDoc_STRVAR(_lzma_LZMADecompressor_decompress__doc__,
"decompress($self, data, /)\n" "decompress($self, /, data, max_length=-1)\n"
"--\n" "--\n"
"\n" "\n"
"Provide data to the decompressor object.\n" "Decompresses *data*, returning uncompressed data as bytes.\n"
"\n" "\n"
"Returns a chunk of decompressed data if possible, or b\'\' otherwise.\n" "If *max_length* is nonnegative, returns at most *max_length* bytes of\n"
"decompressed data. If this limit is reached and further output can be\n"
"produced, *self.needs_input* will be set to ``False``. In this case, the next\n"
"call to *decompress()* may provide *data* as b\'\' to obtain more of the output.\n"
"\n" "\n"
"Attempting to decompress data after the end of stream is reached\n" "If all of the input data was decompressed and returned (either because this\n"
"raises an EOFError. Any data found after the end of the stream\n" "was less than *max_length* bytes, or because *max_length* was negative),\n"
"is ignored and saved in the unused_data attribute."); "*self.needs_input* will be set to True.\n"
"\n"
"Attempting to decompress data after the end of stream is reached raises an\n"
"EOFError. Any data found after the end of the stream is ignored and saved in\n"
"the unused_data attribute.");
#define _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF \ #define _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF \
{"decompress", (PyCFunction)_lzma_LZMADecompressor_decompress, METH_VARARGS, _lzma_LZMADecompressor_decompress__doc__}, {"decompress", (PyCFunction)_lzma_LZMADecompressor_decompress, METH_VARARGS|METH_KEYWORDS, _lzma_LZMADecompressor_decompress__doc__},
static PyObject * static PyObject *
_lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data); _lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data, Py_ssize_t max_length);
static PyObject * static PyObject *
_lzma_LZMADecompressor_decompress(Decompressor *self, PyObject *args) _lzma_LZMADecompressor_decompress(Decompressor *self, PyObject *args, PyObject *kwargs)
{ {
PyObject *return_value = NULL; PyObject *return_value = NULL;
static char *_keywords[] = {"data", "max_length", NULL};
Py_buffer data = {NULL, NULL}; Py_buffer data = {NULL, NULL};
Py_ssize_t max_length = -1;
if (!PyArg_ParseTuple(args, if (!PyArg_ParseTupleAndKeywords(args, kwargs,
"y*:decompress", "y*|n:decompress", _keywords,
&data)) &data, &max_length))
goto exit; goto exit;
return_value = _lzma_LZMADecompressor_decompress_impl(self, &data); return_value = _lzma_LZMADecompressor_decompress_impl(self, &data, max_length);
exit: exit:
/* Cleanup for data */ /* Cleanup for data */
...@@ -242,4 +251,4 @@ exit: ...@@ -242,4 +251,4 @@ exit:
return return_value; return return_value;
} }
/*[clinic end generated code: output=808fec8216ac712b input=a9049054013a1b77]*/ /*[clinic end generated code: output=d17fac38b09626d8 input=a9049054013a1b77]*/
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment