Commit 5087980c, authored by Amaury Forgeot d'Arc

The incremental decoder for utf-7 must preserve its state between calls.

Solves issue1460.

Might not be a backport candidate: a new API function was added,
and some code may rely on details in utf-7.py.
Parent commit: 8c4592a7
......@@ -674,6 +674,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
const char *errors /* error handling */
);
/* Stateful variant of PyUnicode_DecodeUTF7().

   consumed == NULL: the input is treated as complete; PyUnicode_DecodeUTF7()
   is implemented as exactly this call.  Input that ends while the decoder
   is still inside a shift sequence is routed to the error handler.

   consumed != NULL: input ending inside a shift sequence is not an error.
   *consumed is set to the number of bytes decoded, or, if the decoder is
   still in a shift sequence at the end, to the recorded start position of
   that pending sequence (presumably so the caller can re-feed the tail
   together with more data -- confirm against the full implementation).
   For empty input *consumed is set to 0. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
const char *string, /* UTF-7 encoded string */
Py_ssize_t length, /* size of string */
const char *errors, /* error handling */
Py_ssize_t *consumed /* out: bytes consumed; may be NULL */
);
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
......
......@@ -6,34 +6,31 @@ import codecs
### Codec APIs
class Codec(codecs.Codec):
encode = codecs.utf_7_encode
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.utf_7_encode
decode = codecs.utf_7_decode
# Module-level, stateless decode entry point.
# Always passes True as the third argument of codecs.utf_7_decode
# (presumably the `final` flag of the C-level utf_7_decode -- confirm),
# i.e. the input is treated as a complete UTF-7 string.
def decode(input, errors='strict'):
return codecs.utf_7_decode(input, errors, True)
# Incremental encoder for UTF-7.
# Each call encodes `input` on its own with codecs.utf_7_encode using
# self.errors; the `final` argument is accepted but not used here.
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
# utf_7_encode returns a sequence; only its first element is returned.
return codecs.utf_7_encode(input, self.errors)[0]
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def _buffer_decode(self, input, errors, final):
return codecs.utf_7_decode(input, self.errors)
_buffer_decode = codecs.utf_7_decode
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamWriter(codecs.StreamWriter):
encode = codecs.utf_7_encode
class StreamReader(Codec,codecs.StreamReader):
pass
class StreamReader(codecs.StreamReader):
decode = codecs.utf_7_decode
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='utf-7',
encode=Codec.encode,
decode=Codec.decode,
encode=encode,
decode=decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
......
......@@ -51,7 +51,7 @@ class ReadTest(unittest.TestCase):
self.assertEqual(d.decode("", True), u"")
self.assertEqual(d.buffer, "")
# Check whether the rest method works properly
# Check whether the reset method works properly
d.reset()
result = u""
for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
......@@ -491,7 +491,17 @@ class UTF8Test(ReadTest):
class UTF7Test(ReadTest):
encoding = "utf-7"
# No test_partial() yet, because UTF-7 doesn't support it.
# Incremental-decoding test for UTF-7: hands the text u"a+-b" and the list
# of expected partial results to self.check_partial.
# NOTE(review): check_partial is presumably the ReadTest helper that feeds
# the encoded input one byte at a time and compares the accumulated output
# with the matching list entry -- confirm against ReadTest.
def test_partial(self):
self.check_partial(
u"a+-b",
[
u"a",
# Output is unchanged for one step here; presumably the byte just
# fed opens a shift sequence that cannot be decoded yet -- confirm.
u"a",
u"a+",
u"a+-",
u"a+-b",
]
)
class UTF16ExTest(unittest.TestCase):
......
......@@ -230,18 +230,25 @@ unicode_internal_decode(PyObject *self,
/* Codec-module entry point for UTF-7 decoding.

   Parses (data[, errors[, final]]) from args and returns
   codec_tuple(decoded, consumed), or NULL on failure.

   NOTE(review): this listing appears to contain both the pre-change and the
   post-change lines of the function (duplicate parameter line, two
   PyArg_ParseTuple calls, two return paths).  The comments below describe
   the variant that accepts the `final` flag -- confirm against the real
   file before relying on them. */
static PyObject *
utf_7_decode(PyObject *self,
PyObject *args)
PyObject *args)
{
const char *data;
Py_ssize_t size;
const char *errors = NULL;
int final = 0;
Py_ssize_t consumed;
PyObject *decoded = NULL;
/* Format "t#|z": data buffer plus optional errors string (no final flag). */
if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
&data, &size, &errors))
return NULL;
/* Format "t#|zi": as above, plus an optional int stored in `final`. */
if (!PyArg_ParseTuple(args, "t#|zi:utf_7_decode",
&data, &size, &errors, &final))
return NULL;
/* Default: the whole input counts as consumed.  This value is what gets
   returned when final is set, because NULL is passed to the decoder below
   and `consumed` is then never written. */
consumed = size;
return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
size);
/* With final unset, hand the decoder &consumed so it can report how many
   bytes it used. */
decoded = PyUnicode_DecodeUTF7Stateful(data, size, errors,
final ? NULL : &consumed);
if (decoded == NULL)
return NULL;
return codec_tuple(decoded, consumed);
}
static PyObject *
......
......@@ -943,6 +943,14 @@ char utf7_special[128] = {
/* Decode `size` bytes of UTF-7 data at `s` as a complete input.

   Thin wrapper: forwards to PyUnicode_DecodeUTF7Stateful() with
   consumed == NULL and returns its result unchanged. */
PyObject *PyUnicode_DecodeUTF7(const char *s,
Py_ssize_t size,
const char *errors)
{
return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL);
}
PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed)
{
const char *starts = s;
Py_ssize_t startinpos;
......@@ -962,8 +970,11 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
unicode = _PyUnicode_New(size);
if (!unicode)
return NULL;
if (size == 0)
if (size == 0) {
if (consumed)
*consumed = 0;
return (PyObject *)unicode;
}
p = unicode->str;
e = s + size;
......@@ -1049,7 +1060,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
goto onError;
}
if (inShift) {
if (inShift && !consumed) {
outpos = p-PyUnicode_AS_UNICODE(unicode);
endinpos = size;
if (unicode_decode_call_errorhandler(
......@@ -1061,6 +1072,12 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
if (s < e)
goto restart;
}
if (consumed) {
if(inShift)
*consumed = startinpos;
else
*consumed = s-starts;
}
if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)) < 0)
goto onError;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment