Commit ac22f6aa, authored by Christopher Thorne, committed by Miss Islington (bot)

bpo-33578: Add getstate/setstate for CJK codec (GH-6984)



This implements getstate and setstate for the cjkcodecs multibyte incremental encoders/decoders, primarily to fix issues with seek/tell.

The encoder getstate/setstate is slightly tricky as the "state" is pending bytes + MultibyteCodec_State but only an integer can be returned. The approach I've taken is to encode this data into a long, similar to how .tell() encodes a "cookie_type" as a long.


https://bugs.python.org/issue33578
Parent commit: 4b5e62db
...@@ -2971,6 +2971,34 @@ class TextIOWrapperTest(unittest.TestCase): ...@@ -2971,6 +2971,34 @@ class TextIOWrapperTest(unittest.TestCase):
finally: finally:
StatefulIncrementalDecoder.codecEnabled = 0 StatefulIncrementalDecoder.codecEnabled = 0
def test_multibyte_seek_and_tell(self):
    # A position returned by tell() on a file read through a multibyte
    # codec must remain usable with seek(): after seeking back to the
    # position recorded at the end of the first line, the second line
    # is read again and tell() reports the same end position as before.
    #
    # Both files are opened with context managers so that the handle is
    # closed even when one of the assertions fails.
    with self.open(support.TESTFN, "w", encoding="euc_jp") as f:
        f.write("AB\n\u3046\u3048\n")

    with self.open(support.TESTFN, "r", encoding="euc_jp") as f:
        self.assertEqual(f.readline(), "AB\n")
        p0 = f.tell()
        self.assertEqual(f.readline(), "\u3046\u3048\n")
        p1 = f.tell()
        f.seek(p0)
        self.assertEqual(f.readline(), "\u3046\u3048\n")
        self.assertEqual(f.tell(), p1)
def test_seek_with_encoder_state(self):
    # Write "\u00e6\u0300", remember the position, write a lone "\u00e6",
    # then seek back to the remembered position and write "\u0300".
    # The file is expected to contain "\u00e6\u0300\u0300": the lone
    # "\u00e6" written after tell() must not survive the seek.
    #
    # Context managers guarantee the file is closed (and the written
    # data flushed) even when an assertion or a write fails.
    with self.open(support.TESTFN, "w", encoding="euc_jis_2004") as f:
        f.write("\u00e6\u0300")
        p0 = f.tell()
        f.write("\u00e6")
        f.seek(p0)
        f.write("\u0300")

    with self.open(support.TESTFN, "r", encoding="euc_jis_2004") as f:
        self.assertEqual(f.readline(), "\u00e6\u0300\u0300")
def test_encoded_writes(self): def test_encoded_writes(self):
data = "1234567890" data = "1234567890"
tests = ("utf-16", tests = ("utf-16",
......
...@@ -117,6 +117,88 @@ class Test_IncrementalEncoder(unittest.TestCase): ...@@ -117,6 +117,88 @@ class Test_IncrementalEncoder(unittest.TestCase):
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123') self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
self.assertEqual(encoder.encode('', True), b'\xa9\xdc') self.assertEqual(encoder.encode('', True), b'\xa9\xdc')
def test_state_methods_with_buffer_state(self):
# euc_jis_2004 stores state as a buffer of pending bytes
encoder = codecs.getincrementalencoder('euc_jis_2004')()
initial_state = encoder.getstate()
self.assertEqual(encoder.encode('\u00e6\u0300'), b'\xab\xc4')
encoder.setstate(initial_state)
self.assertEqual(encoder.encode('\u00e6\u0300'), b'\xab\xc4')
self.assertEqual(encoder.encode('\u00e6'), b'')
partial_state = encoder.getstate()
self.assertEqual(encoder.encode('\u0300'), b'\xab\xc4')
encoder.setstate(partial_state)
self.assertEqual(encoder.encode('\u0300'), b'\xab\xc4')
def test_state_methods_with_non_buffer_state(self):
# iso2022_jp stores state without using a buffer
encoder = codecs.getincrementalencoder('iso2022_jp')()
self.assertEqual(encoder.encode('z'), b'z')
en_state = encoder.getstate()
self.assertEqual(encoder.encode('\u3042'), b'\x1b\x24\x42\x24\x22')
jp_state = encoder.getstate()
self.assertEqual(encoder.encode('z'), b'\x1b\x28\x42z')
encoder.setstate(jp_state)
self.assertEqual(encoder.encode('\u3042'), b'\x24\x22')
encoder.setstate(en_state)
self.assertEqual(encoder.encode('z'), b'z')
def test_getstate_returns_expected_value(self):
# Note: getstate is implemented such that these state values
# are expected to be the same across all builds of Python,
# regardless of x32/64 bit, endianness and compiler.
# euc_jis_2004 stores state as a buffer of pending bytes
buffer_state_encoder = codecs.getincrementalencoder('euc_jis_2004')()
self.assertEqual(buffer_state_encoder.getstate(), 0)
buffer_state_encoder.encode('\u00e6')
self.assertEqual(buffer_state_encoder.getstate(),
int.from_bytes(
b"\x02"
b"\xc3\xa6"
b"\x00\x00\x00\x00\x00\x00\x00\x00",
'little'))
buffer_state_encoder.encode('\u0300')
self.assertEqual(buffer_state_encoder.getstate(), 0)
# iso2022_jp stores state without using a buffer
non_buffer_state_encoder = codecs.getincrementalencoder('iso2022_jp')()
self.assertEqual(non_buffer_state_encoder.getstate(),
int.from_bytes(
b"\x00"
b"\x42\x42\x00\x00\x00\x00\x00\x00",
'little'))
non_buffer_state_encoder.encode('\u3042')
self.assertEqual(non_buffer_state_encoder.getstate(),
int.from_bytes(
b"\x00"
b"\xc2\x42\x00\x00\x00\x00\x00\x00",
'little'))
def test_setstate_validates_input_size(self):
encoder = codecs.getincrementalencoder('euc_jp')()
pending_size_nine = int.from_bytes(
b"\x09"
b"\x00\x00\x00\x00\x00\x00\x00\x00"
b"\x00\x00\x00\x00\x00\x00\x00\x00",
'little')
self.assertRaises(UnicodeError, encoder.setstate, pending_size_nine)
def test_setstate_validates_input_bytes(self):
encoder = codecs.getincrementalencoder('euc_jp')()
invalid_utf8 = int.from_bytes(
b"\x01"
b"\xff"
b"\x00\x00\x00\x00\x00\x00\x00\x00",
'little')
self.assertRaises(UnicodeDecodeError, encoder.setstate, invalid_utf8)
def test_issue5640(self): def test_issue5640(self):
encoder = codecs.getincrementalencoder('shift-jis')('backslashreplace') encoder = codecs.getincrementalencoder('shift-jis')('backslashreplace')
self.assertEqual(encoder.encode('\xff'), b'\\xff') self.assertEqual(encoder.encode('\xff'), b'\\xff')
...@@ -165,6 +247,37 @@ class Test_IncrementalDecoder(unittest.TestCase): ...@@ -165,6 +247,37 @@ class Test_IncrementalDecoder(unittest.TestCase):
decoder = codecs.getincrementaldecoder(enc)() decoder = codecs.getincrementaldecoder(enc)()
self.assertRaises(TypeError, decoder.decode, "") self.assertRaises(TypeError, decoder.decode, "")
def test_state_methods(self):
decoder = codecs.getincrementaldecoder('euc_jp')()
# Decode a complete input sequence
self.assertEqual(decoder.decode(b'\xa4\xa6'), '\u3046')
pending1, _ = decoder.getstate()
self.assertEqual(pending1, b'')
# Decode first half of a partial input sequence
self.assertEqual(decoder.decode(b'\xa4'), '')
pending2, flags2 = decoder.getstate()
self.assertEqual(pending2, b'\xa4')
# Decode second half of a partial input sequence
self.assertEqual(decoder.decode(b'\xa6'), '\u3046')
pending3, _ = decoder.getstate()
self.assertEqual(pending3, b'')
# Jump back and decode second half of partial input sequence again
decoder.setstate((pending2, flags2))
self.assertEqual(decoder.decode(b'\xa6'), '\u3046')
pending4, _ = decoder.getstate()
self.assertEqual(pending4, b'')
def test_setstate_validates_input(self):
decoder = codecs.getincrementaldecoder('euc_jp')()
self.assertRaises(TypeError, decoder.setstate, 123)
self.assertRaises(TypeError, decoder.setstate, ("invalid", 0))
self.assertRaises(TypeError, decoder.setstate, (b"1234", "invalid"))
self.assertRaises(UnicodeError, decoder.setstate, (b"123456789", 0))
class Test_StreamReader(unittest.TestCase): class Test_StreamReader(unittest.TestCase):
def test_bug1728403(self): def test_bug1728403(self):
try: try:
......
...@@ -1626,6 +1626,7 @@ Nicolas M. Thiéry ...@@ -1626,6 +1626,7 @@ Nicolas M. Thiéry
James Thomas James Thomas
Robin Thomas Robin Thomas
Brian Thorne Brian Thorne
Christopher Thorne
Stephen Thorne Stephen Thorne
Jeremy Thurgood Jeremy Thurgood
Eric Tiedemann Eric Tiedemann
......
...@@ -51,6 +51,12 @@ ...@@ -51,6 +51,12 @@
; \ ; \
} }
/*
* codecs in this file use the first byte of MultibyteCodec_State.c[8]
* to store a 0 or 1 state value
*/
#define CN_STATE_OFFSET 0
/* /*
* GB2312 codec * GB2312 codec
*/ */
...@@ -329,15 +335,15 @@ DECODER(gb18030) ...@@ -329,15 +335,15 @@ DECODER(gb18030)
ENCODER_INIT(hz) ENCODER_INIT(hz)
{ {
state->i = 0; state->c[CN_STATE_OFFSET] = 0;
return 0; return 0;
} }
ENCODER_RESET(hz) ENCODER_RESET(hz)
{ {
if (state->i != 0) { if (state->c[CN_STATE_OFFSET] != 0) {
WRITEBYTE2('~', '}'); WRITEBYTE2('~', '}');
state->i = 0; state->c[CN_STATE_OFFSET] = 0;
NEXT_OUT(2); NEXT_OUT(2);
} }
return 0; return 0;
...@@ -350,10 +356,10 @@ ENCODER(hz) ...@@ -350,10 +356,10 @@ ENCODER(hz)
DBCHAR code; DBCHAR code;
if (c < 0x80) { if (c < 0x80) {
if (state->i) { if (state->c[CN_STATE_OFFSET]) {
WRITEBYTE2('~', '}'); WRITEBYTE2('~', '}');
NEXT_OUT(2); NEXT_OUT(2);
state->i = 0; state->c[CN_STATE_OFFSET] = 0;
} }
WRITEBYTE1((unsigned char)c); WRITEBYTE1((unsigned char)c);
NEXT(1, 1); NEXT(1, 1);
...@@ -375,10 +381,10 @@ ENCODER(hz) ...@@ -375,10 +381,10 @@ ENCODER(hz)
if (code & 0x8000) /* MSB set: GBK */ if (code & 0x8000) /* MSB set: GBK */
return 1; return 1;
if (state->i == 0) { if (state->c[CN_STATE_OFFSET] == 0) {
WRITEBYTE4('~', '{', code >> 8, code & 0xff); WRITEBYTE4('~', '{', code >> 8, code & 0xff);
NEXT(1, 4); NEXT(1, 4);
state->i = 1; state->c[CN_STATE_OFFSET] = 1;
} }
else { else {
WRITEBYTE2(code >> 8, code & 0xff); WRITEBYTE2(code >> 8, code & 0xff);
...@@ -391,13 +397,13 @@ ENCODER(hz) ...@@ -391,13 +397,13 @@ ENCODER(hz)
DECODER_INIT(hz) DECODER_INIT(hz)
{ {
state->i = 0; state->c[CN_STATE_OFFSET] = 0;
return 0; return 0;
} }
DECODER_RESET(hz) DECODER_RESET(hz)
{ {
state->i = 0; state->c[CN_STATE_OFFSET] = 0;
return 0; return 0;
} }
...@@ -411,14 +417,14 @@ DECODER(hz) ...@@ -411,14 +417,14 @@ DECODER(hz)
unsigned char c2 = INBYTE2; unsigned char c2 = INBYTE2;
REQUIRE_INBUF(2); REQUIRE_INBUF(2);
if (c2 == '~' && state->i == 0) if (c2 == '~' && state->c[CN_STATE_OFFSET] == 0)
OUTCHAR('~'); OUTCHAR('~');
else if (c2 == '{' && state->i == 0) else if (c2 == '{' && state->c[CN_STATE_OFFSET] == 0)
state->i = 1; /* set GB */ state->c[CN_STATE_OFFSET] = 1; /* set GB */
else if (c2 == '\n' && state->i == 0) else if (c2 == '\n' && state->c[CN_STATE_OFFSET] == 0)
; /* line-continuation */ ; /* line-continuation */
else if (c2 == '}' && state->i == 1) else if (c2 == '}' && state->c[CN_STATE_OFFSET] == 1)
state->i = 0; /* set ASCII */ state->c[CN_STATE_OFFSET] = 0; /* set ASCII */
else else
return 1; return 1;
NEXT_IN(2); NEXT_IN(2);
...@@ -428,7 +434,7 @@ DECODER(hz) ...@@ -428,7 +434,7 @@ DECODER(hz)
if (c & 0x80) if (c & 0x80)
return 1; return 1;
if (state->i == 0) { /* ASCII mode */ if (state->c[CN_STATE_OFFSET] == 0) { /* ASCII mode */
OUTCHAR(c); OUTCHAR(c);
NEXT_IN(1); NEXT_IN(1);
} }
......
...@@ -115,6 +115,50 @@ exit: ...@@ -115,6 +115,50 @@ exit:
return return_value; return return_value;
} }
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalEncoder_getstate__doc__,
"getstate($self, /)\n"
"--\n"
"\n");
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF \
{"getstate", (PyCFunction)_multibytecodec_MultibyteIncrementalEncoder_getstate, METH_NOARGS, _multibytecodec_MultibyteIncrementalEncoder_getstate__doc__},
static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self);
static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_getstate(MultibyteIncrementalEncoderObject *self, PyObject *Py_UNUSED(ignored))
{
return _multibytecodec_MultibyteIncrementalEncoder_getstate_impl(self);
}
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalEncoder_setstate__doc__,
"setstate($self, state, /)\n"
"--\n"
"\n");
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF \
{"setstate", (PyCFunction)_multibytecodec_MultibyteIncrementalEncoder_setstate, METH_O, _multibytecodec_MultibyteIncrementalEncoder_setstate__doc__},
static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
PyLongObject *statelong);
static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_setstate(MultibyteIncrementalEncoderObject *self, PyObject *arg)
{
PyObject *return_value = NULL;
PyLongObject *statelong;
if (!PyArg_Parse(arg, "O!:setstate", &PyLong_Type, &statelong)) {
goto exit;
}
return_value = _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(self, statelong);
exit:
return return_value;
}
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalEncoder_reset__doc__, PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalEncoder_reset__doc__,
"reset($self, /)\n" "reset($self, /)\n"
"--\n" "--\n"
...@@ -169,6 +213,50 @@ exit: ...@@ -169,6 +213,50 @@ exit:
return return_value; return return_value;
} }
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalDecoder_getstate__doc__,
"getstate($self, /)\n"
"--\n"
"\n");
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF \
{"getstate", (PyCFunction)_multibytecodec_MultibyteIncrementalDecoder_getstate, METH_NOARGS, _multibytecodec_MultibyteIncrementalDecoder_getstate__doc__},
static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self);
static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_getstate(MultibyteIncrementalDecoderObject *self, PyObject *Py_UNUSED(ignored))
{
return _multibytecodec_MultibyteIncrementalDecoder_getstate_impl(self);
}
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalDecoder_setstate__doc__,
"setstate($self, state, /)\n"
"--\n"
"\n");
#define _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF \
{"setstate", (PyCFunction)_multibytecodec_MultibyteIncrementalDecoder_setstate, METH_O, _multibytecodec_MultibyteIncrementalDecoder_setstate__doc__},
static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
PyObject *state);
static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_setstate(MultibyteIncrementalDecoderObject *self, PyObject *arg)
{
PyObject *return_value = NULL;
PyObject *state;
if (!PyArg_Parse(arg, "O!:setstate", &PyTuple_Type, &state)) {
goto exit;
}
return_value = _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(self, state);
exit:
return return_value;
}
PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalDecoder_reset__doc__, PyDoc_STRVAR(_multibytecodec_MultibyteIncrementalDecoder_reset__doc__,
"reset($self, /)\n" "reset($self, /)\n"
"--\n" "--\n"
...@@ -330,4 +418,4 @@ PyDoc_STRVAR(_multibytecodec___create_codec__doc__, ...@@ -330,4 +418,4 @@ PyDoc_STRVAR(_multibytecodec___create_codec__doc__,
#define _MULTIBYTECODEC___CREATE_CODEC_METHODDEF \ #define _MULTIBYTECODEC___CREATE_CODEC_METHODDEF \
{"__create_codec", (PyCFunction)_multibytecodec___create_codec, METH_O, _multibytecodec___create_codec__doc__}, {"__create_codec", (PyCFunction)_multibytecodec___create_codec, METH_O, _multibytecodec___create_codec__doc__},
/*[clinic end generated code: output=680f59f4cfe63c25 input=a9049054013a1b77]*/ /*[clinic end generated code: output=2fa0a38494716b97 input=a9049054013a1b77]*/
...@@ -895,6 +895,93 @@ _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEnco ...@@ -895,6 +895,93 @@ _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEnco
return encoder_encode_stateful(STATEFUL_ECTX(self), input, final); return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
} }
/*[clinic input]
_multibytecodec.MultibyteIncrementalEncoder.getstate
[clinic start generated code]*/

static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
/*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
{
    /* Serialized layout, in this order:
     *   - 1 byte: size in bytes of the pending text that follows
     *   - up to MAXENCPENDING*4 bytes: the pending text encoded as UTF-8
     *     (a character takes at most 4 bytes)
     *   - sizeof(self->state.c) bytes: a copy of MultibyteCodec_State.c
     * The used prefix of this array is then converted to an unsigned,
     * little-endian Python int.  A plain byte array is used so that the
     * value does not depend on the compiler, e.g. on struct padding.
     */
    unsigned char packed[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
    Py_ssize_t used = 1;            /* the size byte is always present */

    packed[0] = 0;
    if (self->pending != NULL) {
        Py_ssize_t nbytes;
        const char *utf8 = PyUnicode_AsUTF8AndSize(self->pending, &nbytes);

        if (utf8 == NULL) {
            return NULL;
        }
        if (nbytes > MAXENCPENDING*4) {
            PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
            return NULL;
        }
        packed[0] = nbytes;
        memcpy(&packed[1], utf8, nbytes);
        used += nbytes;
    }

    /* The codec state bytes come right after the pending text. */
    memcpy(&packed[used], self->state.c, sizeof(self->state.c));
    used += sizeof(self->state.c);

    return (PyObject *)_PyLong_FromByteArray(packed, used,
                                             1 /* little-endian */ ,
                                             0 /* unsigned */ );
}
/*[clinic input]
_multibytecodec.MultibyteIncrementalEncoder.setstate
    state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
    /
[clinic start generated code]*/

static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
                                                          PyLongObject *statelong)
/*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
{
    /* The integer is unpacked as unsigned little-endian bytes laid out as:
     * 1 size byte, that many bytes of UTF-8 pending text, then
     * sizeof(self->state.c) codec state bytes.
     */
    unsigned char packed[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
    PyObject *restored;
    Py_ssize_t nbytes;

    if (_PyLong_AsByteArray(statelong, packed, sizeof(packed),
                            1 /* little-endian */ ,
                            0 /* unsigned */ ) < 0) {
        return NULL;
    }

    nbytes = packed[0];
    if (nbytes > MAXENCPENDING*4) {
        PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
        return NULL;
    }

    /* Strict decoding rejects a state whose pending bytes are not UTF-8. */
    restored = PyUnicode_DecodeUTF8((const char *)&packed[1], nbytes,
                                    "strict");
    if (restored == NULL) {
        return NULL;
    }

    /* Nothing on the encoder is modified before this point, so a
       rejected state leaves the encoder as it was. */
    Py_CLEAR(self->pending);
    self->pending = restored;
    memcpy(self->state.c, &packed[1 + nbytes], sizeof(self->state.c));

    Py_RETURN_NONE;
}
/*[clinic input] /*[clinic input]
_multibytecodec.MultibyteIncrementalEncoder.reset _multibytecodec.MultibyteIncrementalEncoder.reset
[clinic start generated code]*/ [clinic start generated code]*/
...@@ -919,6 +1006,8 @@ _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncod ...@@ -919,6 +1006,8 @@ _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncod
static struct PyMethodDef mbiencoder_methods[] = { static struct PyMethodDef mbiencoder_methods[] = {
_MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
_MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
_MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
_MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
{NULL, NULL}, {NULL, NULL},
}; };
...@@ -984,6 +1073,7 @@ mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self) ...@@ -984,6 +1073,7 @@ mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
{ {
PyObject_GC_UnTrack(self); PyObject_GC_UnTrack(self);
ERROR_DECREF(self->errors); ERROR_DECREF(self->errors);
Py_CLEAR(self->pending);
Py_TYPE(self)->tp_free(self); Py_TYPE(self)->tp_free(self);
} }
...@@ -1119,6 +1209,68 @@ errorexit: ...@@ -1119,6 +1209,68 @@ errorexit:
return NULL; return NULL;
} }
/*[clinic input]
_multibytecodec.MultibyteIncrementalDecoder.getstate
[clinic start generated code]*/

static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
/*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
{
    /* Returns a 2-tuple (pending, state):
     *   pending - bytes object holding the not yet decoded input bytes
     *   state   - int built from ALL bytes of MultibyteCodec_State.c,
     *             read as an unsigned little-endian number
     * Returns NULL with an exception set on allocation failure.
     *
     * The whole state array is exported, not just its first byte:
     * codecs are free to keep state in any of the bytes, and dropping
     * the others would make a later setstate() restore a different
     * state.  Going through a byte array with a fixed byte order keeps
     * the value identical on little- and big-endian CPUs.
     */
    PyObject *buffer;
    PyObject *statelong;

    buffer = PyBytes_FromStringAndSize((const char *)self->pending,
                                       self->pendingsize);
    if (buffer == NULL) {
        return NULL;
    }

    statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
                                                  sizeof(self->state.c),
                                                  1 /* little-endian */ ,
                                                  0 /* unsigned */ );
    if (statelong == NULL) {
        Py_DECREF(buffer);
        return NULL;
    }

    /* "N" transfers ownership of both references to the new tuple. */
    return Py_BuildValue("NN", buffer, statelong);
}
/*[clinic input]
_multibytecodec.MultibyteIncrementalDecoder.setstate
    state: object(subclass_of='&PyTuple_Type')
    /
[clinic start generated code]*/

static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
                                                          PyObject *state)
/*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
{
    /* Expects state to be a 2-tuple (pending, flag):
     *   pending - bytes object with at most MAXDECPENDING bytes
     *   flag    - int holding the codec state bytes as an unsigned
     *             little-endian number
     * Raises TypeError for a malformed tuple, OverflowError if flag is
     * negative or does not fit in the codec state, and UnicodeError if
     * pending is too large.
     *
     * The flag is unpacked with an explicit byte order instead of being
     * copied from a native integer, so the same state value restores
     * the same codec state on little- and big-endian CPUs.  All
     * validation happens before the decoder is modified, so a rejected
     * state leaves the decoder untouched.
     */
    PyObject *buffer;
    PyLongObject *statelong;
    Py_ssize_t buffersize;
    const char *bufferstr;
    unsigned char statebytes[sizeof(self->state.c)];

    if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
                          &buffer, &PyLong_Type, &statelong))
    {
        return NULL;
    }

    if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
                            1 /* little-endian */ ,
                            0 /* unsigned */ ) < 0) {
        return NULL;
    }

    buffersize = PyBytes_Size(buffer);
    if (buffersize == -1) {
        return NULL;
    }

    if (buffersize > MAXDECPENDING) {
        PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
        return NULL;
    }

    bufferstr = PyBytes_AsString(buffer);
    if (bufferstr == NULL) {
        return NULL;
    }

    self->pendingsize = buffersize;
    memcpy(self->pending, bufferstr, self->pendingsize);
    memcpy(self->state.c, statebytes, sizeof(statebytes));

    Py_RETURN_NONE;
}
/*[clinic input] /*[clinic input]
_multibytecodec.MultibyteIncrementalDecoder.reset _multibytecodec.MultibyteIncrementalDecoder.reset
[clinic start generated code]*/ [clinic start generated code]*/
...@@ -1137,6 +1289,8 @@ _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecod ...@@ -1137,6 +1289,8 @@ _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecod
static struct PyMethodDef mbidecoder_methods[] = { static struct PyMethodDef mbidecoder_methods[] = {
_MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
_MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
_MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
_MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
{NULL, NULL}, {NULL, NULL},
}; };
......
...@@ -16,12 +16,15 @@ typedef uint16_t ucs2_t, DBCHAR; ...@@ -16,12 +16,15 @@ typedef uint16_t ucs2_t, DBCHAR;
typedef unsigned short ucs2_t, DBCHAR; typedef unsigned short ucs2_t, DBCHAR;
#endif #endif
typedef union { /*
void *p; * A struct that provides 8 bytes of state for multibyte
int i; * codecs. Codecs are free to use this how they want. Note: if you
* need to add a new field to this struct, ensure that its byte order
* is independent of CPU endianness so that the return value of
* getstate doesn't differ between little and big endian CPUs.
*/
typedef struct {
unsigned char c[8]; unsigned char c[8];
ucs2_t u2[4];
Py_UCS4 u4[2];
} MultibyteCodec_State; } MultibyteCodec_State;
typedef int (*mbcodec_init)(const void *config); typedef int (*mbcodec_init)(const void *config);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment