Commit f7351b40, authored by Victor Stinner

Merged revisions 80031 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r80031 | victor.stinner | 2010-04-13 13:07:24 +0200 (mar., 13 avril 2010) | 4 lines

  Issue #8383: pickle and pickletools use the surrogatepass error handler when
  encoding and decoding Unicode strings as UTF-8, to support lone surrogates
  and stay compatible with Python 2.x and 3.0.
........
parent 1bc6f6ea
...@@ -499,7 +499,7 @@ class _Pickler: ...@@ -499,7 +499,7 @@ class _Pickler:
def save_str(self, obj, pack=struct.pack): def save_str(self, obj, pack=struct.pack):
if self.bin: if self.bin:
encoded = obj.encode('utf-8') encoded = obj.encode('utf-8', 'surrogatepass')
n = len(encoded) n = len(encoded)
self.write(BINUNICODE + pack("<i", n) + encoded) self.write(BINUNICODE + pack("<i", n) + encoded)
else: else:
...@@ -966,7 +966,7 @@ class _Unpickler: ...@@ -966,7 +966,7 @@ class _Unpickler:
def load_binunicode(self): def load_binunicode(self):
len = mloads(b'i' + self.read(4)) len = mloads(b'i' + self.read(4))
self.append(str(self.read(len), 'utf-8')) self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
dispatch[BINUNICODE[0]] = load_binunicode dispatch[BINUNICODE[0]] = load_binunicode
def load_short_binstring(self): def load_short_binstring(self):
......
...@@ -469,7 +469,7 @@ def read_unicodestring4(f): ...@@ -469,7 +469,7 @@ def read_unicodestring4(f):
raise ValueError("unicodestring4 byte count < 0: %d" % n) raise ValueError("unicodestring4 byte count < 0: %d" % n)
data = f.read(n) data = f.read(n)
if len(data) == n: if len(data) == n:
return str(data, 'utf-8') return str(data, 'utf-8', 'surrogatepass')
raise ValueError("expected %d bytes in a unicodestring4, but only %d " raise ValueError("expected %d bytes in a unicodestring4, but only %d "
"remain" % (n, len(data))) "remain" % (n, len(data)))
......
...@@ -515,7 +515,9 @@ class AbstractPickleTests(unittest.TestCase): ...@@ -515,7 +515,9 @@ class AbstractPickleTests(unittest.TestCase):
def test_unicode(self): def test_unicode(self):
endcases = ['', '<\\u>', '<\\\u1234>', '<\n>', endcases = ['', '<\\u>', '<\\\u1234>', '<\n>',
'<\\>', '<\\\U00012345>'] '<\\>', '<\\\U00012345>',
# surrogates
'<\udc80>']
for proto in protocols: for proto in protocols:
for u in endcases: for u in endcases:
p = self.dumps(u, proto) p = self.dumps(u, proto)
......
...@@ -28,6 +28,10 @@ Core and Builtins ...@@ -28,6 +28,10 @@ Core and Builtins
Library Library
------- -------
- Issue #8383: pickle and pickletools use the surrogatepass error handler when
encoding and decoding Unicode strings as UTF-8, to support lone surrogates
and stay compatible with Python 2.x and 3.0.
- Issue #8179: Fix macpath.realpath() on a non-existing path. - Issue #8179: Fix macpath.realpath() on a non-existing path.
- Issue #8139: ossaudiodev didn't initialize its types properly, therefore - Issue #8139: ossaudiodev didn't initialize its types properly, therefore
......
...@@ -1227,7 +1227,9 @@ save_unicode(PicklerObject *self, PyObject *obj) ...@@ -1227,7 +1227,9 @@ save_unicode(PicklerObject *self, PyObject *obj)
if (self->bin) { if (self->bin) {
char pdata[5]; char pdata[5];
encoded = PyUnicode_AsUTF8String(obj); encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
PyUnicode_GET_SIZE(obj),
"surrogatepass");
if (encoded == NULL) if (encoded == NULL)
goto error; goto error;
...@@ -3352,7 +3354,7 @@ load_binunicode(UnpicklerObject *self) ...@@ -3352,7 +3354,7 @@ load_binunicode(UnpicklerObject *self)
if (unpickler_read(self, &s, size) < 0) if (unpickler_read(self, &s, size) < 0)
return -1; return -1;
str = PyUnicode_DecodeUTF8(s, size, NULL); str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
if (str == NULL) if (str == NULL)
return -1; return -1;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment