Commit 53a9dd77, authored by Victor Stinner

Issue #10546: UTF-16-LE and UTF-16-BE *do* support non-BMP characters

Fix the doc and add tests.
Parent: 84cc0628
...@@ -1114,9 +1114,9 @@ particular, the following variants typically exist: ...@@ -1114,9 +1114,9 @@ particular, the following variants typically exist:
+-----------------+--------------------------------+--------------------------------+ +-----------------+--------------------------------+--------------------------------+
| utf_16 | U16, utf16 | all languages | | utf_16 | U16, utf16 | all languages |
+-----------------+--------------------------------+--------------------------------+ +-----------------+--------------------------------+--------------------------------+
| utf_16_be | UTF-16BE | all languages (BMP only) | | utf_16_be | UTF-16BE | all languages |
+-----------------+--------------------------------+--------------------------------+ +-----------------+--------------------------------+--------------------------------+
| utf_16_le | UTF-16LE | all languages (BMP only) | | utf_16_le | UTF-16LE | all languages |
+-----------------+--------------------------------+--------------------------------+ +-----------------+--------------------------------+--------------------------------+
| utf_7 | U7, unicode-1-1-utf-7 | all languages | | utf_7 | U7, unicode-1-1-utf-7 | all languages |
+-----------------+--------------------------------+--------------------------------+ +-----------------+--------------------------------+--------------------------------+
......
...@@ -544,6 +544,12 @@ class UTF16LETest(ReadTest): ...@@ -544,6 +544,12 @@ class UTF16LETest(ReadTest):
self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode, self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode,
b"\xff", "strict", True) b"\xff", "strict", True)
# Issue #10546: check that the codec named by self.encoding round-trips a
# character outside the BMP. U+10203 is represented in UTF-16 by the surrogate
# pair D800 DE03; the expected bytes below are those two 16-bit units with the
# low-order byte first (little-endian).
def test_nonbmp(self):
# Encoding direction: str -> bytes.
self.assertEqual("\U00010203".encode(self.encoding),
b'\x00\xd8\x03\xde')
# Decoding direction: the same bytes must give back the original character.
self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding),
"\U00010203")
class UTF16BETest(ReadTest): class UTF16BETest(ReadTest):
encoding = "utf-16-be" encoding = "utf-16-be"
...@@ -566,6 +572,12 @@ class UTF16BETest(ReadTest): ...@@ -566,6 +572,12 @@ class UTF16BETest(ReadTest):
self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode, self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode,
b"\xff", "strict", True) b"\xff", "strict", True)
# Issue #10546: check that the codec named by self.encoding ("utf-16-be" for
# this class) round-trips a character outside the BMP. U+10203 is represented
# in UTF-16 by the surrogate pair D800 DE03; the expected bytes below are those
# two 16-bit units with the high-order byte first (big-endian).
def test_nonbmp(self):
# Encoding direction: str -> bytes.
self.assertEqual("\U00010203".encode(self.encoding),
b'\xd8\x00\xde\x03')
# Decoding direction: the same bytes must give back the original character.
self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding),
"\U00010203")
class UTF8Test(ReadTest): class UTF8Test(ReadTest):
encoding = "utf-8" encoding = "utf-8"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment