Commit 024da5c2, authored by Guido van Rossum

Make all the multibyte codec tests pass.

Changes to io.py, necessary to make this work:
- Redid io.StringIO as a TextIOWrapper on top of a BytesIO instance.
- Got rid of _MemoryIOMixin, folding it into BytesIO instead.
- The read() functions that take -1 to mean "everything" now also take None.
- Added readline() support to BufferedIOBase. :-(
parent f4cfc8f6
...@@ -415,8 +415,8 @@ class BufferedIOBase(IOBase): ...@@ -415,8 +415,8 @@ class BufferedIOBase(IOBase):
def read(self, n: int = -1) -> bytes: def read(self, n: int = -1) -> bytes:
"""read(n: int = -1) -> bytes. Read and return up to n bytes. """read(n: int = -1) -> bytes. Read and return up to n bytes.
If the argument is omitted, or negative, reads and returns all If the argument is omitted, None, or negative, reads and
data until EOF. returns all data until EOF.
If the argument is positive, and the underlying raw stream is If the argument is positive, and the underlying raw stream is
not 'interactive', multiple raw reads may be issued to satisfy not 'interactive', multiple raw reads may be issued to satisfy
...@@ -450,6 +450,20 @@ class BufferedIOBase(IOBase): ...@@ -450,6 +450,20 @@ class BufferedIOBase(IOBase):
b[:n] = data b[:n] = data
return n return n
def readline(self, sizehint: int = -1) -> bytes:
    """For backwards compatibility, a (slow) readline().

    Reads the stream one byte at a time through self.read(1) until a
    newline byte has been consumed, the stream reports no more data,
    or the size limit is reached.

    sizehint -- maximum number of bytes to return; None or a negative
                value means "no limit".

    Returns the bytes read, including the terminating newline when one
    was found; returns an empty bytes object if nothing could be read.
    """
    if sizehint is None:
        sizehint = -1
    # Collect the pieces and join once at the end instead of growing a
    # bytes object with += on every iteration.
    chunks = []
    nread = 0
    while sizehint < 0 or nread < sizehint:
        b = self.read(1)
        if not b:
            # EOF (or no data available right now): stop with what we have.
            break
        chunks.append(b)
        nread += len(b)
        if b == b"\n":
            break
    return b"".join(chunks)
def write(self, b: bytes) -> int: def write(self, b: bytes) -> int:
"""write(b: bytes) -> int. Write the given buffer to the IO stream. """write(b: bytes) -> int. Write the given buffer to the IO stream.
...@@ -518,19 +532,25 @@ class _BufferedIOMixin(BufferedIOBase): ...@@ -518,19 +532,25 @@ class _BufferedIOMixin(BufferedIOBase):
return self.raw.isatty() return self.raw.isatty()
class _MemoryIOMixin(BufferedIOBase): class BytesIO(BufferedIOBase):
# XXX docstring """Buffered I/O implementation using an in-memory bytes buffer."""
def __init__(self, buffer): # XXX More docs
def __init__(self, initial_bytes=None):
buffer = b""
if initial_bytes is not None:
buffer += initial_bytes
self._buffer = buffer self._buffer = buffer
self._pos = 0 self._pos = 0
def getvalue(self): def getvalue(self):
return self._buffer return self._buffer
def read(self, n=-1): def read(self, n=None):
assert n is not None if n is None:
n = -1
if n < 0: if n < 0:
n = len(self._buffer) n = len(self._buffer)
newpos = min(len(self._buffer), self._pos + n) newpos = min(len(self._buffer), self._pos + n)
...@@ -538,6 +558,9 @@ class _MemoryIOMixin(BufferedIOBase): ...@@ -538,6 +558,9 @@ class _MemoryIOMixin(BufferedIOBase):
self._pos = newpos self._pos = newpos
return b return b
def read1(self, n=None):
    """Read and return up to n bytes by delegating to read().

    n -- number of bytes to read; it is passed to read() unchanged, so
         an omitted or None argument is handled exactly as read()
         handles it.
    """
    return self.read(n)
def write(self, b): def write(self, b):
n = len(b) n = len(b)
newpos = self._pos + n newpos = self._pos + n
...@@ -575,65 +598,6 @@ class _MemoryIOMixin(BufferedIOBase): ...@@ -575,65 +598,6 @@ class _MemoryIOMixin(BufferedIOBase):
return True return True
class BytesIO(_MemoryIOMixin):
"""Buffered I/O implementation using a bytes buffer, like StringIO."""
# XXX More docs
def __init__(self, initial_bytes=None):
buffer = b""
if initial_bytes is not None:
buffer += initial_bytes
_MemoryIOMixin.__init__(self, buffer)
# XXX This should inherit from TextIOBase
class StringIO(_MemoryIOMixin):
"""Buffered I/O implementation using a string buffer, like StringIO."""
# XXX More docs
# Reuses the same code as BytesIO, but encode strings on the way in
# and decode them on the way out.
charsize = len("!".encode("unicode-internal"))
def __init__(self, initial_string=None):
if initial_string is not None:
buffer = initial_string.encode("unicode-internal")
else:
buffer = b""
_MemoryIOMixin.__init__(self, buffer)
def getvalue(self):
return self._buffer.encode("unicode-internal")
def read(self, n=-1):
return super(StringIO, self).read(n*self.charsize) \
.decode("unicode-internal")
def write(self, s):
return super(StringIO, self).write(s.encode("unicode-internal")) \
//self.charsize
def seek(self, pos, whence=0):
return super(StringIO, self).seek(self.charsize*pos, whence) \
//self.charsize
def tell(self):
return super(StringIO, self).tell()//self.charsize
def truncate(self, pos=None):
if pos is not None:
pos *= self.charsize
return super(StringIO, self).truncate(pos)//self.charsize
def readinto(self, b: bytes) -> int:
self._unsupported("readinto")
class BufferedReader(_BufferedIOMixin): class BufferedReader(_BufferedIOMixin):
"""Buffer for a readable sequential RawIO object.""" """Buffer for a readable sequential RawIO object."""
...@@ -646,7 +610,7 @@ class BufferedReader(_BufferedIOMixin): ...@@ -646,7 +610,7 @@ class BufferedReader(_BufferedIOMixin):
self._read_buf = b"" self._read_buf = b""
self.buffer_size = buffer_size self.buffer_size = buffer_size
def read(self, n=-1): def read(self, n=None):
"""Read n bytes. """Read n bytes.
Returns exactly n bytes of data unless the underlying raw IO Returns exactly n bytes of data unless the underlying raw IO
...@@ -654,7 +618,8 @@ class BufferedReader(_BufferedIOMixin): ...@@ -654,7 +618,8 @@ class BufferedReader(_BufferedIOMixin):
mode. If n is negative, read until EOF or until read() would mode. If n is negative, read until EOF or until read() would
block. block.
""" """
assert n is not None if n is None:
n = -1
nodata_val = b"" nodata_val = b""
while n < 0 or len(self._read_buf) < n: while n < 0 or len(self._read_buf) < n:
to_read = max(self.buffer_size, to_read = max(self.buffer_size,
...@@ -801,7 +766,9 @@ class BufferedRWPair(BufferedIOBase): ...@@ -801,7 +766,9 @@ class BufferedRWPair(BufferedIOBase):
self.reader = BufferedReader(reader, buffer_size) self.reader = BufferedReader(reader, buffer_size)
self.writer = BufferedWriter(writer, buffer_size, max_buffer_size) self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)
def read(self, n=-1): def read(self, n=None):
if n is None:
n = -1
return self.reader.read(n) return self.reader.read(n)
def readinto(self, b): def readinto(self, b):
...@@ -861,7 +828,9 @@ class BufferedRandom(BufferedWriter, BufferedReader): ...@@ -861,7 +828,9 @@ class BufferedRandom(BufferedWriter, BufferedReader):
else: else:
return self.raw.tell() - len(self._read_buf) return self.raw.tell() - len(self._read_buf)
def read(self, n=-1): def read(self, n=None):
if n is None:
n = -1
self.flush() self.flush()
return BufferedReader.read(self, n) return BufferedReader.read(self, n)
...@@ -1129,7 +1098,9 @@ class TextIOWrapper(TextIOBase): ...@@ -1129,7 +1098,9 @@ class TextIOWrapper(TextIOBase):
except UnicodeEncodeError: except UnicodeEncodeError:
return u return u
def read(self, n: int = -1): def read(self, n=None):
if n is None:
n = -1
decoder = self._decoder or self._get_decoder() decoder = self._decoder or self._get_decoder()
res = self._pending res = self._pending
if n < 0: if n < 0:
...@@ -1146,7 +1117,7 @@ class TextIOWrapper(TextIOBase): ...@@ -1146,7 +1117,7 @@ class TextIOWrapper(TextIOBase):
self._pending = res[n:] self._pending = res[n:]
return self._simplify(res[:n]) return self._simplify(res[:n])
def __next__(self) -> str: def __next__(self):
self._telling = False self._telling = False
line = self.readline() line = self.readline()
if not line: if not line:
...@@ -1218,3 +1189,17 @@ class TextIOWrapper(TextIOBase): ...@@ -1218,3 +1189,17 @@ class TextIOWrapper(TextIOBase):
return self._simplify(line[:endpos] + "\n") return self._simplify(line[:endpos] + "\n")
else: else:
return self._simplify(line[:nextpos]) return self._simplify(line[:nextpos])
class StringIO(TextIOWrapper):
    """Text stream held in memory.

    Implemented as a TextIOWrapper layered over a fresh BytesIO
    instance, using UTF-8 as the encoding.
    """

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value=""):
        """Create the stream; a non-empty initial_value is written
        first and the position is then moved back to 0."""
        super(StringIO, self).__init__(BytesIO(), "utf-8")
        if not initial_value:
            return
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the contents of the underlying byte buffer decoded
        as UTF-8."""
        raw = self.buffer.getvalue()
        return raw.decode("utf-8")
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -13,12 +13,12 @@ class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase): ...@@ -13,12 +13,12 @@ class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
tstring = test_multibytecodec_support.load_teststring('gb2312') tstring = test_multibytecodec_support.load_teststring('gb2312')
codectests = ( codectests = (
# invalid bytes # invalid bytes
("abc\x81\x81\xc1\xc4", "strict", None), (b"abc\x81\x81\xc1\xc4", "strict", None),
("abc\xc8", "strict", None), (b"abc\xc8", "strict", None),
("abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"), (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"),
("abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"), (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
("abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"), (b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
("\xc1\x64", "strict", None), (b"\xc1\x64", "strict", None),
) )
class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase): class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
...@@ -26,12 +26,12 @@ class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase): ...@@ -26,12 +26,12 @@ class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
tstring = test_multibytecodec_support.load_teststring('gbk') tstring = test_multibytecodec_support.load_teststring('gbk')
codectests = ( codectests = (
# invalid bytes # invalid bytes
("abc\x80\x80\xc1\xc4", "strict", None), (b"abc\x80\x80\xc1\xc4", "strict", None),
("abc\xc8", "strict", None), (b"abc\xc8", "strict", None),
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"), (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"), (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
("abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
("\x83\x34\x83\x31", "strict", None), (b"\x83\x34\x83\x31", "strict", None),
("\u30fb", "strict", None), ("\u30fb", "strict", None),
) )
...@@ -40,13 +40,13 @@ class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase): ...@@ -40,13 +40,13 @@ class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
tstring = test_multibytecodec_support.load_teststring('gb18030') tstring = test_multibytecodec_support.load_teststring('gb18030')
codectests = ( codectests = (
# invalid bytes # invalid bytes
("abc\x80\x80\xc1\xc4", "strict", None), (b"abc\x80\x80\xc1\xc4", "strict", None),
("abc\xc8", "strict", None), (b"abc\xc8", "strict", None),
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"), (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"), (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
("abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
("abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"), (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"),
("\u30fb", "strict", "\x819\xa79"), ("\u30fb", "strict", b"\x819\xa79"),
) )
has_iso10646 = True has_iso10646 = True
......
...@@ -13,11 +13,11 @@ class Test_Big5HKSCS(test_multibytecodec_support.TestBase, unittest.TestCase): ...@@ -13,11 +13,11 @@ class Test_Big5HKSCS(test_multibytecodec_support.TestBase, unittest.TestCase):
tstring = test_multibytecodec_support.load_teststring('big5hkscs') tstring = test_multibytecodec_support.load_teststring('big5hkscs')
codectests = ( codectests = (
# invalid bytes # invalid bytes
("abc\x80\x80\xc1\xc4", "strict", None), (b"abc\x80\x80\xc1\xc4", "strict", None),
("abc\xc8", "strict", None), (b"abc\xc8", "strict", None),
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"), (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"), (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
("abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
) )
def test_main(): def test_main():
......
...@@ -13,14 +13,14 @@ class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase): ...@@ -13,14 +13,14 @@ class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
tstring = test_multibytecodec_support.load_teststring('shift_jis') tstring = test_multibytecodec_support.load_teststring('shift_jis')
codectests = ( codectests = (
# invalid bytes # invalid bytes
("abc\x81\x00\x81\x00\x82\x84", "strict", None), (b"abc\x81\x00\x81\x00\x82\x84", "strict", None),
("abc\xf8", "strict", None), (b"abc\xf8", "strict", None),
("abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"), (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"),
("abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"), (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
("abc\x81\x00\x82\x84", "ignore", "abc\uff44"), (b"abc\x81\x00\x82\x84", "ignore", "abc\uff44"),
# sjis vs cp932 # sjis vs cp932
("\\\x7e", "replace", "\\\x7e"), (b"\\\x7e", "replace", "\\\x7e"),
("\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"), (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
) )
class Test_EUC_JISX0213(test_multibytecodec_support.TestBase, class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
...@@ -29,28 +29,28 @@ class Test_EUC_JISX0213(test_multibytecodec_support.TestBase, ...@@ -29,28 +29,28 @@ class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
tstring = test_multibytecodec_support.load_teststring('euc_jisx0213') tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
codectests = ( codectests = (
# invalid bytes # invalid bytes
("abc\x80\x80\xc1\xc4", "strict", None), (b"abc\x80\x80\xc1\xc4", "strict", None),
("abc\xc8", "strict", None), (b"abc\xc8", "strict", None),
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"), (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"), (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
("abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
("abc\x8f\x83\x83", "replace", "abc\ufffd"), (b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
("\xc1\x64", "strict", None), (b"\xc1\x64", "strict", None),
("\xa1\xc0", "strict", "\uff3c"), (b"\xa1\xc0", "strict", "\uff3c"),
) )
xmlcharnametest = ( xmlcharnametest = (
"\xab\u211c\xbb = \u2329\u1234\u232a", "\xab\u211c\xbb = \u2329\u1234\u232a",
"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;" b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
) )
eucjp_commontests = ( eucjp_commontests = (
("abc\x80\x80\xc1\xc4", "strict", None), (b"abc\x80\x80\xc1\xc4", "strict", None),
("abc\xc8", "strict", None), (b"abc\xc8", "strict", None),
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"), (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"), (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
("abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
("abc\x8f\x83\x83", "replace", "abc\ufffd"), (b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
("\xc1\x64", "strict", None), (b"\xc1\x64", "strict", None),
) )
class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase, class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
...@@ -58,25 +58,25 @@ class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase, ...@@ -58,25 +58,25 @@ class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
encoding = 'euc_jp' encoding = 'euc_jp'
tstring = test_multibytecodec_support.load_teststring('euc_jp') tstring = test_multibytecodec_support.load_teststring('euc_jp')
codectests = eucjp_commontests + ( codectests = eucjp_commontests + (
("\xa1\xc0\\", "strict", "\uff3c\\"), (b"\xa1\xc0\\", "strict", "\uff3c\\"),
("\xa5", "strict", "\x5c"), ("\xa5", "strict", b"\x5c"),
("\u203e", "strict", "\x7e"), ("\u203e", "strict", b"\x7e"),
) )
shiftjis_commonenctests = ( shiftjis_commonenctests = (
("abc\x80\x80\x82\x84", "strict", None), (b"abc\x80\x80\x82\x84", "strict", None),
("abc\xf8", "strict", None), (b"abc\xf8", "strict", None),
("abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"), (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
("abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"), (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
("abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"), (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
) )
class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase): class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'shift_jis' encoding = 'shift_jis'
tstring = test_multibytecodec_support.load_teststring('shift_jis') tstring = test_multibytecodec_support.load_teststring('shift_jis')
codectests = shiftjis_commonenctests + ( codectests = shiftjis_commonenctests + (
("\\\x7e", "strict", "\\\x7e"), (b"\\\x7e", "strict", "\\\x7e"),
("\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"), (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
) )
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase): class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
...@@ -84,18 +84,18 @@ class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase): ...@@ -84,18 +84,18 @@ class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
tstring = test_multibytecodec_support.load_teststring('shift_jisx0213') tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
codectests = ( codectests = (
# invalid bytes # invalid bytes
("abc\x80\x80\x82\x84", "strict", None), (b"abc\x80\x80\x82\x84", "strict", None),
("abc\xf8", "strict", None), (b"abc\xf8", "strict", None),
("abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"), (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
("abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"), (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
("abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"), (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
# sjis vs cp932 # sjis vs cp932
("\\\x7e", "replace", "\xa5\u203e"), (b"\\\x7e", "replace", "\xa5\u203e"),
("\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"), (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
) )
xmlcharnametest = ( xmlcharnametest = (
"\xab\u211c\xbb = \u2329\u1234\u232a", "\xab\u211c\xbb = \u2329\u1234\u232a",
"\x85G&real;\x85Q = &lang;&#4660;&rang;" b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
) )
def test_main(): def test_main():
......
...@@ -13,11 +13,11 @@ class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase): ...@@ -13,11 +13,11 @@ class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
tstring = test_multibytecodec_support.load_teststring('cp949') tstring = test_multibytecodec_support.load_teststring('cp949')
codectests = ( codectests = (
# invalid bytes # invalid bytes
("abc\x80\x80\xc1\xc4", "strict", None), (b"abc\x80\x80\xc1\xc4", "strict", None),
("abc\xc8", "strict", None), (b"abc\xc8", "strict", None),
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"), (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"), (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
("abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
) )
class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase): class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
...@@ -25,11 +25,11 @@ class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase): ...@@ -25,11 +25,11 @@ class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
tstring = test_multibytecodec_support.load_teststring('euc_kr') tstring = test_multibytecodec_support.load_teststring('euc_kr')
codectests = ( codectests = (
# invalid bytes # invalid bytes
("abc\x80\x80\xc1\xc4", "strict", None), (b"abc\x80\x80\xc1\xc4", "strict", None),
("abc\xc8", "strict", None), (b"abc\xc8", "strict", None),
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"), (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"), (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
("abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
) )
class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase): class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
...@@ -37,11 +37,11 @@ class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase): ...@@ -37,11 +37,11 @@ class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
tstring = test_multibytecodec_support.load_teststring('johab') tstring = test_multibytecodec_support.load_teststring('johab')
codectests = ( codectests = (
# invalid bytes # invalid bytes
("abc\x80\x80\xc1\xc4", "strict", None), (b"abc\x80\x80\xc1\xc4", "strict", None),
("abc\xc8", "strict", None), (b"abc\xc8", "strict", None),
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"), (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"),
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"), (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"),
("abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"),
) )
def test_main(): def test_main():
......
...@@ -13,11 +13,11 @@ class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase): ...@@ -13,11 +13,11 @@ class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
tstring = test_multibytecodec_support.load_teststring('big5') tstring = test_multibytecodec_support.load_teststring('big5')
codectests = ( codectests = (
# invalid bytes # invalid bytes
("abc\x80\x80\xc1\xc4", "strict", None), (b"abc\x80\x80\xc1\xc4", "strict", None),
("abc\xc8", "strict", None), (b"abc\xc8", "strict", None),
("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"), (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"), (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
("abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
) )
def test_main(): def test_main():
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
from test import test_support from test import test_support
from test import test_multibytecodec_support from test import test_multibytecodec_support
from test.test_support import TESTFN from test.test_support import TESTFN
import unittest, StringIO, codecs, sys, os import unittest, io, codecs, sys, os
ALL_CJKENCODINGS = [ ALL_CJKENCODINGS = [
# _codecs_cn # _codecs_cn
...@@ -30,13 +30,13 @@ class Test_MultibyteCodec(unittest.TestCase): ...@@ -30,13 +30,13 @@ class Test_MultibyteCodec(unittest.TestCase):
def test_nullcoding(self): def test_nullcoding(self):
for enc in ALL_CJKENCODINGS: for enc in ALL_CJKENCODINGS:
self.assertEqual(''.decode(enc), '') self.assertEqual(b''.decode(enc), '')
self.assertEqual(str('', enc), '') self.assertEqual(str(b'', enc), '')
self.assertEqual(''.encode(enc), '') self.assertEqual(''.encode(enc), b'')
def test_str_decode(self): def test_str_decode(self):
for enc in ALL_CJKENCODINGS: for enc in ALL_CJKENCODINGS:
self.assertEqual('abcd'.encode(enc), 'abcd') self.assertEqual('abcd'.encode(enc), b'abcd')
def test_errorcallback_longindex(self): def test_errorcallback_longindex(self):
dec = codecs.getdecoder('euc-kr') dec = codecs.getdecoder('euc-kr')
...@@ -48,7 +48,7 @@ class Test_MultibyteCodec(unittest.TestCase): ...@@ -48,7 +48,7 @@ class Test_MultibyteCodec(unittest.TestCase):
def test_codingspec(self): def test_codingspec(self):
try: try:
for enc in ALL_CJKENCODINGS: for enc in ALL_CJKENCODINGS:
print('# coding:', enc, file=open(TESTFN, 'w')) print('# coding:', enc, file=io.open(TESTFN, 'w'))
execfile(TESTFN) execfile(TESTFN)
finally: finally:
os.unlink(TESTFN) os.unlink(TESTFN)
...@@ -59,13 +59,13 @@ class Test_IncrementalEncoder(unittest.TestCase): ...@@ -59,13 +59,13 @@ class Test_IncrementalEncoder(unittest.TestCase):
# cp949 encoder isn't stateful at all. # cp949 encoder isn't stateful at all.
encoder = codecs.getincrementalencoder('cp949')() encoder = codecs.getincrementalencoder('cp949')()
self.assertEqual(encoder.encode('\ud30c\uc774\uc36c \ub9c8\uc744'), self.assertEqual(encoder.encode('\ud30c\uc774\uc36c \ub9c8\uc744'),
'\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb') b'\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
self.assertEqual(encoder.reset(), None) self.assertEqual(encoder.reset(), None)
self.assertEqual(encoder.encode('\u2606\u223c\u2606', True), self.assertEqual(encoder.encode('\u2606\u223c\u2606', True),
'\xa1\xd9\xa1\xad\xa1\xd9') b'\xa1\xd9\xa1\xad\xa1\xd9')
self.assertEqual(encoder.reset(), None) self.assertEqual(encoder.reset(), None)
self.assertEqual(encoder.encode('', True), '') self.assertEqual(encoder.encode('', True), b'')
self.assertEqual(encoder.encode('', False), '') self.assertEqual(encoder.encode('', False), b'')
self.assertEqual(encoder.reset(), None) self.assertEqual(encoder.reset(), None)
def test_stateful(self): def test_stateful(self):
...@@ -75,29 +75,29 @@ class Test_IncrementalEncoder(unittest.TestCase): ...@@ -75,29 +75,29 @@ class Test_IncrementalEncoder(unittest.TestCase):
# U+0300 => ABDC # U+0300 => ABDC
encoder = codecs.getincrementalencoder('jisx0213')() encoder = codecs.getincrementalencoder('jisx0213')()
self.assertEqual(encoder.encode('\u00e6\u0300'), '\xab\xc4') self.assertEqual(encoder.encode('\u00e6\u0300'), b'\xab\xc4')
self.assertEqual(encoder.encode('\u00e6'), '') self.assertEqual(encoder.encode('\u00e6'), b'')
self.assertEqual(encoder.encode('\u0300'), '\xab\xc4') self.assertEqual(encoder.encode('\u0300'), b'\xab\xc4')
self.assertEqual(encoder.encode('\u00e6', True), '\xa9\xdc') self.assertEqual(encoder.encode('\u00e6', True), b'\xa9\xdc')
self.assertEqual(encoder.reset(), None) self.assertEqual(encoder.reset(), None)
self.assertEqual(encoder.encode('\u0300'), '\xab\xdc') self.assertEqual(encoder.encode('\u0300'), b'\xab\xdc')
self.assertEqual(encoder.encode('\u00e6'), '') self.assertEqual(encoder.encode('\u00e6'), b'')
self.assertEqual(encoder.encode('', True), '\xa9\xdc') self.assertEqual(encoder.encode('', True), b'\xa9\xdc')
self.assertEqual(encoder.encode('', True), '') self.assertEqual(encoder.encode('', True), b'')
def test_stateful_keep_buffer(self): def test_stateful_keep_buffer(self):
encoder = codecs.getincrementalencoder('jisx0213')() encoder = codecs.getincrementalencoder('jisx0213')()
self.assertEqual(encoder.encode('\u00e6'), '') self.assertEqual(encoder.encode('\u00e6'), b'')
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123') self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
self.assertEqual(encoder.encode('\u0300\u00e6'), '\xab\xc4') self.assertEqual(encoder.encode('\u0300\u00e6'), b'\xab\xc4')
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123') self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
self.assertEqual(encoder.reset(), None) self.assertEqual(encoder.reset(), None)
self.assertEqual(encoder.encode('\u0300'), '\xab\xdc') self.assertEqual(encoder.encode('\u0300'), b'\xab\xdc')
self.assertEqual(encoder.encode('\u00e6'), '') self.assertEqual(encoder.encode('\u00e6'), b'')
self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123') self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
self.assertEqual(encoder.encode('', True), '\xa9\xdc') self.assertEqual(encoder.encode('', True), b'\xa9\xdc')
class Test_IncrementalDecoder(unittest.TestCase): class Test_IncrementalDecoder(unittest.TestCase):
...@@ -105,21 +105,21 @@ class Test_IncrementalDecoder(unittest.TestCase): ...@@ -105,21 +105,21 @@ class Test_IncrementalDecoder(unittest.TestCase):
def test_dbcs(self): def test_dbcs(self):
# cp949 decoder is simple with only 1 or 2 bytes sequences. # cp949 decoder is simple with only 1 or 2 bytes sequences.
decoder = codecs.getincrementaldecoder('cp949')() decoder = codecs.getincrementaldecoder('cp949')()
self.assertEqual(decoder.decode('\xc6\xc4\xc0\xcc\xbd'), self.assertEqual(decoder.decode(b'\xc6\xc4\xc0\xcc\xbd'),
'\ud30c\uc774') '\ud30c\uc774')
self.assertEqual(decoder.decode('\xe3 \xb8\xb6\xc0\xbb'), self.assertEqual(decoder.decode(b'\xe3 \xb8\xb6\xc0\xbb'),
'\uc36c \ub9c8\uc744') '\uc36c \ub9c8\uc744')
self.assertEqual(decoder.decode(''), '') self.assertEqual(decoder.decode(b''), '')
def test_dbcs_keep_buffer(self): def test_dbcs_keep_buffer(self):
decoder = codecs.getincrementaldecoder('cp949')() decoder = codecs.getincrementaldecoder('cp949')()
self.assertEqual(decoder.decode('\xc6\xc4\xc0'), '\ud30c') self.assertEqual(decoder.decode(b'\xc6\xc4\xc0'), '\ud30c')
self.assertRaises(UnicodeDecodeError, decoder.decode, '', True) self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
self.assertEqual(decoder.decode('\xcc'), '\uc774') self.assertEqual(decoder.decode(b'\xcc'), '\uc774')
self.assertEqual(decoder.decode('\xc6\xc4\xc0'), '\ud30c') self.assertEqual(decoder.decode(b'\xc6\xc4\xc0'), '\ud30c')
self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True) self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True)
self.assertEqual(decoder.decode('\xcc'), '\uc774') self.assertEqual(decoder.decode(b'\xcc'), '\uc774')
def test_iso2022(self): def test_iso2022(self):
decoder = codecs.getincrementaldecoder('iso2022-jp')() decoder = codecs.getincrementaldecoder('iso2022-jp')()
...@@ -140,61 +140,61 @@ class Test_IncrementalDecoder(unittest.TestCase): ...@@ -140,61 +140,61 @@ class Test_IncrementalDecoder(unittest.TestCase):
class Test_StreamWriter(unittest.TestCase): class Test_StreamWriter(unittest.TestCase):
if len('\U00012345') == 2: # UCS2 if len('\U00012345') == 2: # UCS2
def test_gb18030(self): def test_gb18030(self):
s= StringIO.StringIO() s= io.BytesIO()
c = codecs.getwriter('gb18030')(s) c = codecs.getwriter('gb18030')(s)
c.write('123') c.write('123')
self.assertEqual(s.getvalue(), '123') self.assertEqual(s.getvalue(), b'123')
c.write('\U00012345') c.write('\U00012345')
self.assertEqual(s.getvalue(), '123\x907\x959') self.assertEqual(s.getvalue(), b'123\x907\x959')
c.write('\U00012345'[0]) c.write('\U00012345'[0])
self.assertEqual(s.getvalue(), '123\x907\x959') self.assertEqual(s.getvalue(), b'123\x907\x959')
c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac') c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
self.assertEqual(s.getvalue(), self.assertEqual(s.getvalue(),
'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851') b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
c.write('\U00012345'[0]) c.write('\U00012345'[0])
self.assertEqual(s.getvalue(), self.assertEqual(s.getvalue(),
'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851') b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
self.assertRaises(UnicodeError, c.reset) self.assertRaises(UnicodeError, c.reset)
self.assertEqual(s.getvalue(), self.assertEqual(s.getvalue(),
'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851') b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
def test_utf_8(self): def test_utf_8(self):
s= StringIO.StringIO() s= io.BytesIO()
c = codecs.getwriter('utf-8')(s) c = codecs.getwriter('utf-8')(s)
c.write('123') c.write('123')
self.assertEqual(s.getvalue(), '123') self.assertEqual(s.getvalue(), b'123')
c.write('\U00012345') c.write('\U00012345')
self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85') self.assertEqual(s.getvalue(), b'123\xf0\x92\x8d\x85')
# Python utf-8 codec can't buffer surrogate pairs yet. # Python utf-8 codec can't buffer surrogate pairs yet.
if 0: if 0:
c.write('\U00012345'[0]) c.write('\U00012345'[0])
self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85') self.assertEqual(s.getvalue(), b'123\xf0\x92\x8d\x85')
c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac') c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
self.assertEqual(s.getvalue(), self.assertEqual(s.getvalue(),
'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85' b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
'\xea\xb0\x80\xc2\xac') b'\xea\xb0\x80\xc2\xac')
c.write('\U00012345'[0]) c.write('\U00012345'[0])
self.assertEqual(s.getvalue(), self.assertEqual(s.getvalue(),
'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85' b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
'\xea\xb0\x80\xc2\xac') b'\xea\xb0\x80\xc2\xac')
c.reset() c.reset()
self.assertEqual(s.getvalue(), self.assertEqual(s.getvalue(),
'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85' b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
'\xea\xb0\x80\xc2\xac\xed\xa0\x88') b'\xea\xb0\x80\xc2\xac\xed\xa0\x88')
c.write('\U00012345'[1]) c.write('\U00012345'[1])
self.assertEqual(s.getvalue(), self.assertEqual(s.getvalue(),
'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85' b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
'\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85') b'\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')
else: # UCS4 else: # UCS4
pass pass
def test_streamwriter_strwrite(self): def test_streamwriter_strwrite(self):
s = StringIO.StringIO() s = io.BytesIO()
wr = codecs.getwriter('gb18030')(s) wr = codecs.getwriter('gb18030')(s)
wr.write('abcd') wr.write('abcd')
self.assertEqual(s.getvalue(), 'abcd') self.assertEqual(s.getvalue(), b'abcd')
class Test_ISO2022(unittest.TestCase): class Test_ISO2022(unittest.TestCase):
def test_g2(self): def test_g2(self):
...@@ -203,10 +203,10 @@ class Test_ISO2022(unittest.TestCase): ...@@ -203,10 +203,10 @@ class Test_ISO2022(unittest.TestCase):
self.assertEqual(iso2022jp2.decode('iso2022-jp-2'), uni) self.assertEqual(iso2022jp2.decode('iso2022-jp-2'), uni)
def test_iso2022_jp_g0(self): def test_iso2022_jp_g0(self):
self.failIf('\x0e' in '\N{SOFT HYPHEN}'.encode('iso-2022-jp-2')) self.failIf(b'\x0e' in '\N{SOFT HYPHEN}'.encode('iso-2022-jp-2'))
for encoding in ('iso-2022-jp-2004', 'iso-2022-jp-3'): for encoding in ('iso-2022-jp-2004', 'iso-2022-jp-3'):
e = '\u3406'.encode(encoding) e = '\u3406'.encode(encoding)
self.failIf(filter(lambda x: x >= '\x80', e)) self.failIf(any(x > 0x80 for x in e))
def test_bug1572832(self): def test_bug1572832(self):
if sys.maxunicode >= 0x10000: if sys.maxunicode >= 0x10000:
......
...@@ -7,12 +7,12 @@ ...@@ -7,12 +7,12 @@
import sys, codecs, os.path import sys, codecs, os.path
import unittest import unittest
from test import test_support from test import test_support
from StringIO import StringIO from io import BytesIO
class TestBase: class TestBase:
encoding = '' # codec name encoding = '' # codec name
codec = None # codec tuple (with 4 elements) codec = None # codec tuple (with 4 elements)
tstring = '' # string to test StreamReader tstring = None # must set. 2 strings to test StreamReader
codectests = None # must set. codec test tuple codectests = None # must set. codec test tuple
roundtriptest = 1 # set if roundtrip is possible with unicode roundtriptest = 1 # set if roundtrip is possible with unicode
...@@ -31,7 +31,7 @@ class TestBase: ...@@ -31,7 +31,7 @@ class TestBase:
self.incrementaldecoder = self.codec.incrementaldecoder self.incrementaldecoder = self.codec.incrementaldecoder
def test_chunkcoding(self): def test_chunkcoding(self):
for native, utf8 in zip(*[StringIO(f).readlines() for native, utf8 in zip(*[map(bytes, str8(f).splitlines(1))
for f in self.tstring]): for f in self.tstring]):
u = self.decode(native)[0] u = self.decode(native)[0]
self.assertEqual(u, utf8.decode('utf-8')) self.assertEqual(u, utf8.decode('utf-8'))
...@@ -40,7 +40,7 @@ class TestBase: ...@@ -40,7 +40,7 @@ class TestBase:
def test_errorhandle(self): def test_errorhandle(self):
for source, scheme, expected in self.codectests: for source, scheme, expected in self.codectests:
if type(source) == type(''): if isinstance(source, bytes):
func = self.decode func = self.decode
else: else:
func = self.encode func = self.encode
...@@ -57,7 +57,7 @@ class TestBase: ...@@ -57,7 +57,7 @@ class TestBase:
s = "\u0b13\u0b23\u0b60 nd eggs" s = "\u0b13\u0b23\u0b60 nd eggs"
self.assertEqual( self.assertEqual(
self.encode(s, "xmlcharrefreplace")[0], self.encode(s, "xmlcharrefreplace")[0],
"&#2835;&#2851;&#2912; nd eggs" b"&#2835;&#2851;&#2912; nd eggs"
) )
def test_customreplace_encode(self): def test_customreplace_encode(self):
...@@ -83,7 +83,7 @@ class TestBase: ...@@ -83,7 +83,7 @@ class TestBase:
sin, sout = self.xmlcharnametest sin, sout = self.xmlcharnametest
else: else:
sin = "\xab\u211c\xbb = \u2329\u1234\u232a" sin = "\xab\u211c\xbb = \u2329\u1234\u232a"
sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;" sout = b"&laquo;&real;&raquo; = &lang;&#4660;&rang;"
self.assertEqual(self.encode(sin, self.assertEqual(self.encode(sin,
"test.xmlcharnamereplace")[0], sout) "test.xmlcharnamereplace")[0], sout)
...@@ -92,7 +92,7 @@ class TestBase: ...@@ -92,7 +92,7 @@ class TestBase:
return (ret, exc.end) return (ret, exc.end)
codecs.register_error("test.cjktest", myreplace) codecs.register_error("test.cjktest", myreplace)
for ret in ([1, 2, 3], [], None, object(), 'string', ''): for ret in ([1, 2, 3], [], None, object(), b'string', b''):
self.assertRaises(TypeError, self.encode, self.unmappedunicode, self.assertRaises(TypeError, self.encode, self.unmappedunicode,
'test.cjktest') 'test.cjktest')
...@@ -101,7 +101,7 @@ class TestBase: ...@@ -101,7 +101,7 @@ class TestBase:
return ('x', int(exc.end)) return ('x', int(exc.end))
codecs.register_error("test.cjktest", myreplace) codecs.register_error("test.cjktest", myreplace)
self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh', self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
'test.cjktest'), ('abcdxefgh', 9)) 'test.cjktest'), (b'abcdxefgh', 9))
def myreplace(exc): def myreplace(exc):
return ('x', sys.maxint + 1) return ('x', sys.maxint + 1)
...@@ -127,14 +127,14 @@ class TestBase: ...@@ -127,14 +127,14 @@ class TestBase:
codecs.register_error("test.cjktest", myreplace) codecs.register_error("test.cjktest", myreplace)
self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh', self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
'test.cjktest'), 'test.cjktest'),
('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9)) (b'abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
def test_callback_forward_index(self): def test_callback_forward_index(self):
def myreplace(exc): def myreplace(exc):
return ('REPLACED', exc.end + 2) return ('REPLACED', exc.end + 2)
codecs.register_error("test.cjktest", myreplace) codecs.register_error("test.cjktest", myreplace)
self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh', self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
'test.cjktest'), ('abcdREPLACEDgh', 9)) 'test.cjktest'), (b'abcdREPLACEDgh', 9))
def test_callback_index_outofbound(self): def test_callback_index_outofbound(self):
def myreplace(exc): def myreplace(exc):
...@@ -147,8 +147,8 @@ class TestBase: ...@@ -147,8 +147,8 @@ class TestBase:
UTF8Reader = codecs.getreader('utf-8') UTF8Reader = codecs.getreader('utf-8')
for sizehint in [None] + list(range(1, 33)) + \ for sizehint in [None] + list(range(1, 33)) + \
[64, 128, 256, 512, 1024]: [64, 128, 256, 512, 1024]:
istream = UTF8Reader(StringIO(self.tstring[1])) istream = UTF8Reader(BytesIO(self.tstring[1]))
ostream = StringIO() ostream = BytesIO()
encoder = self.incrementalencoder() encoder = self.incrementalencoder()
while 1: while 1:
if sizehint is not None: if sizehint is not None:
...@@ -167,8 +167,8 @@ class TestBase: ...@@ -167,8 +167,8 @@ class TestBase:
UTF8Writer = codecs.getwriter('utf-8') UTF8Writer = codecs.getwriter('utf-8')
for sizehint in [None, -1] + list(range(1, 33)) + \ for sizehint in [None, -1] + list(range(1, 33)) + \
[64, 128, 256, 512, 1024]: [64, 128, 256, 512, 1024]:
istream = StringIO(self.tstring[0]) istream = BytesIO(self.tstring[0])
ostream = UTF8Writer(StringIO()) ostream = UTF8Writer(BytesIO())
decoder = self.incrementaldecoder() decoder = self.incrementaldecoder()
while 1: while 1:
data = istream.read(sizehint) data = istream.read(sizehint)
...@@ -187,26 +187,26 @@ class TestBase: ...@@ -187,26 +187,26 @@ class TestBase:
self.assertRaises(UnicodeEncodeError, e.encode, inv, True) self.assertRaises(UnicodeEncodeError, e.encode, inv, True)
e.errors = 'ignore' e.errors = 'ignore'
self.assertEqual(e.encode(inv, True), '') self.assertEqual(e.encode(inv, True), b'')
e.reset() e.reset()
def tempreplace(exc): def tempreplace(exc):
return ('called', exc.end) return ('called', exc.end)
codecs.register_error('test.incremental_error_callback', tempreplace) codecs.register_error('test.incremental_error_callback', tempreplace)
e.errors = 'test.incremental_error_callback' e.errors = 'test.incremental_error_callback'
self.assertEqual(e.encode(inv, True), 'called') self.assertEqual(e.encode(inv, True), b'called')
# again # again
e.errors = 'ignore' e.errors = 'ignore'
self.assertEqual(e.encode(inv, True), '') self.assertEqual(e.encode(inv, True), b'')
def test_streamreader(self): def test_streamreader(self):
UTF8Writer = codecs.getwriter('utf-8') UTF8Writer = codecs.getwriter('utf-8')
for name in ["read", "readline", "readlines"]: for name in ["read", "readline", "readlines"]:
for sizehint in [None, -1] + list(range(1, 33)) + \ for sizehint in [None, -1] + list(range(1, 33)) + \
[64, 128, 256, 512, 1024]: [64, 128, 256, 512, 1024]:
istream = self.reader(StringIO(self.tstring[0])) istream = self.reader(BytesIO(self.tstring[0]))
ostream = UTF8Writer(StringIO()) ostream = UTF8Writer(BytesIO())
func = getattr(istream, name) func = getattr(istream, name)
while 1: while 1:
data = func(sizehint) data = func(sizehint)
...@@ -225,8 +225,8 @@ class TestBase: ...@@ -225,8 +225,8 @@ class TestBase:
for name in readfuncs: for name in readfuncs:
for sizehint in [None] + list(range(1, 33)) + \ for sizehint in [None] + list(range(1, 33)) + \
[64, 128, 256, 512, 1024]: [64, 128, 256, 512, 1024]:
istream = UTF8Reader(StringIO(self.tstring[1])) istream = UTF8Reader(BytesIO(self.tstring[1]))
ostream = self.writer(StringIO()) ostream = self.writer(BytesIO())
func = getattr(istream, name) func = getattr(istream, name)
while 1: while 1:
if sizehint is not None: if sizehint is not None:
......
...@@ -138,6 +138,11 @@ codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value, ...@@ -138,6 +138,11 @@ codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
{ {
PyObject *cb; PyObject *cb;
if (PyUnicode_Check(value)) {
value = _PyUnicode_AsDefaultEncodedString(value, NULL);
if (value == NULL)
return -1;
}
if (!PyString_Check(value)) { if (!PyString_Check(value)) {
PyErr_SetString(PyExc_TypeError, "errors must be a string"); PyErr_SetString(PyExc_TypeError, "errors must be a string");
return -1; return -1;
...@@ -322,11 +327,11 @@ multibytecodec_encerror(MultibyteCodec *codec, ...@@ -322,11 +327,11 @@ multibytecodec_encerror(MultibyteCodec *codec,
goto errorexit; goto errorexit;
} }
assert(PyString_Check(retstr)); assert(PyBytes_Check(retstr));
retstrsize = PyString_GET_SIZE(retstr); retstrsize = PyBytes_GET_SIZE(retstr);
REQUIRE_ENCODEBUFFER(buf, retstrsize); REQUIRE_ENCODEBUFFER(buf, retstrsize);
memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize); memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
buf->outbuf += retstrsize; buf->outbuf += retstrsize;
newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
...@@ -1224,10 +1229,18 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, ...@@ -1224,10 +1229,18 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
if (cres == NULL) if (cres == NULL)
goto errorexit; goto errorexit;
if (PyString_Check(cres)) {
PyObject *cres2 = PyBytes_FromObject(cres);
if (cres2 == NULL)
return NULL;
Py_DECREF(cres);
cres = cres2;
}
if (!PyBytes_Check(cres)) { if (!PyBytes_Check(cres)) {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"stream function returned a " "stream function returned a "
"non-string object (%.100s)", "non-bytes object (%.100s)",
cres->ob_type->tp_name); cres->ob_type->tp_name);
goto errorexit; goto errorexit;
} }
...@@ -1596,8 +1609,8 @@ mbstreamwriter_reset(MultibyteStreamWriterObject *self) ...@@ -1596,8 +1609,8 @@ mbstreamwriter_reset(MultibyteStreamWriterObject *self)
if (pwrt == NULL) if (pwrt == NULL)
return NULL; return NULL;
assert(PyString_Check(pwrt)); assert(PyBytes_Check(pwrt));
if (PyString_Size(pwrt) > 0) { if (PyBytes_Size(pwrt) > 0) {
PyObject *wr; PyObject *wr;
wr = PyObject_CallMethod(self->stream, "write", "O", pwrt); wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
if (wr == NULL) { if (wr == NULL) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment