Commit d624f18a, authored by Guido van Rossum

Added much functionality to the bytes type.

Change file.readinto() to require binary mode.
Parent: e06b6b8f
...@@ -21,7 +21,7 @@ extern "C" { ...@@ -21,7 +21,7 @@ extern "C" {
/* Object layout */ /* Object layout */
typedef struct { typedef struct {
PyObject_VAR_HEAD PyObject_VAR_HEAD
char *ob_sval; char *ob_bytes;
} PyBytesObject; } PyBytesObject;
/* Type object */ /* Type object */
...@@ -32,13 +32,14 @@ PyAPI_DATA(PyTypeObject) PyBytes_Type; ...@@ -32,13 +32,14 @@ PyAPI_DATA(PyTypeObject) PyBytes_Type;
#define PyBytes_CheckExact(self) ((self)->ob_type == &PyBytes_Type) #define PyBytes_CheckExact(self) ((self)->ob_type == &PyBytes_Type)
/* Direct API functions */ /* Direct API functions */
PyAPI_FUNC(PyObject *) PyBytes_FromObject(PyObject *);
PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t); PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t);
PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *); PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *);
PyAPI_FUNC(char *) PyBytes_AsString(PyObject *); PyAPI_FUNC(char *) PyBytes_AsString(PyObject *);
PyAPI_FUNC(int) PyBytes_Resize(PyObject *, Py_ssize_t); PyAPI_FUNC(int) PyBytes_Resize(PyObject *, Py_ssize_t);
/* Macros, trading safety for speed */ /* Macros, trading safety for speed */
#define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_sval) #define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_bytes)
#define PyBytes_GET_SIZE(self) (((PyBytesObject *)(self))->ob_size) #define PyBytes_GET_SIZE(self) (((PyBytesObject *)(self))->ob_size)
#ifdef __cplusplus #ifdef __cplusplus
......
"""Unit tests for the bytes type.""" """Unit tests for the bytes type."""
import os
import re
import sys import sys
import tempfile
import unittest import unittest
import test.test_support import test.test_support
...@@ -45,7 +48,7 @@ class BytesTest(unittest.TestCase): ...@@ -45,7 +48,7 @@ class BytesTest(unittest.TestCase):
self.assertRaises(ValueError, bytes, [C(256)]) self.assertRaises(ValueError, bytes, [C(256)])
def test_constructor_type_errors(self): def test_constructor_type_errors(self):
self.assertRaises(TypeError, bytes, 0) self.assertRaises(TypeError, bytes, 0.0)
class C: class C:
pass pass
self.assertRaises(TypeError, bytes, ["0"]) self.assertRaises(TypeError, bytes, ["0"])
...@@ -100,36 +103,233 @@ class BytesTest(unittest.TestCase): ...@@ -100,36 +103,233 @@ class BytesTest(unittest.TestCase):
self.failUnless(bytes.__doc__ != None) self.failUnless(bytes.__doc__ != None)
self.failUnless(bytes.__doc__.startswith("bytes(")) self.failUnless(bytes.__doc__.startswith("bytes("))
# XXX More stuff to test and build (TDD): def test_buffer_api(self):
# constructor from str: bytes(<str>) == bytes(map(ord, <str>))? short_sample = "Hello world\n"
# encoding constructor: bytes(<unicode>[, <encoding>[, <errors>]]) sample = short_sample + "x"*(20 - len(short_sample))
# default encoding Latin-1? (Matching ord) tfn = tempfile.mktemp()
# slicing try:
# extended slicing? # Prepare
# item assignment with open(tfn, "wb") as f:
# slice assignment f.write(short_sample)
# extended slice assignment? # Test readinto
# __contains__ with simple int arg with open(tfn, "rb") as f:
# __contains__ with another bytes arg? b = bytes([ord('x')]*20)
# find/index? (int or bytes arg?) n = f.readinto(b)
# count? (int arg) self.assertEqual(n, len(short_sample))
# concatenation (+) self.assertEqual(list(b), map(ord, sample))
# repeat? # Test writing in binary mode
# extend? with open(tfn, "wb") as f:
# append? f.write(b)
# insert? with open(tfn, "rb") as f:
# pop? self.assertEqual(f.read(), sample)
# __reversed__? # Test writing in text mode
# reverse? (inplace) with open(tfn, "w") as f:
# NOT sort! f.write(b)
with open(tfn, "r") as f:
self.assertEqual(f.read(), sample)
# Can't use readinto in text mode
with open(tfn, "r") as f:
self.assertRaises(TypeError, f.readinto, b)
finally:
try:
os.remove(tfn)
except os.error:
pass
def test_reversed(self):
input = map(ord, "Hello")
b = bytes(input)
output = list(reversed(b))
input.reverse()
self.assertEqual(output, input)
def test_getslice(self):
def by(s):
return bytes(map(ord, s))
b = by("Hello, world")
self.assertEqual(b[:5], by("Hello"))
self.assertEqual(b[1:5], by("ello"))
self.assertEqual(b[5:7], by(", "))
self.assertEqual(b[7:], by("world"))
self.assertEqual(b[7:12], by("world"))
self.assertEqual(b[7:100], by("world"))
self.assertEqual(b[:-7], by("Hello"))
self.assertEqual(b[-11:-7], by("ello"))
self.assertEqual(b[-7:-5], by(", "))
self.assertEqual(b[-5:], by("world"))
self.assertEqual(b[-5:12], by("world"))
self.assertEqual(b[-5:100], by("world"))
self.assertEqual(b[-100:5], by("Hello"))
def test_regexps(self):
def by(s):
return bytes(map(ord, s))
b = by("Hello, world")
self.assertEqual(re.findall(r"\w+", b), [by("Hello"), by("world")])
def test_setitem(self):
b = bytes([1, 2, 3])
b[1] = 100
self.assertEqual(b, bytes([1, 100, 3]))
b[-1] = 200
self.assertEqual(b, bytes([1, 100, 200]))
class C:
def __init__(self, i=0):
self.i = i
def __index__(self):
return self.i
b[0] = C(10)
self.assertEqual(b, bytes([10, 100, 200]))
try:
b[3] = 0
self.fail("Didn't raise IndexError")
except IndexError:
pass
try:
b[-10] = 0
self.fail("Didn't raise IndexError")
except IndexError:
pass
try:
b[0] = 256
self.fail("Didn't raise ValueError")
except ValueError:
pass
try:
b[0] = C(-1)
self.fail("Didn't raise ValueError")
except ValueError:
pass
try:
b[0] = None
self.fail("Didn't raise TypeError")
except TypeError:
pass
def test_delitem(self):
b = bytes(range(10))
del b[0]
self.assertEqual(b, bytes(range(1, 10)))
del b[-1]
self.assertEqual(b, bytes(range(1, 9)))
del b[4]
self.assertEqual(b, bytes([1, 2, 3, 4, 6, 7, 8]))
def test_setslice(self):
b = bytes(range(10))
self.assertEqual(list(b), list(range(10)))
b[0:5] = bytes([1, 1, 1, 1, 1])
self.assertEqual(b, bytes([1, 1, 1, 1, 1, 5, 6, 7, 8, 9]))
del b[0:-5]
self.assertEqual(b, bytes([5, 6, 7, 8, 9]))
b[0:0] = bytes([0, 1, 2, 3, 4])
self.assertEqual(b, bytes(range(10)))
b[-7:-3] = bytes([100, 101])
self.assertEqual(b, bytes([0, 1, 2, 100, 101, 7, 8, 9]))
b[3:5] = [3, 4, 5, 6]
self.assertEqual(b, bytes(range(10)))
def test_setslice_trap(self):
# This test verifies that we correctly handle assigning self
# to a slice of self (the old Lambert Meertens trap).
b = bytes(range(256))
b[8:] = b
self.assertEqual(b, bytes(list(range(8)) + list(range(256))))
def test_encoding(self):
sample = u"Hello world\n\u1234\u5678\u9abc\udef0"
for enc in ("utf8", "utf16"):
b = bytes(sample, enc)
self.assertEqual(b, bytes(map(ord, sample.encode(enc))))
self.assertRaises(UnicodeEncodeError, bytes, sample, "latin1")
b = bytes(sample, "latin1", "ignore")
self.assertEqual(b, bytes(sample[:-4]))
def test_decode(self):
sample = u"Hello world\n\u1234\u5678\u9abc\def0\def0"
for enc in ("utf8", "utf16"):
b = bytes(sample, enc)
self.assertEqual(b.decode(enc), sample)
sample = u"Hello world\n\x80\x81\xfe\xff"
b = bytes(sample, "latin1")
self.assertRaises(UnicodeDecodeError, b.decode, "utf8")
self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n")
def test_from_buffer(self):
sample = "Hello world\n\x80\x81\xfe\xff"
buf = buffer(sample)
b = bytes(buf)
self.assertEqual(b, bytes(map(ord, sample)))
def test_to_str(self):
sample = "Hello world\n\x80\x81\xfe\xff"
b = bytes(sample)
self.assertEqual(str(b), sample)
def test_from_int(self):
b = bytes(0)
self.assertEqual(b, bytes())
b = bytes(10)
self.assertEqual(b, bytes([0]*10))
b = bytes(10000)
self.assertEqual(b, bytes([0]*10000))
def test_concat(self):
b1 = bytes("abc")
b2 = bytes("def")
self.assertEqual(b1 + b2, bytes("abcdef"))
self.assertRaises(TypeError, lambda: b1 + "def")
self.assertRaises(TypeError, lambda: "abc" + b2)
def test_repeat(self):
b = bytes("abc")
self.assertEqual(b * 3, bytes("abcabcabc"))
self.assertEqual(b * 0, bytes())
self.assertEqual(b * -1, bytes())
self.assertRaises(TypeError, lambda: b * 3.14)
self.assertRaises(TypeError, lambda: 3.14 * b)
self.assertRaises(MemoryError, lambda: b * sys.maxint)
self.assertEqual(bytes('x')*100, bytes('x'*100))
# Optimizations:
# __iter__? (optimization) # __iter__? (optimization)
# __str__? (could return "".join(map(chr, self)) # __reversed__? (optimization)
# decode
# buffer API # XXX Some list methods?
# check that regexp searches work # extended slicing
# (I suppose re.sub() returns a string) # extended slice assignment
# file.readinto # extend (same as b[len(b):] = src)
# file.write # reverse (in-place)
# remove
# pop
# NOT sort!
# With int arg:
# __contains__
# index
# count
# append
# insert
# XXX Some string methods? (Those that don't use character properties)
# startswith
# endswith
# find, rfind
# __contains__ (bytes arg)
# index, rindex (bytes arg)
# join
# replace
# translate
# split, rsplit
# lstrip, rstrip, strip??
# XXX pickle and marshal support?
def test_main(): def test_main():
...@@ -137,5 +337,5 @@ def test_main(): ...@@ -137,5 +337,5 @@ def test_main():
if __name__ == "__main__": if __name__ == "__main__":
##test_main() test_main()
unittest.main() ##unittest.main()
...@@ -67,6 +67,17 @@ n = f.readinto(a) ...@@ -67,6 +67,17 @@ n = f.readinto(a)
f.close() f.close()
verify(buf == a.tostring()[:n]) verify(buf == a.tostring()[:n])
# verify readinto refuses text files
a = array('c', 'x'*10)
f = open(TESTFN, 'r')
try:
f.readinto(a)
raise TestFailed("readinto shouldn't work in text mode")
except TypeError:
pass
finally:
f.close()
# verify writelines with integers # verify writelines with integers
f = open(TESTFN, 'wb') f = open(TESTFN, 'wb')
try: try:
...@@ -261,13 +272,13 @@ methods = [("readline", ()), ("read", ()), ("readlines", ()), ...@@ -261,13 +272,13 @@ methods = [("readline", ()), ("read", ()), ("readlines", ()),
try: try:
# Prepare the testfile # Prepare the testfile
bag = open(TESTFN, "w") bag = open(TESTFN, "wb")
bag.write(filler * nchunks) bag.write(filler * nchunks)
bag.writelines(testlines) bag.writelines(testlines)
bag.close() bag.close()
# Test for appropriate errors mixing read* and iteration # Test for appropriate errors mixing read* and iteration
for methodname, args in methods: for methodname, args in methods:
f = open(TESTFN) f = open(TESTFN, 'rb')
if f.next() != filler: if f.next() != filler:
raise TestFailed, "Broken testfile" raise TestFailed, "Broken testfile"
meth = getattr(f, methodname) meth = getattr(f, methodname)
...@@ -286,7 +297,7 @@ try: ...@@ -286,7 +297,7 @@ try:
# Each line in the bag o' ham is 4 bytes ("h", "a", "m", "\n"), so # Each line in the bag o' ham is 4 bytes ("h", "a", "m", "\n"), so
# 4096 lines of that should get us exactly on the buffer boundary for # 4096 lines of that should get us exactly on the buffer boundary for
# any power-of-2 buffersize between 4 and 16384 (inclusive). # any power-of-2 buffersize between 4 and 16384 (inclusive).
f = open(TESTFN) f = open(TESTFN, 'rb')
for i in range(nchunks): for i in range(nchunks):
f.next() f.next()
testline = testlines.pop(0) testline = testlines.pop(0)
...@@ -328,7 +339,7 @@ try: ...@@ -328,7 +339,7 @@ try:
raise TestFailed("readlines() after next() with empty buffer " raise TestFailed("readlines() after next() with empty buffer "
"failed. Got %r, expected %r" % (line, testline)) "failed. Got %r, expected %r" % (line, testline))
# Reading after iteration hit EOF shouldn't hurt either # Reading after iteration hit EOF shouldn't hurt either
f = open(TESTFN) f = open(TESTFN, 'rb')
try: try:
for line in f: for line in f:
pass pass
......
This diff is collapsed.
...@@ -880,6 +880,11 @@ file_readinto(PyFileObject *f, PyObject *args) ...@@ -880,6 +880,11 @@ file_readinto(PyFileObject *f, PyObject *args)
if (f->f_fp == NULL) if (f->f_fp == NULL)
return err_closed(); return err_closed();
if (!f->f_binary) {
PyErr_SetString(PyExc_TypeError,
"readinto() requires binary mode");
return NULL;
}
/* refuse to mix with f.next() */ /* refuse to mix with f.next() */
if (f->f_buf != NULL && if (f->f_buf != NULL &&
(f->f_bufend - f->f_bufptr) > 0 && (f->f_bufend - f->f_bufptr) > 0 &&
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment