Commit 9cbfffd1, authored by Guido van Rossum

tokenizer.c: make coding markup work again.

io.open() now takes all positional parameters (so we can conveniently
call it from C code).

test_tarfile.py no longer uses u"..." literals, but is otherwise still
badly broken.

This is a checkpoint; some more stuff now breaks.
parent e7ba4956
...@@ -49,7 +49,7 @@ class BlockingIOError(IOError): ...@@ -49,7 +49,7 @@ class BlockingIOError(IOError):
self.characters_written = characters_written self.characters_written = characters_written
def open(file, mode="r", buffering=None, *, encoding=None, newline=None): def open(file, mode="r", buffering=None, encoding=None, newline=None):
"""Replacement for the built-in open function. """Replacement for the built-in open function.
Args: Args:
...@@ -59,7 +59,6 @@ def open(file, mode="r", buffering=None, *, encoding=None, newline=None): ...@@ -59,7 +59,6 @@ def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
buffering: optional int >= 0 giving the buffer size; values buffering: optional int >= 0 giving the buffer size; values
can be: 0 = unbuffered, 1 = line buffered, can be: 0 = unbuffered, 1 = line buffered,
larger = fully buffered. larger = fully buffered.
Keywords (for text modes only; *must* be given as keyword arguments):
encoding: optional string giving the text encoding. encoding: optional string giving the text encoding.
newline: optional newlines specifier; must be None, '\n' or '\r\n'; newline: optional newlines specifier; must be None, '\n' or '\r\n';
specifies the line ending expected on input and written on specifies the line ending expected on input and written on
......
...@@ -432,17 +432,17 @@ class PaxReadTest(LongnameTest): ...@@ -432,17 +432,17 @@ class PaxReadTest(LongnameTest):
tarinfo = tar.getmember("pax/regtype1") tarinfo = tar.getmember("pax/regtype1")
self.assertEqual(tarinfo.uname, "foo") self.assertEqual(tarinfo.uname, "foo")
self.assertEqual(tarinfo.gname, "bar") self.assertEqual(tarinfo.gname, "bar")
self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"") self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "")
tarinfo = tar.getmember("pax/regtype2") tarinfo = tar.getmember("pax/regtype2")
self.assertEqual(tarinfo.uname, "") self.assertEqual(tarinfo.uname, "")
self.assertEqual(tarinfo.gname, "bar") self.assertEqual(tarinfo.gname, "bar")
self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"") self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "")
tarinfo = tar.getmember("pax/regtype3") tarinfo = tar.getmember("pax/regtype3")
self.assertEqual(tarinfo.uname, "tarfile") self.assertEqual(tarinfo.uname, "tarfile")
self.assertEqual(tarinfo.gname, "tarfile") self.assertEqual(tarinfo.gname, "tarfile")
self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"") self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "")
def test_pax_number_fields(self): def test_pax_number_fields(self):
# All following number fields are read from the pax header. # All following number fields are read from the pax header.
...@@ -727,11 +727,11 @@ class PaxWriteTest(GNUWriteTest): ...@@ -727,11 +727,11 @@ class PaxWriteTest(GNUWriteTest):
def test_pax_global_header(self): def test_pax_global_header(self):
pax_headers = { pax_headers = {
u"foo": u"bar", "foo": "bar",
u"uid": u"0", "uid": "0",
u"mtime": u"1.23", "mtime": "1.23",
u"test": u"", "test": "",
u"": u"test"} "": "test"}
tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, \ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, \
pax_headers=pax_headers) pax_headers=pax_headers)
...@@ -756,11 +756,11 @@ class PaxWriteTest(GNUWriteTest): ...@@ -756,11 +756,11 @@ class PaxWriteTest(GNUWriteTest):
def test_pax_extended_header(self): def test_pax_extended_header(self):
# The fields from the pax header have priority over the # The fields from the pax header have priority over the
# TarInfo. # TarInfo.
pax_headers = {u"path": u"foo", u"uid": u"123"} pax_headers = {"path": "foo", "uid": "123"}
tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="iso8859-1") tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="iso8859-1")
t = tarfile.TarInfo() t = tarfile.TarInfo()
t.name = u"" # non-ASCII t.name = "" # non-ASCII
t.uid = 8**8 # too large t.uid = 8**8 # too large
t.pax_headers = pax_headers t.pax_headers = pax_headers
tar.addfile(t) tar.addfile(t)
...@@ -808,11 +808,11 @@ class UstarUnicodeTest(unittest.TestCase): ...@@ -808,11 +808,11 @@ class UstarUnicodeTest(unittest.TestCase):
else: else:
tar.addfile(tarinfo) tar.addfile(tarinfo)
tarinfo.name = u"" tarinfo.name = ""
self.assertRaises(UnicodeError, tar.addfile, tarinfo) self.assertRaises(UnicodeError, tar.addfile, tarinfo)
tarinfo.name = "foo" tarinfo.name = "foo"
tarinfo.uname = u"" tarinfo.uname = ""
self.assertRaises(UnicodeError, tar.addfile, tarinfo) self.assertRaises(UnicodeError, tar.addfile, tarinfo)
def test_unicode_argument(self): def test_unicode_argument(self):
...@@ -825,7 +825,7 @@ class UstarUnicodeTest(unittest.TestCase): ...@@ -825,7 +825,7 @@ class UstarUnicodeTest(unittest.TestCase):
tar.close() tar.close()
def test_uname_unicode(self): def test_uname_unicode(self):
for name in (u"", ""): for name in ("", ""):
t = tarfile.TarInfo("foo") t = tarfile.TarInfo("foo")
t.uname = name t.uname = name
t.gname = name t.gname = name
...@@ -860,9 +860,9 @@ class PaxUnicodeTest(UstarUnicodeTest): ...@@ -860,9 +860,9 @@ class PaxUnicodeTest(UstarUnicodeTest):
def test_error_handlers(self): def test_error_handlers(self):
# Test if the unicode error handlers work correctly for characters # Test if the unicode error handlers work correctly for characters
# that cannot be expressed in a given encoding. # that cannot be expressed in a given encoding.
self._create_unicode_name(u"") self._create_unicode_name("")
for handler, name in (("utf-8", u"".encode("utf8")), for handler, name in (("utf-8", "".encode("utf8")),
("replace", "???"), ("ignore", "")): ("replace", "???"), ("ignore", "")):
tar = tarfile.open(tmpname, format=self.format, encoding="ascii", tar = tarfile.open(tmpname, format=self.format, encoding="ascii",
errors=handler) errors=handler)
...@@ -874,11 +874,11 @@ class PaxUnicodeTest(UstarUnicodeTest): ...@@ -874,11 +874,11 @@ class PaxUnicodeTest(UstarUnicodeTest):
def test_error_handler_utf8(self): def test_error_handler_utf8(self):
# Create a pathname that has one component representable using # Create a pathname that has one component representable using
# iso8859-1 and the other only in iso8859-15. # iso8859-1 and the other only in iso8859-15.
self._create_unicode_name(u"/") self._create_unicode_name("/")
tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1", tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1",
errors="utf-8") errors="utf-8")
self.assertEqual(tar.getnames()[0], "/" + u"".encode("utf8")) self.assertEqual(tar.getnames()[0], "/" + "".encode("utf8"))
class AppendTest(unittest.TestCase): class AppendTest(unittest.TestCase):
......
...@@ -396,25 +396,29 @@ fp_readl(char *s, int size, struct tok_state *tok) ...@@ -396,25 +396,29 @@ fp_readl(char *s, int size, struct tok_state *tok)
static int static int
fp_setreadl(struct tok_state *tok, const char* enc) fp_setreadl(struct tok_state *tok, const char* enc)
{ {
PyObject *reader, *stream, *readline; PyObject *readline = NULL, *stream = NULL, *io = NULL;
int ok = 0;
/* XXX: constify filename argument. */ io = PyImport_ImportModule("io");
stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL); if (io == NULL)
if (stream == NULL) goto cleanup;
return 0;
reader = PyCodec_StreamReader(enc, stream, NULL); stream = PyObject_CallMethod(io, "open", "ssis",
Py_DECREF(stream); tok->filename, "r", -1, enc);
if (reader == NULL) if (stream == NULL)
return 0; goto cleanup;
readline = PyObject_GetAttrString(reader, "readline"); readline = PyObject_GetAttrString(stream, "readline");
Py_DECREF(reader);
if (readline == NULL) if (readline == NULL)
return 0; goto cleanup;
tok->decoding_readline = readline; tok->decoding_readline = readline;
return 1; ok = 1;
cleanup:
Py_XDECREF(stream);
Py_XDECREF(io);
return ok;
} }
/* Fetch the next byte from TOK. */ /* Fetch the next byte from TOK. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment