tokenizer.c: make coding markup work again.

io.open() now takes all positional parameters (so we can conveniently call it from C code). test_tarfile.py no longer uses u"..." literals, but is otherwise still badly broken. This is a checkpoint; some more stuff now breaks.

tokenizer.c: make coding markup work again.
io.open() now takes all positional parameters (so we can conveniently call it from C code). test_tarfile.py no longer uses u"..." literals, but is otherwise still badly broken. This is a checkpoint; some more stuff now breaks.
9cbfffd1 · Guido van Rossum · e7ba4956 · 9cbfffd1 · 9cbfffd1 · 9cbfffd1
Commit 9cbfffd1 authored Jun 07, 2007 by Guido van Rossum
Hide whitespace changes
Inline Side-by-side

Showing with 35 additions and 32 deletions

io.py Lib/io.py +1 -2

test_tarfile.py Lib/test/test_tarfile.py +17 -17

tokenizer.c Parser/tokenizer.c +17 -13

No files found.
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -49,7 +49,7 @@ class BlockingIOError(IOError):
        self.characters_written = characters_written
-def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
+def open(file, mode="r", buffering=None, encoding=None, newline=None):
    """Replacement for the built-in open function.
    Args:
@@ -59,7 +59,6 @@ def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.
-    Keywords (for text modes only; *must* be given as keyword arguments):
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\n' or '\r\n';
               specifies the line ending expected on input and written on

--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -432,17 +432,17 @@ class PaxReadTest(LongnameTest):
        tarinfo = tar.getmember("pax/regtype1")
        self.assertEqual(tarinfo.uname, "foo")
        self.assertEqual(tarinfo.gname, "bar")
-        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
+        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
        tarinfo = tar.getmember("pax/regtype2")
        self.assertEqual(tarinfo.uname, "")
        self.assertEqual(tarinfo.gname, "bar")
-        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
+        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
        tarinfo = tar.getmember("pax/regtype3")
        self.assertEqual(tarinfo.uname, "tarfile")
        self.assertEqual(tarinfo.gname, "tarfile")
-        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
+        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
    def test_pax_number_fields(self):
        # All following number fields are read from the pax header.
@@ -727,11 +727,11 @@ class PaxWriteTest(GNUWriteTest):
    def test_pax_global_header(self):
        pax_headers = {
-                u"foo": u"bar",
+                "foo": "bar",
-                u"uid": u"0",
+                "uid": "0",
-                u"mtime": u"1.23",
+                "mtime": "1.23",
-                u"test": u"äöü",
+                "test": "äöü",
-                u"äöü": u"test"}
+                "äöü": "test"}
        tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, \
                pax_headers=pax_headers)
@@ -756,11 +756,11 @@ class PaxWriteTest(GNUWriteTest):
    def test_pax_extended_header(self):
        # The fields from the pax header have priority over the
        # TarInfo.
-        pax_headers = {u"path": u"foo", u"uid": u"123"}
+        pax_headers = {"path": "foo", "uid": "123"}
        tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="iso8859-1")
        t = tarfile.TarInfo()
-        t.name = u"äöü"     # non-ASCII
+        t.name = "äöü"     # non-ASCII
        t.uid = 8**8        # too large
        t.pax_headers = pax_headers
        tar.addfile(t)
@@ -808,11 +808,11 @@ class UstarUnicodeTest(unittest.TestCase):
        else:
            tar.addfile(tarinfo)
-        tarinfo.name = u"äöü"
+        tarinfo.name = "äöü"
        self.assertRaises(UnicodeError, tar.addfile, tarinfo)
        tarinfo.name = "foo"
-        tarinfo.uname = u"äöü"
+        tarinfo.uname = "äöü"
        self.assertRaises(UnicodeError, tar.addfile, tarinfo)
        self.assertRaises(UnicodeError, tar.addfile, tarinfo)
    def test_unicode_argument(self):
@@ -825,7 +825,7 @@ class UstarUnicodeTest(unittest.TestCase):
        tar.close()
    def test_uname_unicode(self):
-        for name in (u"äöü", "äöü"):
+        for name in ("äöü", "äöü"):
            t = tarfile.TarInfo("foo")
            t.uname = name
            t.gname = name
@@ -860,9 +860,9 @@ class PaxUnicodeTest(UstarUnicodeTest):
    def test_error_handlers(self):
        # Test if the unicode error handlers work correctly for characters
        # that cannot be expressed in a given encoding.
-        self._create_unicode_name(u"äöü")
+        self._create_unicode_name("äöü")
-        for handler, name in (("utf-8", u"äöü".encode("utf8")),
+        for handler, name in (("utf-8", "äöü".encode("utf8")),
                    ("replace", "???"), ("ignore", "")):
            tar = tarfile.open(tmpname, format=self.format, encoding="ascii",
                    errors=handler)
@@ -874,11 +874,11 @@ class PaxUnicodeTest(UstarUnicodeTest):
    def test_error_handler_utf8(self):
        # Create a pathname that has one component representable using
        # iso8859-1 and the other only in iso8859-15.
-        self._create_unicode_name(u"äöü/€")
+        self._create_unicode_name("äöü/€")
        tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1",
                errors="utf-8")
-        self.assertEqual(tar.getnames()[0], "äöü/" + u"€".encode("utf8"))
+        self.assertEqual(tar.getnames()[0], "äöü/" + "€".encode("utf8"))
 class AppendTest(unittest.TestCase):

--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -396,25 +396,29 @@ fp_readl(char *s, int size, struct tok_state *tok)
 static int
 fp_setreadl(struct tok_state *tok, const char* enc)
 {
-	PyObject *reader, *stream, *readline;
+	PyObject *readline = NULL, *stream = NULL, *io = NULL;
+	int ok = 0;
-	/* XXX: constify filename argument. */
+	io = PyImport_ImportModule("io");
-	stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL);
+	if (io == NULL)
-	if (stream == NULL)
+		goto cleanup;
-		return 0;
-	reader = PyCodec_StreamReader(enc, stream, NULL);
+	stream = PyObject_CallMethod(io, "open", "ssis",
-	Py_DECREF(stream);
+				     tok->filename, "r", -1, enc);
-	if (reader == NULL)
+	if (stream == NULL)
-		return 0;
+		goto cleanup;
-	readline = PyObject_GetAttrString(reader, "readline");
+	readline = PyObject_GetAttrString(stream, "readline");
-	Py_DECREF(reader);
 	if (readline == NULL)
-		return 0;
+		goto cleanup;
 	tok->decoding_readline = readline;
-	return 1;
+	ok = 1;
+  cleanup:
+	Py_XDECREF(stream);
+	Py_XDECREF(io);
+	return ok;
 }
 /* Fetch the next byte from TOK. */