Kaydet (Commit) 99815892 authored tarafından Martin v. Löwis's avatar Martin v. Löwis

New environment variable PYTHONIOENCODING.

üst 7f7ca35f
...@@ -130,6 +130,14 @@ change in future releases of Python. ...@@ -130,6 +130,14 @@ change in future releases of Python.
.. versionadded:: 2.3 .. versionadded:: 2.3
.. cfunction:: int PyFile_SetEncodingAndErrors(PyFileObject *p, const char *enc, char *errors)
Set the file's encoding for Unicode output to *enc*, and its error
mode to *errors*. Return 1 on success and 0 on failure.
.. versionadded:: 2.6
.. cfunction:: int PyFile_SoftSpace(PyObject *p, int newflag) .. cfunction:: int PyFile_SoftSpace(PyObject *p, int newflag)
.. index:: single: softspace (file attribute) .. index:: single: softspace (file attribute)
......
...@@ -2165,6 +2165,13 @@ the particular object. ...@@ -2165,6 +2165,13 @@ the particular object.
.. versionadded:: 2.3 .. versionadded:: 2.3
.. attribute:: file.errors
The Unicode error handler used along with the encoding.
.. versionadded:: 2.6
.. attribute:: file.mode .. attribute:: file.mode
The I/O mode for the file. If the file was created using the :func:`open` The I/O mode for the file. If the file was created using the :func:`open`
......
...@@ -481,6 +481,13 @@ These environment variables influence Python's behavior. ...@@ -481,6 +481,13 @@ These environment variables influence Python's behavior.
.. versionadded:: 2.6 .. versionadded:: 2.6
.. envvar:: PYTHONIOENCODING
Overrides the encoding used for stdin/stdout/stderr, in the syntax
encodingname:errorhandler, with the :errorhandler part being optional.
.. versionadded:: 2.6
.. envvar:: PYTHONNOUSERSITE .. envvar:: PYTHONNOUSERSITE
......
...@@ -24,6 +24,7 @@ typedef struct { ...@@ -24,6 +24,7 @@ typedef struct {
int f_newlinetypes; /* Types of newlines seen */ int f_newlinetypes; /* Types of newlines seen */
int f_skipnextlf; /* Skip next \n */ int f_skipnextlf; /* Skip next \n */
PyObject *f_encoding; PyObject *f_encoding;
PyObject *f_errors;
PyObject *weakreflist; /* List of weak references */ PyObject *weakreflist; /* List of weak references */
int unlocked_count; /* Num. currently running sections of code int unlocked_count; /* Num. currently running sections of code
using f_fp with the GIL released. */ using f_fp with the GIL released. */
...@@ -37,6 +38,7 @@ PyAPI_DATA(PyTypeObject) PyFile_Type; ...@@ -37,6 +38,7 @@ PyAPI_DATA(PyTypeObject) PyFile_Type;
PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *); PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *);
PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int); PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int);
PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *); PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *);
PyAPI_FUNC(int) PyFile_SetEncodingAndErrors(PyObject *, const char *, char *errors);
PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *, PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *,
int (*)(FILE *)); int (*)(FILE *));
PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *); PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *);
......
...@@ -385,6 +385,26 @@ class SysModuleTest(unittest.TestCase): ...@@ -385,6 +385,26 @@ class SysModuleTest(unittest.TestCase):
## self.assert_(r[0][2] > 100, r[0][2]) ## self.assert_(r[0][2] > 100, r[0][2])
## self.assert_(r[1][2] > 100, r[1][2]) ## self.assert_(r[1][2] > 100, r[1][2])
def test_ioencoding(self):
import subprocess,os
env = dict(os.environ)
# Test character: cent sign, encoded as 0x4A (ASCII J) in CP424,
# not representable in ASCII.
env["PYTHONIOENCODING"] = "cp424"
p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
stdout = subprocess.PIPE, env=env)
out = p.stdout.read().strip()
self.assertEqual(out, unichr(0xa2).encode("cp424"))
env["PYTHONIOENCODING"] = "ascii:replace"
p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
stdout = subprocess.PIPE, env=env)
out = p.stdout.read().strip()
self.assertEqual(out, '?')
def test_main(): def test_main():
test.test_support.run_unittest(SysModuleTest) test.test_support.run_unittest(SysModuleTest)
......
...@@ -12,6 +12,8 @@ What's New in Python 2.6 beta 1? ...@@ -12,6 +12,8 @@ What's New in Python 2.6 beta 1?
Core and Builtins Core and Builtins
----------------- -----------------
- New environment variable PYTHONIOENCODING.
- Patch #2488: Add sys.maxsize. - Patch #2488: Add sys.maxsize.
- Issue #2353: file.xreadlines() now emits a Py3k warning. - Issue #2353: file.xreadlines() now emits a Py3k warning.
......
...@@ -99,6 +99,7 @@ static char *usage_5 = "\ ...@@ -99,6 +99,7 @@ static char *usage_5 = "\
PYTHONHOME : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\ PYTHONHOME : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
The default module search path uses %s.\n\ The default module search path uses %s.\n\
PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\ PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
"; ";
......
...@@ -155,6 +155,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode, ...@@ -155,6 +155,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
Py_DECREF(f->f_name); Py_DECREF(f->f_name);
Py_DECREF(f->f_mode); Py_DECREF(f->f_mode);
Py_DECREF(f->f_encoding); Py_DECREF(f->f_encoding);
Py_DECREF(f->f_errors);
Py_INCREF(name); Py_INCREF(name);
f->f_name = name; f->f_name = name;
...@@ -170,6 +171,8 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode, ...@@ -170,6 +171,8 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
f->f_skipnextlf = 0; f->f_skipnextlf = 0;
Py_INCREF(Py_None); Py_INCREF(Py_None);
f->f_encoding = Py_None; f->f_encoding = Py_None;
Py_INCREF(Py_None);
f->f_errors = Py_None;
if (f->f_mode == NULL) if (f->f_mode == NULL)
return NULL; return NULL;
...@@ -435,19 +438,38 @@ PyFile_SetBufSize(PyObject *f, int bufsize) ...@@ -435,19 +438,38 @@ PyFile_SetBufSize(PyObject *f, int bufsize)
} }
/* Set the encoding used to output Unicode strings. /* Set the encoding used to output Unicode strings.
Returh 1 on success, 0 on failure. */ Return 1 on success, 0 on failure. */
int int
PyFile_SetEncoding(PyObject *f, const char *enc) PyFile_SetEncoding(PyObject *f, const char *enc)
{
return PyFile_SetEncodingAndErrors(f, enc, NULL);
}
int
PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
{ {
PyFileObject *file = (PyFileObject*)f; PyFileObject *file = (PyFileObject*)f;
PyObject *str = PyBytes_FromString(enc); PyObject *str, *oerrors;
assert(PyFile_Check(f)); assert(PyFile_Check(f));
str = PyBytes_FromString(enc);
if (!str) if (!str)
return 0; return 0;
if (errors) {
oerrors = PyString_FromString(errors);
if (!oerrors) {
Py_DECREF(str);
return 0;
}
} else {
oerrors = Py_None;
Py_INCREF(Py_None);
}
Py_DECREF(file->f_encoding); Py_DECREF(file->f_encoding);
file->f_encoding = str; file->f_encoding = str;
Py_DECREF(file->f_errors);
file->f_errors = oerrors;
return 1; return 1;
} }
...@@ -491,6 +513,7 @@ file_dealloc(PyFileObject *f) ...@@ -491,6 +513,7 @@ file_dealloc(PyFileObject *f)
Py_XDECREF(f->f_name); Py_XDECREF(f->f_name);
Py_XDECREF(f->f_mode); Py_XDECREF(f->f_mode);
Py_XDECREF(f->f_encoding); Py_XDECREF(f->f_encoding);
Py_XDECREF(f->f_errors);
drop_readahead(f); drop_readahead(f);
Py_TYPE(f)->tp_free((PyObject *)f); Py_TYPE(f)->tp_free((PyObject *)f);
} }
...@@ -1879,6 +1902,8 @@ static PyMemberDef file_memberlist[] = { ...@@ -1879,6 +1902,8 @@ static PyMemberDef file_memberlist[] = {
"file name"}, "file name"},
{"encoding", T_OBJECT, OFF(f_encoding), RO, {"encoding", T_OBJECT, OFF(f_encoding), RO,
"file encoding"}, "file encoding"},
{"errors", T_OBJECT, OFF(f_errors), RO,
"Unicode error handler"},
/* getattr(f, "closed") is implemented without this table */ /* getattr(f, "closed") is implemented without this table */
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };
...@@ -2093,6 +2118,8 @@ file_new(PyTypeObject *type, PyObject *args, PyObject *kwds) ...@@ -2093,6 +2118,8 @@ file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
((PyFileObject *)self)->f_mode = not_yet_string; ((PyFileObject *)self)->f_mode = not_yet_string;
Py_INCREF(Py_None); Py_INCREF(Py_None);
((PyFileObject *)self)->f_encoding = Py_None; ((PyFileObject *)self)->f_encoding = Py_None;
Py_INCREF(Py_None);
((PyFileObject *)self)->f_errors = Py_None;
((PyFileObject *)self)->weakreflist = NULL; ((PyFileObject *)self)->weakreflist = NULL;
((PyFileObject *)self)->unlocked_count = 0; ((PyFileObject *)self)->unlocked_count = 0;
} }
...@@ -2295,7 +2322,9 @@ PyFile_WriteObject(PyObject *v, PyObject *f, int flags) ...@@ -2295,7 +2322,9 @@ PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
if ((flags & Py_PRINT_RAW) && if ((flags & Py_PRINT_RAW) &&
PyUnicode_Check(v) && enc != Py_None) { PyUnicode_Check(v) && enc != Py_None) {
char *cenc = PyBytes_AS_STRING(enc); char *cenc = PyBytes_AS_STRING(enc);
value = PyUnicode_AsEncodedString(v, cenc, "strict"); char *errors = fobj->f_errors == Py_None ?
"strict" : PyBytes_AS_STRING(fobj->f_errors);
value = PyUnicode_AsEncodedString(v, cenc, errors);
if (value == NULL) if (value == NULL)
return -1; return -1;
} else { } else {
......
...@@ -132,10 +132,19 @@ Py_InitializeEx(int install_sigs) ...@@ -132,10 +132,19 @@ Py_InitializeEx(int install_sigs)
PyThreadState *tstate; PyThreadState *tstate;
PyObject *bimod, *sysmod; PyObject *bimod, *sysmod;
char *p; char *p;
char *icodeset; /* On Windows, input codeset may theoretically
differ from output codeset. */
char *codeset = NULL;
char *errors = NULL;
int free_codeset = 0;
int overridden = 0;
#if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET) #if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
char *codeset; char *saved_locale, *loc_codeset;
char *saved_locale;
PyObject *sys_stream, *sys_isatty; PyObject *sys_stream, *sys_isatty;
#endif
#ifdef MS_WINDOWS
char ibuf[128];
char buf[128];
#endif #endif
extern void _Py_ReadyTypes(void); extern void _Py_ReadyTypes(void);
...@@ -238,38 +247,75 @@ Py_InitializeEx(int install_sigs) ...@@ -238,38 +247,75 @@ Py_InitializeEx(int install_sigs)
_PyGILState_Init(interp, tstate); _PyGILState_Init(interp, tstate);
#endif /* WITH_THREAD */ #endif /* WITH_THREAD */
if ((p = Py_GETENV("PYTHONIOENCODING")) && *p != '\0') {
p = icodeset = codeset = strdup(p);
free_codeset = 1;
errors = strchr(p, ':');
if (errors) {
*errors = '\0';
errors++;
}
overridden = 1;
}
#if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET) #if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
/* On Unix, set the file system encoding according to the /* On Unix, set the file system encoding according to the
user's preference, if the CODESET names a well-known user's preference, if the CODESET names a well-known
Python codec, and Py_FileSystemDefaultEncoding isn't Python codec, and Py_FileSystemDefaultEncoding isn't
initialized by other means. Also set the encoding of initialized by other means. Also set the encoding of
stdin and stdout if these are terminals. */ stdin and stdout if these are terminals, unless overridden. */
saved_locale = strdup(setlocale(LC_CTYPE, NULL)); if (!overridden || !Py_FileSystemDefaultEncoding) {
setlocale(LC_CTYPE, ""); saved_locale = strdup(setlocale(LC_CTYPE, NULL));
codeset = nl_langinfo(CODESET); setlocale(LC_CTYPE, "");
if (codeset && *codeset) { loc_codeset = nl_langinfo(CODESET);
PyObject *enc = PyCodec_Encoder(codeset); if (loc_codeset && *loc_codeset) {
if (enc) { PyObject *enc = PyCodec_Encoder(loc_codeset);
codeset = strdup(codeset); if (enc) {
Py_DECREF(enc); loc_codeset = strdup(loc_codeset);
} else { Py_DECREF(enc);
codeset = NULL; } else {
PyErr_Clear(); loc_codeset = NULL;
PyErr_Clear();
}
} else
loc_codeset = NULL;
setlocale(LC_CTYPE, saved_locale);
free(saved_locale);
if (!overridden) {
codeset = icodeset = loc_codeset;
free_codeset = 1;
}
/* Initialize Py_FileSystemDefaultEncoding from
locale even if PYTHONIOENCODING is set. */
if (!Py_FileSystemDefaultEncoding) {
Py_FileSystemDefaultEncoding = loc_codeset;
if (!overridden)
free_codeset = 0;
} }
} else }
codeset = NULL; #endif
setlocale(LC_CTYPE, saved_locale);
free(saved_locale); #ifdef MS_WINDOWS
if (!overridden) {
icodeset = ibuf;
encoding = buf;
sprintf(ibuf, "cp%d", GetConsoleCP());
sprintf(buf, "cp%d", GetConsoleOutputCP());
}
#endif
if (codeset) { if (codeset) {
sys_stream = PySys_GetObject("stdin"); sys_stream = PySys_GetObject("stdin");
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", ""); sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty) if (!sys_isatty)
PyErr_Clear(); PyErr_Clear();
if(sys_isatty && PyObject_IsTrue(sys_isatty) && if ((overridden ||
(sys_isatty && PyObject_IsTrue(sys_isatty))) &&
PyFile_Check(sys_stream)) { PyFile_Check(sys_stream)) {
if (!PyFile_SetEncoding(sys_stream, codeset)) if (!PyFile_SetEncodingAndErrors(sys_stream, icodeset, errors))
Py_FatalError("Cannot set codeset of stdin"); Py_FatalError("Cannot set codeset of stdin");
} }
Py_XDECREF(sys_isatty); Py_XDECREF(sys_isatty);
...@@ -278,9 +324,10 @@ Py_InitializeEx(int install_sigs) ...@@ -278,9 +324,10 @@ Py_InitializeEx(int install_sigs)
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", ""); sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty) if (!sys_isatty)
PyErr_Clear(); PyErr_Clear();
if(sys_isatty && PyObject_IsTrue(sys_isatty) && if ((overridden ||
(sys_isatty && PyObject_IsTrue(sys_isatty))) &&
PyFile_Check(sys_stream)) { PyFile_Check(sys_stream)) {
if (!PyFile_SetEncoding(sys_stream, codeset)) if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
Py_FatalError("Cannot set codeset of stdout"); Py_FatalError("Cannot set codeset of stdout");
} }
Py_XDECREF(sys_isatty); Py_XDECREF(sys_isatty);
...@@ -289,19 +336,17 @@ Py_InitializeEx(int install_sigs) ...@@ -289,19 +336,17 @@ Py_InitializeEx(int install_sigs)
sys_isatty = PyObject_CallMethod(sys_stream, "isatty", ""); sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
if (!sys_isatty) if (!sys_isatty)
PyErr_Clear(); PyErr_Clear();
if(sys_isatty && PyObject_IsTrue(sys_isatty) && if((overridden ||
(sys_isatty && PyObject_IsTrue(sys_isatty))) &&
PyFile_Check(sys_stream)) { PyFile_Check(sys_stream)) {
if (!PyFile_SetEncoding(sys_stream, codeset)) if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
Py_FatalError("Cannot set codeset of stderr"); Py_FatalError("Cannot set codeset of stderr");
} }
Py_XDECREF(sys_isatty); Py_XDECREF(sys_isatty);
if (!Py_FileSystemDefaultEncoding) if (free_codeset)
Py_FileSystemDefaultEncoding = codeset;
else
free(codeset); free(codeset);
} }
#endif
} }
void void
......
...@@ -1232,9 +1232,6 @@ _PySys_Init(void) ...@@ -1232,9 +1232,6 @@ _PySys_Init(void)
PyObject *m, *v, *sysdict; PyObject *m, *v, *sysdict;
PyObject *sysin, *sysout, *syserr; PyObject *sysin, *sysout, *syserr;
char *s; char *s;
#ifdef MS_WINDOWS
char buf[128];
#endif
m = Py_InitModule3("sys", sys_methods, sys_doc); m = Py_InitModule3("sys", sys_methods, sys_doc);
if (m == NULL) if (m == NULL)
...@@ -1272,23 +1269,6 @@ _PySys_Init(void) ...@@ -1272,23 +1269,6 @@ _PySys_Init(void)
syserr = PyFile_FromFile(stderr, "<stderr>", "w", _check_and_flush); syserr = PyFile_FromFile(stderr, "<stderr>", "w", _check_and_flush);
if (PyErr_Occurred()) if (PyErr_Occurred())
return NULL; return NULL;
#ifdef MS_WINDOWS
if(isatty(_fileno(stdin)) && PyFile_Check(sysin)) {
sprintf(buf, "cp%d", GetConsoleCP());
if (!PyFile_SetEncoding(sysin, buf))
return NULL;
}
if(isatty(_fileno(stdout)) && PyFile_Check(sysout)) {
sprintf(buf, "cp%d", GetConsoleOutputCP());
if (!PyFile_SetEncoding(sysout, buf))
return NULL;
}
if(isatty(_fileno(stderr)) && PyFile_Check(syserr)) {
sprintf(buf, "cp%d", GetConsoleOutputCP());
if (!PyFile_SetEncoding(syserr, buf))
return NULL;
}
#endif
PyDict_SetItemString(sysdict, "stdin", sysin); PyDict_SetItemString(sysdict, "stdin", sysin);
PyDict_SetItemString(sysdict, "stdout", sysout); PyDict_SetItemString(sysdict, "stdout", sysout);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment