Unverified commit 709d23de, authored by Victor Stinner, committed by GitHub

bpo-36775: _PyCoreConfig only uses wchar_t* (GH-13062)

_PyCoreConfig: Change filesystem_encoding, filesystem_errors,
stdio_encoding and stdio_errors fields type from char* to wchar_t*.

Changes:

* PyInterpreterState: replace fscodec_initialized (int) with fs_codec
  structure.
* Add get_error_handler_wide() and unicode_encode_utf8() helper
  functions.
* Add error_handler parameter to unicode_encode_locale()
  and unicode_decode_locale().
* Remove _PyCoreConfig_SetString().
* Rename _PyCoreConfig_SetWideString() to _PyCoreConfig_SetString().
* Rename _PyCoreConfig_SetWideStringFromString()
  to _PyCoreConfig_DecodeLocale().
parent 6ae2bbbd
......@@ -207,8 +207,8 @@ typedef struct {
See Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors.
*/
char *filesystem_encoding;
char *filesystem_errors;
wchar_t *filesystem_encoding;
wchar_t *filesystem_errors;
wchar_t *pycache_prefix; /* PYTHONPYCACHEPREFIX, -X pycache_prefix=PATH */
wchar_t *program_name; /* Program name, see also Py_GetProgramName() */
......@@ -334,13 +334,13 @@ typedef struct {
Value set from PYTHONIOENCODING environment variable and
Py_SetStandardStreamEncoding() function.
See also 'stdio_errors' attribute. */
char *stdio_encoding;
wchar_t *stdio_encoding;
/* Error handler of sys.stdin and sys.stdout.
Value set from PYTHONIOENCODING environment variable and
Py_SetStandardStreamEncoding() function.
See also 'stdio_encoding' attribute. */
char *stdio_errors;
wchar_t *stdio_errors;
#ifdef MS_WINDOWS
/* If greater than zero, use io.FileIO instead of WindowsConsoleIO for sys
......
......@@ -106,12 +106,9 @@ PyAPI_FUNC(_PyInitError) _PyCoreConfig_Copy(
_PyCoreConfig *config,
const _PyCoreConfig *config2);
PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetString(
char **config_str,
const char *str);
PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetWideString(
wchar_t **config_str,
const wchar_t *str);
PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetWideStringFromString(
PyAPI_FUNC(_PyInitError) _PyCoreConfig_DecodeLocale(
wchar_t **config_str,
const char *str);
PyAPI_FUNC(_PyInitError) _PyCoreConfig_InitPathConfig(_PyCoreConfig *config);
......
......@@ -21,6 +21,9 @@ extern int _Py_SetFileSystemEncoding(
const char *errors);
extern void _Py_ClearFileSystemEncoding(void);
extern _PyInitError _PyUnicode_InitEncodings(PyInterpreterState *interp);
#ifdef MS_WINDOWS
extern int _PyUnicode_EnableLegacyWindowsFSEncoding(void);
#endif
PyAPI_FUNC(void) _Py_ClearStandardStreamEncoding(void);
......
......@@ -56,7 +56,14 @@ struct _is {
PyObject *codec_search_cache;
PyObject *codec_error_registry;
int codecs_initialized;
int fscodec_initialized;
/* fs_codec.encoding is initialized to NULL.
Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
struct {
char *encoding; /* Filesystem encoding (encoded to UTF-8) */
char *errors; /* Filesystem errors (encoded to UTF-8) */
_Py_error_handler error_handler;
} fs_codec;
_PyCoreConfig core_config;
#ifdef HAVE_DLOPEN
......
......@@ -260,6 +260,7 @@ Py_LOCAL_INLINE(PyObject *)
STRINGLIB(utf8_encoder)(PyObject *unicode,
STRINGLIB_CHAR *data,
Py_ssize_t size,
_Py_error_handler error_handler,
const char *errors)
{
Py_ssize_t i; /* index into data of next input character */
......@@ -268,7 +269,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
PyObject *rep = NULL;
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
#endif
#if STRINGLIB_SIZEOF_CHAR == 1
const Py_ssize_t max_char_size = 2;
......
This diff is collapsed.
......@@ -488,8 +488,8 @@ static int test_init_from_config(void)
Force it to 0 through the config. */
config.legacy_windows_stdio = 0;
#endif
config.stdio_encoding = "iso8859-1";
config.stdio_errors = "replace";
config.stdio_encoding = L"iso8859-1";
config.stdio_errors = L"replace";
putenv("PYTHONNOUSERSITE=");
Py_NoUserSiteDirectory = 0;
......
This diff is collapsed.
......@@ -14,7 +14,10 @@
/* --- File system encoding/errors -------------------------------- */
/* The filesystem encoding is chosen by config_init_fs_encoding(),
see also initfsencoding(). */
see also initfsencoding().
Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
are encoded to UTF-8. */
const char *Py_FileSystemDefaultEncoding = NULL;
int Py_HasFileSystemDefaultEncoding = 0;
const char *Py_FileSystemDefaultEncodeErrors = NULL;
......
......@@ -1668,7 +1668,7 @@ is_valid_fd(int fd)
static PyObject*
create_stdio(const _PyCoreConfig *config, PyObject* io,
int fd, int write_mode, const char* name,
const char* encoding, const char* errors)
const wchar_t* encoding, const wchar_t* errors)
{
PyObject *buf = NULL, *stream = NULL, *text = NULL, *raw = NULL, *res;
const char* mode;
......@@ -1718,7 +1718,7 @@ create_stdio(const _PyCoreConfig *config, PyObject* io,
#ifdef MS_WINDOWS
/* Windows console IO is always UTF-8 encoded */
if (PyWindowsConsoleIO_Check(raw))
encoding = "utf-8";
encoding = L"utf-8";
#endif
text = PyUnicode_FromString(name);
......@@ -1754,10 +1754,25 @@ create_stdio(const _PyCoreConfig *config, PyObject* io,
newline = "\n";
#endif
stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OsssOO",
buf, encoding, errors,
PyObject *encoding_str = PyUnicode_FromWideChar(encoding, -1);
if (encoding_str == NULL) {
Py_CLEAR(buf);
goto error;
}
PyObject *errors_str = PyUnicode_FromWideChar(errors, -1);
if (errors_str == NULL) {
Py_CLEAR(buf);
Py_CLEAR(encoding_str);
goto error;
}
stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OOOsOO",
buf, encoding_str, errors_str,
newline, line_buffering, write_through);
Py_CLEAR(buf);
Py_CLEAR(encoding_str);
Py_CLEAR(errors_str);
if (stream == NULL)
goto error;
......@@ -1874,7 +1889,7 @@ init_sys_streams(PyInterpreterState *interp)
fd = fileno(stderr);
std = create_stdio(config, iomod, fd, 1, "<stderr>",
config->stdio_encoding,
"backslashreplace");
L"backslashreplace");
if (std == NULL)
goto error;
......
......@@ -424,7 +424,7 @@ sys_getfilesystemencoding_impl(PyObject *module)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config;
return PyUnicode_FromString(config->filesystem_encoding);
return PyUnicode_FromWideChar(config->filesystem_encoding, -1);
}
/*[clinic input]
......@@ -439,7 +439,7 @@ sys_getfilesystemencodeerrors_impl(PyObject *module)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config;
return PyUnicode_FromString(config->filesystem_errors);
return PyUnicode_FromWideChar(config->filesystem_errors, -1);
}
/*[clinic input]
......@@ -1211,30 +1211,9 @@ static PyObject *
sys__enablelegacywindowsfsencoding_impl(PyObject *module)
/*[clinic end generated code: output=f5c3855b45e24fe9 input=2bfa931a20704492]*/
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
_PyCoreConfig *config = &interp->core_config;
/* Set the filesystem encoding to mbcs/replace (PEP 529) */
char *encoding = _PyMem_RawStrdup("mbcs");
char *errors = _PyMem_RawStrdup("replace");
if (encoding == NULL || errors == NULL) {
PyMem_Free(encoding);
PyMem_Free(errors);
PyErr_NoMemory();
return NULL;
}
PyMem_RawFree(config->filesystem_encoding);
config->filesystem_encoding = encoding;
PyMem_RawFree(config->filesystem_errors);
config->filesystem_errors = errors;
if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
config->filesystem_errors) < 0) {
PyErr_NoMemory();
if (_PyUnicode_EnableLegacyWindowsFSEncoding() < 0) {
return NULL;
}
Py_RETURN_NONE;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment