Commit 84ae1180, authored by Victor Stinner

Issue #8603: Create a bytes version of os.environ for Unix

Create os.environb mapping and os.getenvb() function, os.unsetenv() encodes str
argument to the file system encoding with the surrogateescape error handler
(instead of utf8/strict) and accepts bytes, and posix.environ keys and values
are bytes.
Parent: d930b635
...@@ -107,6 +107,10 @@ process and user. ...@@ -107,6 +107,10 @@ process and user.
to modify the environment as well as query the environment. :func:`putenv` will to modify the environment as well as query the environment. :func:`putenv` will
be called automatically when the mapping is modified. be called automatically when the mapping is modified.
On Unix, keys and values are encoded and decoded using the encoding returned
by :func:`sys.getfilesystemencoding` with the ``'surrogateescape'`` error
handler. Use :data:`environb` if you would like to use a different encoding.
.. note:: .. note::
Calling :func:`putenv` directly does not change ``os.environ``, so it's better Calling :func:`putenv` directly does not change ``os.environ``, so it's better
...@@ -128,6 +132,16 @@ process and user. ...@@ -128,6 +132,16 @@ process and user.
one of the :meth:`pop` or :meth:`clear` methods is called. one of the :meth:`pop` or :meth:`clear` methods is called.
.. data:: environb
Bytes version of :data:`environ`: a mapping object representing the
environment as byte strings. :data:`environ` and :data:`environb` are
synchronized (modifying :data:`environb` updates :data:`environ`, and vice
versa).
Availability: Unix.
.. function:: chdir(path) .. function:: chdir(path)
fchdir(fd) fchdir(fd)
getcwd() getcwd()
...@@ -251,7 +265,19 @@ process and user. ...@@ -251,7 +265,19 @@ process and user.
.. function:: getenv(key, default=None) .. function:: getenv(key, default=None)
Return the value of the environment variable *key* if it exists, or Return the value of the environment variable *key* if it exists, or
*default* if it doesn't. Availability: most flavors of Unix, Windows. *default* if it doesn't. *key*, *default* and the result are str.
Availability: most flavors of Unix, Windows.
On Unix, keys and values are decoded using the encoding returned by
:func:`sys.getfilesystemencoding` with the ``'surrogateescape'`` error
handler. Use :func:`os.getenvb` if you would like to use a different encoding.
.. function:: getenvb(key, default=None)
Return the value of the environment variable *key* if it exists, or
*default* if it doesn't. *key*, *default* and the result are bytes.
Availability: most flavors of Unix.
.. function:: putenv(key, value) .. function:: putenv(key, value)
......
...@@ -69,17 +69,22 @@ In addition to many functions described in the :mod:`os` module documentation, ...@@ -69,17 +69,22 @@ In addition to many functions described in the :mod:`os` module documentation,
.. data:: environ .. data:: environ
A dictionary representing the string environment at the time the interpreter A dictionary representing the string environment at the time the interpreter
was started. For example, ``environ['HOME']`` is the pathname of your home was started. Keys and values are bytes on Unix and str on Windows. For
directory, equivalent to ``getenv("HOME")`` in C. example, ``environ[b'HOME']`` (``environ['HOME']`` on Windows) is the
pathname of your home directory, equivalent to ``getenv("HOME")`` in C.
Modifying this dictionary does not affect the string environment passed on by Modifying this dictionary does not affect the string environment passed on by
:func:`execv`, :func:`popen` or :func:`system`; if you need to change the :func:`execv`, :func:`popen` or :func:`system`; if you need to change the
environment, pass ``environ`` to :func:`execve` or add variable assignments and environment, pass ``environ`` to :func:`execve` or add variable assignments and
export statements to the command string for :func:`system` or :func:`popen`. export statements to the command string for :func:`system` or :func:`popen`.
.. versionchanged:: 3.2
On Unix, keys and values are bytes.
.. note:: .. note::
The :mod:`os` module provides an alternate implementation of ``environ`` which The :mod:`os` module provides an alternate implementation of ``environ``
updates the environment on modification. Note also that updating ``os.environ`` which updates the environment on modification. Note also that updating
will render this dictionary obsolete. Use of the :mod:`os` module version of :data:`os.environ` will render this dictionary obsolete. Use of the
this is recommended over direct access to the :mod:`posix` module. :mod:`os` module version of this is recommended over direct access to the
:mod:`posix` module.
...@@ -387,29 +387,33 @@ def get_exec_path(env=None): ...@@ -387,29 +387,33 @@ def get_exec_path(env=None):
from _abcoll import MutableMapping # Can't use collections (bootstrap) from _abcoll import MutableMapping # Can't use collections (bootstrap)
class _Environ(MutableMapping): class _Environ(MutableMapping):
def __init__(self, environ, keymap, putenv, unsetenv): def __init__(self, data, encodekey, decodekey, encodevalue, decodevalue, putenv, unsetenv):
self.keymap = keymap self.encodekey = encodekey
self.decodekey = decodekey
self.encodevalue = encodevalue
self.decodevalue = decodevalue
self.putenv = putenv self.putenv = putenv
self.unsetenv = unsetenv self.unsetenv = unsetenv
self.data = data = {} self.data = data
for key, value in environ.items():
data[keymap(key)] = str(value)
def __getitem__(self, key): def __getitem__(self, key):
return self.data[self.keymap(key)] value = self.data[self.encodekey(key)]
return self.decodevalue(value)
def __setitem__(self, key, value): def __setitem__(self, key, value):
value = str(value) key = self.encodekey(key)
value = self.encodevalue(value)
self.putenv(key, value) self.putenv(key, value)
self.data[self.keymap(key)] = value self.data[key] = value
def __delitem__(self, key): def __delitem__(self, key):
key = self.encodekey(key)
self.unsetenv(key) self.unsetenv(key)
del self.data[self.keymap(key)] del self.data[key]
def __iter__(self): def __iter__(self):
for key in self.data: for key in self.data:
yield key yield self.decodekey(key)
def __len__(self): def __len__(self):
return len(self.data) return len(self.data)
...@@ -439,22 +443,67 @@ except NameError: ...@@ -439,22 +443,67 @@ except NameError:
else: else:
__all__.append("unsetenv") __all__.append("unsetenv")
if name in ('os2', 'nt'): # Where Env Var Names Must Be UPPERCASE def _createenviron():
_keymap = lambda key: str(key.upper()) if name in ('os2', 'nt'):
else: # Where Env Var Names Can Be Mixed Case # Where Env Var Names Must Be UPPERCASE
_keymap = lambda key: str(key) def check_str(value):
if not isinstance(value, str):
environ = _Environ(environ, _keymap, _putenv, _unsetenv) raise TypeError("str expected, not %s" % type(value).__name__)
return value
encode = check_str
decode = str
def encodekey(key):
return encode(key).upper()
data = {}
for key, value in environ.items():
data[encodekey(key)] = value
else:
# Where Env Var Names Can Be Mixed Case
def encode(value):
if not isinstance(value, str):
raise TypeError("str expected, not %s" % type(value).__name__)
return value.encode(sys.getfilesystemencoding(), 'surrogateescape')
def decode(value):
return value.decode(sys.getfilesystemencoding(), 'surrogateescape')
encodekey = encode
data = environ
return _Environ(data,
encodekey, decode,
encode, decode,
_putenv, _unsetenv)
# unicode environ
environ = _createenviron()
del _createenviron
def getenv(key, default=None): def getenv(key, default=None):
"""Get an environment variable, return None if it doesn't exist. """Get an environment variable, return None if it doesn't exist.
The optional second argument can specify an alternate default.""" The optional second argument can specify an alternate default.
if isinstance(key, bytes): key, default and the result are str."""
key = key.decode(sys.getfilesystemencoding(), "surrogateescape")
return environ.get(key, default) return environ.get(key, default)
__all__.append("getenv") __all__.append("getenv")
if name not in ('os2', 'nt'):
    # Everywhere except os2/nt, expose a second, bytes-typed mapping that
    # is built on the very same underlying dict as ``environ``
    # (``environ.data``), so a write through either mapping is visible
    # through the other.
    def _check_bytes(value):
        """Return *value* unchanged if it is bytes, else raise TypeError."""
        if isinstance(value, bytes):
            return value
        raise TypeError("bytes expected, not %s" % type(value).__name__)

    # bytes environ: keys and values are only type-checked on the way in
    # and passed through ``bytes`` on the way out -- no encoding step.
    environb = _Environ(
        environ.data,
        encodekey=_check_bytes, decodekey=bytes,
        encodevalue=_check_bytes, decodevalue=bytes,
        putenv=_putenv, unsetenv=_unsetenv)
    del _check_bytes

    def getenvb(key, default=None):
        """Return the value of environment variable *key* as bytes.

        If the variable does not exist, return *default* (None unless
        another value is given).  key, default and the result are bytes."""
        return environb.get(key, default)

    __all__.append("getenvb")
def _exists(name): def _exists(name):
return name in globals() return name in globals()
......
...@@ -369,12 +369,15 @@ class EnvironTests(mapping_tests.BasicTestMappingProtocol): ...@@ -369,12 +369,15 @@ class EnvironTests(mapping_tests.BasicTestMappingProtocol):
def setUp(self): def setUp(self):
self.__save = dict(os.environ) self.__save = dict(os.environ)
self.__saveb = dict(os.environb)
for key, value in self._reference().items(): for key, value in self._reference().items():
os.environ[key] = value os.environ[key] = value
def tearDown(self): def tearDown(self):
os.environ.clear() os.environ.clear()
os.environ.update(self.__save) os.environ.update(self.__save)
os.environb.clear()
os.environb.update(self.__saveb)
def _reference(self): def _reference(self):
return {"KEY1":"VALUE1", "KEY2":"VALUE2", "KEY3":"VALUE3"} return {"KEY1":"VALUE1", "KEY2":"VALUE2", "KEY3":"VALUE3"}
...@@ -439,6 +442,24 @@ class EnvironTests(mapping_tests.BasicTestMappingProtocol): ...@@ -439,6 +442,24 @@ class EnvironTests(mapping_tests.BasicTestMappingProtocol):
# Supplied PATH environment variable # Supplied PATH environment variable
self.assertSequenceEqual(test_path, os.get_exec_path(test_env)) self.assertSequenceEqual(test_path, os.get_exec_path(test_env))
@unittest.skipIf(sys.platform == "win32", "POSIX specific test")
def test_environb(self):
    """Check that os.environ and os.environb reflect each other's writes.

    A str value stored through os.environ must be readable from
    os.environb as its file-system-encoded bytes, and a bytes value stored
    through os.environb must be readable from os.environ decoded with the
    'surrogateescape' error handler.

    Raises unittest.SkipTest when U+20AC cannot be encoded with the file
    system encoding.
    """
    fs_encoding = sys.getfilesystemencoding()

    # os.environ -> os.environb
    value = 'euro\u20ac'
    try:
        value_bytes = value.encode(fs_encoding, 'surrogateescape')
    except UnicodeEncodeError:
        raise unittest.SkipTest(
            "U+20AC character is not encodable to %s" % fs_encoding)
    os.environ['unicode'] = value
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(os.environ['unicode'], value)
    self.assertEqual(os.environb[b'unicode'], value_bytes)

    # os.environb -> os.environ
    value = b'\xff'
    os.environb[b'bytes'] = value
    self.assertEqual(os.environb[b'bytes'], value)
    value_str = value.decode(fs_encoding, 'surrogateescape')
    self.assertEqual(os.environ['bytes'], value_str)
class WalkTests(unittest.TestCase): class WalkTests(unittest.TestCase):
"""Tests for os.walk().""" """Tests for os.walk()."""
......
...@@ -803,8 +803,6 @@ class POSIXProcessTestCase(BaseTestCase): ...@@ -803,8 +803,6 @@ class POSIXProcessTestCase(BaseTestCase):
def test_undecodable_env(self): def test_undecodable_env(self):
for key, value in (('test', 'abc\uDCFF'), ('test\uDCFF', '42')): for key, value in (('test', 'abc\uDCFF'), ('test\uDCFF', '42')):
value_repr = repr(value).encode("ascii")
# test str with surrogates # test str with surrogates
script = "import os; print(repr(os.getenv(%s)))" % repr(key) script = "import os; print(repr(os.getenv(%s)))" % repr(key)
env = os.environ.copy() env = os.environ.copy()
...@@ -813,19 +811,19 @@ class POSIXProcessTestCase(BaseTestCase): ...@@ -813,19 +811,19 @@ class POSIXProcessTestCase(BaseTestCase):
[sys.executable, "-c", script], [sys.executable, "-c", script],
env=env) env=env)
stdout = stdout.rstrip(b'\n\r') stdout = stdout.rstrip(b'\n\r')
self.assertEquals(stdout, value_repr) self.assertEquals(stdout.decode('ascii'), repr(value))
# test bytes # test bytes
key = key.encode("ascii", "surrogateescape") key = key.encode("ascii", "surrogateescape")
value = value.encode("ascii", "surrogateescape") value = value.encode("ascii", "surrogateescape")
script = "import os; print(repr(os.getenv(%s)))" % repr(key) script = "import os; print(repr(os.getenvb(%s)))" % repr(key)
env = os.environ.copy() env = os.environ.copy()
env[key] = value env[key] = value
stdout = subprocess.check_output( stdout = subprocess.check_output(
[sys.executable, "-c", script], [sys.executable, "-c", script],
env=env) env=env)
stdout = stdout.rstrip(b'\n\r') stdout = stdout.rstrip(b'\n\r')
self.assertEquals(stdout, value_repr) self.assertEquals(stdout.decode('ascii'), repr(value))
@unittest.skipUnless(mswindows, "Windows specific tests") @unittest.skipUnless(mswindows, "Windows specific tests")
......
...@@ -348,6 +348,12 @@ C-API ...@@ -348,6 +348,12 @@ C-API
Library Library
------- -------
- Issue #8603: Create a bytes version of os.environ for Unix: create
os.environb mapping and os.getenvb() function, os.unsetenv() encodes str
argument to the file system encoding with the surrogateescape error handler
(instead of utf8/strict) and accepts bytes, and posix.environ keys and values
are bytes.
- Issue #8573: asyncore _strerror() function might throw ValueError. - Issue #8573: asyncore _strerror() function might throw ValueError.
- Issue #8483: asyncore.dispatcher's __getattr__ method produced confusing - Issue #8483: asyncore.dispatcher's __getattr__ method produced confusing
......
...@@ -498,14 +498,12 @@ convertenviron(void) ...@@ -498,14 +498,12 @@ convertenviron(void)
char *p = strchr(*e, '='); char *p = strchr(*e, '=');
if (p == NULL) if (p == NULL)
continue; continue;
k = PyUnicode_Decode(*e, (int)(p-*e), k = PyBytes_FromStringAndSize(*e, (int)(p-*e));
Py_FileSystemDefaultEncoding, "surrogateescape");
if (k == NULL) { if (k == NULL) {
PyErr_Clear(); PyErr_Clear();
continue; continue;
} }
v = PyUnicode_Decode(p+1, strlen(p+1), v = PyBytes_FromStringAndSize(p+1, strlen(p+1));
Py_FileSystemDefaultEncoding, "surrogateescape");
if (v == NULL) { if (v == NULL) {
PyErr_Clear(); PyErr_Clear();
Py_DECREF(k); Py_DECREF(k);
...@@ -5301,7 +5299,7 @@ posix_putenv(PyObject *self, PyObject *args) ...@@ -5301,7 +5299,7 @@ posix_putenv(PyObject *self, PyObject *args)
char *s1, *s2; char *s1, *s2;
char *newenv; char *newenv;
#endif #endif
PyObject *newstr; PyObject *newstr = NULL;
size_t len; size_t len;
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
...@@ -5324,15 +5322,19 @@ posix_putenv(PyObject *self, PyObject *args) ...@@ -5324,15 +5322,19 @@ posix_putenv(PyObject *self, PyObject *args)
APIRET rc; APIRET rc;
rc = DosSetExtLIBPATH(s2, BEGIN_LIBPATH); rc = DosSetExtLIBPATH(s2, BEGIN_LIBPATH);
if (rc != NO_ERROR) if (rc != NO_ERROR) {
return os2_error(rc); os2_error(rc);
goto error;
}
} else if (stricmp(s1, "ENDLIBPATH") == 0) { } else if (stricmp(s1, "ENDLIBPATH") == 0) {
APIRET rc; APIRET rc;
rc = DosSetExtLIBPATH(s2, END_LIBPATH); rc = DosSetExtLIBPATH(s2, END_LIBPATH);
if (rc != NO_ERROR) if (rc != NO_ERROR) {
return os2_error(rc); os2_error(rc);
goto error;
}
} else { } else {
#endif #endif
/* XXX This can leak memory -- not easy to fix :-( */ /* XXX This can leak memory -- not easy to fix :-( */
...@@ -5342,36 +5344,40 @@ posix_putenv(PyObject *self, PyObject *args) ...@@ -5342,36 +5344,40 @@ posix_putenv(PyObject *self, PyObject *args)
len = wcslen(s1) + wcslen(s2) + 2; len = wcslen(s1) + wcslen(s2) + 2;
newstr = PyUnicode_FromUnicode(NULL, (int)len - 1); newstr = PyUnicode_FromUnicode(NULL, (int)len - 1);
#else #else
len = strlen(s1) + strlen(s2) + 2; len = PyBytes_GET_SIZE(os1) + PyBytes_GET_SIZE(os2) + 2;
newstr = PyBytes_FromStringAndSize(NULL, (int)len - 1); newstr = PyBytes_FromStringAndSize(NULL, (int)len - 1);
#endif #endif
if (newstr == NULL) if (newstr == NULL) {
return PyErr_NoMemory(); PyErr_NoMemory();
goto error;
}
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
newenv = PyUnicode_AsUnicode(newstr); newenv = PyUnicode_AsUnicode(newstr);
_snwprintf(newenv, len, L"%s=%s", s1, s2); _snwprintf(newenv, len, L"%s=%s", s1, s2);
if (_wputenv(newenv)) { if (_wputenv(newenv)) {
Py_DECREF(newstr);
posix_error(); posix_error();
return NULL; goto error;
} }
#else #else
newenv = PyBytes_AS_STRING(newstr); newenv = PyBytes_AS_STRING(newstr);
PyOS_snprintf(newenv, len, "%s=%s", s1, s2); PyOS_snprintf(newenv, len, "%s=%s", s1, s2);
if (putenv(newenv)) { if (putenv(newenv)) {
Py_DECREF(newstr);
Py_DECREF(os1);
Py_DECREF(os2);
posix_error(); posix_error();
return NULL; goto error;
} }
#endif #endif
/* Install the first arg and newstr in posix_putenv_garbage; /* Install the first arg and newstr in posix_putenv_garbage;
* this will cause previous value to be collected. This has to * this will cause previous value to be collected. This has to
* happen after the real putenv() call because the old value * happen after the real putenv() call because the old value
* was still accessible until then. */ * was still accessible until then. */
if (PyDict_SetItem(posix_putenv_garbage, if (PyDict_SetItem(posix_putenv_garbage,
PyTuple_GET_ITEM(args, 0), newstr)) { #ifdef MS_WINDOWS
PyTuple_GET_ITEM(args, 0),
#else
os1,
#endif
newstr)) {
/* really not much we can do; just leak */ /* really not much we can do; just leak */
PyErr_Clear(); PyErr_Clear();
} }
...@@ -5382,12 +5388,20 @@ posix_putenv(PyObject *self, PyObject *args) ...@@ -5382,12 +5388,20 @@ posix_putenv(PyObject *self, PyObject *args)
#if defined(PYOS_OS2) #if defined(PYOS_OS2)
} }
#endif #endif
#ifndef MS_WINDOWS #ifndef MS_WINDOWS
Py_DECREF(os1); Py_DECREF(os1);
Py_DECREF(os2); Py_DECREF(os2);
#endif #endif
Py_INCREF(Py_None); Py_RETURN_NONE;
return Py_None;
error:
#ifndef MS_WINDOWS
Py_DECREF(os1);
Py_DECREF(os2);
#endif
Py_XDECREF(newstr);
return NULL;
} }
#endif /* putenv */ #endif /* putenv */
...@@ -5399,10 +5413,20 @@ Delete an environment variable."); ...@@ -5399,10 +5413,20 @@ Delete an environment variable.");
static PyObject * static PyObject *
posix_unsetenv(PyObject *self, PyObject *args) posix_unsetenv(PyObject *self, PyObject *args)
{ {
#ifdef MS_WINDOWS
char *s1; char *s1;
if (!PyArg_ParseTuple(args, "s:unsetenv", &s1)) if (!PyArg_ParseTuple(args, "s:unsetenv", &s1))
return NULL; return NULL;
#else
PyObject *os1;
char *s1;
if (!PyArg_ParseTuple(args, "O&:unsetenv",
PyUnicode_FSConverter, &os1))
return NULL;
s1 = PyBytes_AsString(os1);
#endif
unsetenv(s1); unsetenv(s1);
...@@ -5412,13 +5436,20 @@ posix_unsetenv(PyObject *self, PyObject *args) ...@@ -5412,13 +5436,20 @@ posix_unsetenv(PyObject *self, PyObject *args)
* old value was still accessible until then. * old value was still accessible until then.
*/ */
if (PyDict_DelItem(posix_putenv_garbage, if (PyDict_DelItem(posix_putenv_garbage,
PyTuple_GET_ITEM(args, 0))) { #ifdef MS_WINDOWS
PyTuple_GET_ITEM(args, 0)
#else
os1
#endif
)) {
/* really not much we can do; just leak */ /* really not much we can do; just leak */
PyErr_Clear(); PyErr_Clear();
} }
Py_INCREF(Py_None); #ifndef MS_WINDOWS
return Py_None; Py_DECREF(os1);
#endif
Py_RETURN_NONE;
} }
#endif /* unsetenv */ #endif /* unsetenv */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment