Commit e8d5145e, authored by Victor Stinner

Create os.fsdecode(): decode from the filesystem encoding with surrogateescape
error handler, or strict error handler on Windows.

 * Rewrite os.fsencode() documentation
 * Improve os.fsencode() and os.fsdecode() tests using the new PYTHONFSENCODING
   environment variable
parent dbe6042f
...@@ -155,13 +155,26 @@ process and user. ...@@ -155,13 +155,26 @@ process and user.
These functions are described in :ref:`os-file-dir`. These functions are described in :ref:`os-file-dir`.
.. function:: fsencode(value) .. function:: fsencode(filename)
Encode *value* to bytes for use in the file system, environment variables or Encode *filename* to the filesystem encoding with ``'surrogateescape'``
the command line. Use :func:`sys.getfilesystemencoding` and error handler, return :class:`bytes` unchanged. On Windows, use ``'strict'``
``'surrogateescape'`` error handler for strings and return bytes unchanged. error handler if the filesystem encoding is ``'mbcs'`` (which is the default
On Windows, use ``'strict'`` error handler for strings if the file system encoding).
encoding is ``'mbcs'`` (which is the default encoding).
:func:`fsdecode` is the reverse function.
.. versionadded:: 3.2
.. function:: fsdecode(filename)
Decode *filename* from the filesystem encoding with ``'surrogateescape'``
error handler, return :class:`str` unchanged. On Windows, use ``'strict'``
error handler if the filesystem encoding is ``'mbcs'`` (which is the default
encoding).
:func:`fsencode` is the reverse function.
.. versionadded:: 3.2 .. versionadded:: 3.2
......
...@@ -237,13 +237,16 @@ Major performance enhancements have been added: ...@@ -237,13 +237,16 @@ Major performance enhancements have been added:
* Stub * Stub
Unicode Filenames and unicode
======= =====================
The filesystem encoding can be specified by setting the The filesystem encoding can be specified by setting the
:envvar:`PYTHONFSENCODING` environment variable before running the interpreter. :envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
The value should be a string in the form ``<encoding>``, e.g. ``utf-8``. The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
The :mod:`os` module has two new functions: :func:`os.fsencode` and
:func:`os.fsdecode`.
IDLE IDLE
==== ====
......
...@@ -402,8 +402,7 @@ def get_exec_path(env=None): ...@@ -402,8 +402,7 @@ def get_exec_path(env=None):
path_list = path_listb path_list = path_listb
if path_list is not None and isinstance(path_list, bytes): if path_list is not None and isinstance(path_list, bytes):
path_list = path_list.decode(sys.getfilesystemencoding(), path_list = fsdecode(path_list)
'surrogateescape')
if path_list is None: if path_list is None:
path_list = defpath path_list = defpath
...@@ -536,19 +535,39 @@ if supports_bytes_environ: ...@@ -536,19 +535,39 @@ if supports_bytes_environ:
__all__.extend(("environb", "getenvb")) __all__.extend(("environb", "getenvb"))
def fsencode(value): def fsencode(filename):
"""Encode value for use in the file system, environment variables """
or the command line.""" Encode filename to the filesystem encoding with 'surrogateescape' error
if isinstance(value, bytes): handler, return bytes unchanged. On Windows, use 'strict' error handler if
return value the file system encoding is 'mbcs' (which is the default encoding).
elif isinstance(value, str): """
if isinstance(filename, bytes):
return filename
elif isinstance(filename, str):
encoding = sys.getfilesystemencoding()
if encoding == 'mbcs':
return filename.encode(encoding)
else:
return filename.encode(encoding, 'surrogateescape')
else:
raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
def fsdecode(filename):
"""
Decode filename from the filesystem encoding with 'surrogateescape' error
handler, return str unchanged. On Windows, use 'strict' error handler if
the file system encoding is 'mbcs' (which is the default encoding).
"""
if isinstance(filename, str):
return filename
elif isinstance(filename, bytes):
encoding = sys.getfilesystemencoding() encoding = sys.getfilesystemencoding()
if encoding == 'mbcs': if encoding == 'mbcs':
return value.encode(encoding) return filename.decode(encoding)
else: else:
return value.encode(encoding, 'surrogateescape') return filename.decode(encoding, 'surrogateescape')
else: else:
raise TypeError("expect bytes or str, not %s" % type(value).__name__) raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
def _exists(name): def _exists(name):
return name in globals() return name in globals()
......
...@@ -897,14 +897,6 @@ if sys.platform != 'win32': ...@@ -897,14 +897,6 @@ if sys.platform != 'win32':
class Pep383Tests(unittest.TestCase): class Pep383Tests(unittest.TestCase):
def setUp(self): def setUp(self):
def fsdecode(filename):
encoding = sys.getfilesystemencoding()
if encoding == 'mbcs':
errors = 'strict'
else:
errors = 'surrogateescape'
return filename.decode(encoding, errors)
if support.TESTFN_UNENCODABLE: if support.TESTFN_UNENCODABLE:
self.dir = support.TESTFN_UNENCODABLE self.dir = support.TESTFN_UNENCODABLE
else: else:
...@@ -930,7 +922,7 @@ if sys.platform != 'win32': ...@@ -930,7 +922,7 @@ if sys.platform != 'win32':
for fn in bytesfn: for fn in bytesfn:
f = open(os.path.join(self.bdir, fn), "w") f = open(os.path.join(self.bdir, fn), "w")
f.close() f.close()
fn = fsdecode(fn) fn = os.fsdecode(fn)
if fn in self.unicodefn: if fn in self.unicodefn:
raise ValueError("duplicate filename") raise ValueError("duplicate filename")
self.unicodefn.add(fn) self.unicodefn.add(fn)
...@@ -1139,12 +1131,43 @@ class Win32SymlinkTests(unittest.TestCase): ...@@ -1139,12 +1131,43 @@ class Win32SymlinkTests(unittest.TestCase):
self.assertNotEqual(os.lstat(link), os.stat(link)) self.assertNotEqual(os.lstat(link), os.stat(link))
class MiscTests(unittest.TestCase): class FSEncodingTests(unittest.TestCase):
def test_nop(self):
self.assertEquals(os.fsencode(b'abc\xff'), b'abc\xff')
self.assertEquals(os.fsdecode('abc\u0141'), 'abc\u0141')
@unittest.skipIf(os.name == "nt", "POSIX specific test") def test_identity(self):
def test_fsencode(self): # assert fsdecode(fsencode(x)) == x
self.assertEquals(os.fsencode(b'ab\xff'), b'ab\xff') for fn in ('unicode\u0141', 'latin\xe9', 'ascii'):
self.assertEquals(os.fsencode('ab\uDCFF'), b'ab\xff') try:
bytesfn = os.fsencode(fn)
except UnicodeEncodeError:
continue
self.assertEquals(os.fsdecode(bytesfn), fn)
def get_output(self, fs_encoding, func):
env = os.environ.copy()
env['PYTHONIOENCODING'] = 'utf-8'
env['PYTHONFSENCODING'] = fs_encoding
code = 'import os; print(%s, end="")' % func
process = subprocess.Popen(
[sys.executable, "-c", code],
stdout=subprocess.PIPE, env=env)
stdout, stderr = process.communicate()
self.assertEqual(process.returncode, 0)
return stdout.decode('utf-8')
def test_encodings(self):
def check(encoding, bytesfn, unicodefn):
encoded = self.get_output(encoding, 'repr(os.fsencode(%a))' % unicodefn)
self.assertEqual(encoded, repr(bytesfn))
decoded = self.get_output(encoding, 'repr(os.fsdecode(%a))' % bytesfn)
self.assertEqual(decoded, repr(unicodefn))
check('ascii', b'abc\xff', 'abc\udcff')
check('utf-8', b'\xc3\xa9\x80', '\xe9\udc80')
check('iso-8859-15', b'\xef\xa4', '\xef\u20ac')
def test_main(): def test_main():
...@@ -1163,7 +1186,7 @@ def test_main(): ...@@ -1163,7 +1186,7 @@ def test_main():
Pep383Tests, Pep383Tests,
Win32KillTests, Win32KillTests,
Win32SymlinkTests, Win32SymlinkTests,
MiscTests, FSEncodingTests,
) )
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -116,6 +116,9 @@ Extensions ...@@ -116,6 +116,9 @@ Extensions
Library Library
------- -------
- Create os.fsdecode(): decode from the filesystem encoding with
surrogateescape error handler, or strict error handler on Windows.
- Issue #3488: Provide convenient shorthand functions ``gzip.compress`` - Issue #3488: Provide convenient shorthand functions ``gzip.compress``
and ``gzip.decompress``. Original patch by Anand B. Pillai. and ``gzip.decompress``. Original patch by Anand B. Pillai.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment