Commit 28ab634f, authored by Serhiy Storchaka

Issue #25596: Optimized glob() and iglob() functions in the
glob module; they are now about 3--6 times faster.
Parent: ea525a2d
...@@ -14,7 +14,7 @@ The :mod:`glob` module finds all the pathnames matching a specified pattern ...@@ -14,7 +14,7 @@ The :mod:`glob` module finds all the pathnames matching a specified pattern
according to the rules used by the Unix shell, although results are returned in according to the rules used by the Unix shell, although results are returned in
arbitrary order. No tilde expansion is done, but ``*``, ``?``, and character arbitrary order. No tilde expansion is done, but ``*``, ``?``, and character
ranges expressed with ``[]`` will be correctly matched. This is done by using ranges expressed with ``[]`` will be correctly matched. This is done by using
the :func:`os.listdir` and :func:`fnmatch.fnmatch` functions in concert, and the :func:`os.scandir` and :func:`fnmatch.fnmatch` functions in concert, and
not by actually invoking a subshell. Note that unlike :func:`fnmatch.fnmatch`, not by actually invoking a subshell. Note that unlike :func:`fnmatch.fnmatch`,
:mod:`glob` treats filenames beginning with a dot (``.``) as special cases. :mod:`glob` treats filenames beginning with a dot (``.``) as special cases.
(For tilde and shell variable expansion, use :func:`os.path.expanduser` and (For tilde and shell variable expansion, use :func:`os.path.expanduser` and
......
...@@ -767,6 +767,10 @@ Optimizations ...@@ -767,6 +767,10 @@ Optimizations
Argument Clinic this overhead is significantly decreased. Argument Clinic this overhead is significantly decreased.
(Contributed by Serhiy Storchaka in :issue:`27574`). (Contributed by Serhiy Storchaka in :issue:`27574`).
* Optimized :func:`~glob.glob` and :func:`~glob.iglob` functions in the
:mod:`glob` module; they are now about 3--6 times faster.
(Contributed by Serhiy Storchaka in :issue:`25596`).
Build and C API Changes Build and C API Changes
======================= =======================
......
...@@ -30,15 +30,16 @@ def iglob(pathname, *, recursive=False): ...@@ -30,15 +30,16 @@ def iglob(pathname, *, recursive=False):
If recursive is true, the pattern '**' will match any files and If recursive is true, the pattern '**' will match any files and
zero or more directories and subdirectories. zero or more directories and subdirectories.
""" """
it = _iglob(pathname, recursive) it = _iglob(pathname, recursive, False)
if recursive and _isrecursive(pathname): if recursive and _isrecursive(pathname):
s = next(it) # skip empty string s = next(it) # skip empty string
assert not s assert not s
return it return it
def _iglob(pathname, recursive): def _iglob(pathname, recursive, dironly):
dirname, basename = os.path.split(pathname) dirname, basename = os.path.split(pathname)
if not has_magic(pathname): if not has_magic(pathname):
assert not dironly
if basename: if basename:
if os.path.lexists(pathname): if os.path.lexists(pathname):
yield pathname yield pathname
...@@ -49,47 +50,39 @@ def _iglob(pathname, recursive): ...@@ -49,47 +50,39 @@ def _iglob(pathname, recursive):
return return
if not dirname: if not dirname:
if recursive and _isrecursive(basename): if recursive and _isrecursive(basename):
yield from glob2(dirname, basename) yield from _glob2(dirname, basename, dironly)
else: else:
yield from glob1(dirname, basename) yield from _glob1(dirname, basename, dironly)
return return
# `os.path.split()` returns the argument itself as a dirname if it is a # `os.path.split()` returns the argument itself as a dirname if it is a
# drive or UNC path. Prevent an infinite recursion if a drive or UNC path # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
# contains magic characters (i.e. r'\\?\C:'). # contains magic characters (i.e. r'\\?\C:').
if dirname != pathname and has_magic(dirname): if dirname != pathname and has_magic(dirname):
dirs = _iglob(dirname, recursive) dirs = _iglob(dirname, recursive, True)
else: else:
dirs = [dirname] dirs = [dirname]
if has_magic(basename): if has_magic(basename):
if recursive and _isrecursive(basename): if recursive and _isrecursive(basename):
glob_in_dir = glob2 glob_in_dir = _glob2
else: else:
glob_in_dir = glob1 glob_in_dir = _glob1
else: else:
glob_in_dir = glob0 glob_in_dir = _glob0
for dirname in dirs: for dirname in dirs:
for name in glob_in_dir(dirname, basename): for name in glob_in_dir(dirname, basename, dironly):
yield os.path.join(dirname, name) yield os.path.join(dirname, name)
# These 2 helper functions non-recursively glob inside a literal directory. # These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. `glob1` accepts a pattern while `glob0` # They return a list of basenames. _glob1 accepts a pattern while _glob0
# takes a literal basename (so it only has to check for its existence). # takes a literal basename (so it only has to check for its existence).
def glob1(dirname, pattern): def _glob1(dirname, pattern, dironly):
if not dirname: names = list(_iterdir(dirname, dironly))
if isinstance(pattern, bytes):
dirname = bytes(os.curdir, 'ASCII')
else:
dirname = os.curdir
try:
names = os.listdir(dirname)
except OSError:
return []
if not _ishidden(pattern): if not _ishidden(pattern):
names = [x for x in names if not _ishidden(x)] names = (x for x in names if not _ishidden(x))
return fnmatch.filter(names, pattern) return fnmatch.filter(names, pattern)
def glob0(dirname, basename): def _glob0(dirname, basename, dironly):
if not basename: if not basename:
# `os.path.split()` returns an empty basename for paths ending with a # `os.path.split()` returns an empty basename for paths ending with a
# directory separator. 'q*x/' should match only directories. # directory separator. 'q*x/' should match only directories.
...@@ -100,30 +93,49 @@ def glob0(dirname, basename): ...@@ -100,30 +93,49 @@ def glob0(dirname, basename):
return [basename] return [basename]
return [] return []
# The following functions are not public but can be used by third-party code.
def glob0(dirname, pattern):
    """Check for a literal basename inside *dirname*.

    Thin wrapper that forwards to ``_glob0`` with directory-only
    filtering switched off.
    """
    return _glob0(dirname, pattern, dironly=False)
def glob1(dirname, pattern):
    """Match *pattern* against the entries of *dirname*.

    Thin wrapper that forwards to ``_glob1`` with directory-only
    filtering switched off.
    """
    return _glob1(dirname, pattern, dironly=False)
# This helper function recursively yields relative pathnames inside a literal # This helper function recursively yields relative pathnames inside a literal
# directory. # directory.
def glob2(dirname, pattern): def _glob2(dirname, pattern, dironly):
assert _isrecursive(pattern) assert _isrecursive(pattern)
yield pattern[:0] yield pattern[:0]
yield from _rlistdir(dirname) yield from _rlistdir(dirname, dironly)
# Recursively yields relative pathnames inside a literal directory. # If dironly is false, yields all file names inside a directory.
def _rlistdir(dirname): # If dironly is true, yields only directory names.
def _iterdir(dirname, dironly):
if not dirname: if not dirname:
if isinstance(dirname, bytes): if isinstance(dirname, bytes):
dirname = bytes(os.curdir, 'ASCII') dirname = bytes(os.curdir, 'ASCII')
else: else:
dirname = os.curdir dirname = os.curdir
try: try:
names = os.listdir(dirname) with os.scandir(dirname) as it:
except os.error: for entry in it:
try:
if not dironly or entry.is_dir():
yield entry.name
except OSError:
pass
except OSError:
return return
# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dironly):
names = list(_iterdir(dirname, dironly))
for x in names: for x in names:
if not _ishidden(x): if not _ishidden(x):
yield x yield x
path = os.path.join(dirname, x) if dirname else x path = os.path.join(dirname, x) if dirname else x
for y in _rlistdir(path): for y in _rlistdir(path, dironly):
yield os.path.join(x, y) yield os.path.join(x, y)
......
...@@ -89,6 +89,9 @@ Core and Builtins ...@@ -89,6 +89,9 @@ Core and Builtins
Library Library
------- -------
- Issue #25596: Optimized glob() and iglob() functions in the
glob module; they are now about 3--6 times faster.
- Issue #27928: Add scrypt (password-based key derivation function) to - Issue #27928: Add scrypt (password-based key derivation function) to
hashlib module (requires OpenSSL 1.1.0). hashlib module (requires OpenSSL 1.1.0).
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment