Commit 680cb152 authored by Serhiy Storchaka

Issue #26032: Optimized globbing in pathlib by using os.scandir(); it is now
about 1.5--4 times faster.
parent 1194c6df
...@@ -808,6 +808,9 @@ Optimizations ...@@ -808,6 +808,9 @@ Optimizations
:mod:`glob` module; they are now about 3--6 times faster. :mod:`glob` module; they are now about 3--6 times faster.
(Contributed by Serhiy Storchaka in :issue:`25596`). (Contributed by Serhiy Storchaka in :issue:`25596`).
* Optimized globbing in :mod:`pathlib` by using :func:`os.scandir`;
it is now about 1.5--4 times faster.
(Contributed by Serhiy Storchaka in :issue:`26032`).
Build and C API Changes Build and C API Changes
======================= =======================
......
...@@ -385,6 +385,8 @@ class _NormalAccessor(_Accessor): ...@@ -385,6 +385,8 @@ class _NormalAccessor(_Accessor):
listdir = _wrap_strfunc(os.listdir) listdir = _wrap_strfunc(os.listdir)
scandir = _wrap_strfunc(os.scandir)
chmod = _wrap_strfunc(os.chmod) chmod = _wrap_strfunc(os.chmod)
if hasattr(os, "lchmod"): if hasattr(os, "lchmod"):
...@@ -429,25 +431,6 @@ _normal_accessor = _NormalAccessor() ...@@ -429,25 +431,6 @@ _normal_accessor = _NormalAccessor()
# Globbing helpers # Globbing helpers
# #
@contextmanager
def _cached(func):
try:
func.__cached__
yield func
except AttributeError:
cache = {}
def wrapper(*args):
try:
return cache[args]
except KeyError:
value = cache[args] = func(*args)
return value
wrapper.__cached__ = True
try:
yield wrapper
finally:
cache.clear()
def _make_selector(pattern_parts): def _make_selector(pattern_parts):
pat = pattern_parts[0] pat = pattern_parts[0]
child_parts = pattern_parts[1:] child_parts = pattern_parts[1:]
...@@ -473,8 +456,10 @@ class _Selector: ...@@ -473,8 +456,10 @@ class _Selector:
self.child_parts = child_parts self.child_parts = child_parts
if child_parts: if child_parts:
self.successor = _make_selector(child_parts) self.successor = _make_selector(child_parts)
self.dironly = True
else: else:
self.successor = _TerminatingSelector() self.successor = _TerminatingSelector()
self.dironly = False
def select_from(self, parent_path): def select_from(self, parent_path):
"""Iterate over all child paths of `parent_path` matched by this """Iterate over all child paths of `parent_path` matched by this
...@@ -482,13 +467,15 @@ class _Selector: ...@@ -482,13 +467,15 @@ class _Selector:
path_cls = type(parent_path) path_cls = type(parent_path)
is_dir = path_cls.is_dir is_dir = path_cls.is_dir
exists = path_cls.exists exists = path_cls.exists
listdir = parent_path._accessor.listdir scandir = parent_path._accessor.scandir
return self._select_from(parent_path, is_dir, exists, listdir) if not is_dir(parent_path):
return iter([])
return self._select_from(parent_path, is_dir, exists, scandir)
class _TerminatingSelector: class _TerminatingSelector:
def _select_from(self, parent_path, is_dir, exists, listdir): def _select_from(self, parent_path, is_dir, exists, scandir):
yield parent_path yield parent_path
...@@ -498,13 +485,11 @@ class _PreciseSelector(_Selector): ...@@ -498,13 +485,11 @@ class _PreciseSelector(_Selector):
self.name = name self.name = name
_Selector.__init__(self, child_parts) _Selector.__init__(self, child_parts)
def _select_from(self, parent_path, is_dir, exists, listdir): def _select_from(self, parent_path, is_dir, exists, scandir):
try: try:
if not is_dir(parent_path):
return
path = parent_path._make_child_relpath(self.name) path = parent_path._make_child_relpath(self.name)
if exists(path): if (is_dir if self.dironly else exists)(path):
for p in self.successor._select_from(path, is_dir, exists, listdir): for p in self.successor._select_from(path, is_dir, exists, scandir):
yield p yield p
except PermissionError: except PermissionError:
return return
...@@ -516,17 +501,18 @@ class _WildcardSelector(_Selector): ...@@ -516,17 +501,18 @@ class _WildcardSelector(_Selector):
self.pat = re.compile(fnmatch.translate(pat)) self.pat = re.compile(fnmatch.translate(pat))
_Selector.__init__(self, child_parts) _Selector.__init__(self, child_parts)
def _select_from(self, parent_path, is_dir, exists, listdir): def _select_from(self, parent_path, is_dir, exists, scandir):
try: try:
if not is_dir(parent_path):
return
cf = parent_path._flavour.casefold cf = parent_path._flavour.casefold
for name in listdir(parent_path): entries = list(scandir(parent_path))
casefolded = cf(name) for entry in entries:
if self.pat.match(casefolded): if not self.dironly or entry.is_dir():
path = parent_path._make_child_relpath(name) name = entry.name
for p in self.successor._select_from(path, is_dir, exists, listdir): casefolded = cf(name)
yield p if self.pat.match(casefolded):
path = parent_path._make_child_relpath(name)
for p in self.successor._select_from(path, is_dir, exists, scandir):
yield p
except PermissionError: except PermissionError:
return return
...@@ -537,32 +523,30 @@ class _RecursiveWildcardSelector(_Selector): ...@@ -537,32 +523,30 @@ class _RecursiveWildcardSelector(_Selector):
def __init__(self, pat, child_parts): def __init__(self, pat, child_parts):
_Selector.__init__(self, child_parts) _Selector.__init__(self, child_parts)
def _iterate_directories(self, parent_path, is_dir, listdir): def _iterate_directories(self, parent_path, is_dir, scandir):
yield parent_path yield parent_path
try: try:
for name in listdir(parent_path): entries = list(scandir(parent_path))
path = parent_path._make_child_relpath(name) for entry in entries:
if is_dir(path) and not path.is_symlink(): if entry.is_dir() and not entry.is_symlink():
for p in self._iterate_directories(path, is_dir, listdir): path = parent_path._make_child_relpath(entry.name)
for p in self._iterate_directories(path, is_dir, scandir):
yield p yield p
except PermissionError: except PermissionError:
return return
def _select_from(self, parent_path, is_dir, exists, listdir): def _select_from(self, parent_path, is_dir, exists, scandir):
try: try:
if not is_dir(parent_path): yielded = set()
return try:
with _cached(listdir) as listdir: successor_select = self.successor._select_from
yielded = set() for starting_point in self._iterate_directories(parent_path, is_dir, scandir):
try: for p in successor_select(starting_point, is_dir, exists, scandir):
successor_select = self.successor._select_from if p not in yielded:
for starting_point in self._iterate_directories(parent_path, is_dir, listdir): yield p
for p in successor_select(starting_point, is_dir, exists, listdir): yielded.add(p)
if p not in yielded: finally:
yield p yielded.clear()
yielded.add(p)
finally:
yielded.clear()
except PermissionError: except PermissionError:
return return
......
...@@ -109,6 +109,9 @@ Library ...@@ -109,6 +109,9 @@ Library
- Issue #26798: Add BLAKE2 (blake2b and blake2s) to hashlib. - Issue #26798: Add BLAKE2 (blake2b and blake2s) to hashlib.
- Issue #26032: Optimized globbing in pathlib by using os.scandir(); it is now
about 1.5--4 times faster.
- Issue #25596: Optimized glob() and iglob() functions in the - Issue #25596: Optimized glob() and iglob() functions in the
glob module; they are now about 3--6 times faster. glob module; they are now about 3--6 times faster.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment