Commit a49166ab authored by Thomas Boyt

Improve get_paths performance by not descending into ignored directories

Signed-off-by: Thomas Boyt <thomas@ledgerx.com>
parent 28864df2
...@@ -107,38 +107,68 @@ def exclude_paths(root, patterns, dockerfile=None): ...@@ -107,38 +107,68 @@ def exclude_paths(root, patterns, dockerfile=None):
exclude_patterns = list(set(patterns) - set(exceptions)) exclude_patterns = list(set(patterns) - set(exceptions))
all_paths = get_paths(root) paths = get_paths(root, exclude_patterns, include_patterns,
has_exceptions=len(exceptions) > 0)
# Remove all paths that are matched by any exclusion pattern
paths = [
p for p in all_paths
if not any(match_path(p, pattern) for pattern in exclude_patterns)
]
# Add back the set of paths that are matched by any inclusion pattern.
# Include parent dirs - if we add back 'foo/bar', add 'foo' as well
for p in all_paths:
if any(match_path(p, pattern) for pattern in include_patterns):
components = p.split('/')
paths += [
'/'.join(components[:end])
for end in range(1, len(components) + 1)
]
return set(paths) return set(paths)
def get_paths(root): def should_include(path, exclude_patterns, include_patterns):
"""
Given a path, a list of exclude patterns, and a list of inclusion patterns:
1. Returns True if the path doesn't match any exclusion pattern
2. Returns False if the path matches an exclusion pattern and doesn't match
an inclusion pattern
3. Returns True if the path matches an exclusion pattern and matches an
inclusion pattern
"""
for pattern in exclude_patterns:
if match_path(path, pattern):
for pattern in include_patterns:
if match_path(path, pattern):
return True
return False
return True
def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False):
paths = [] paths = []
for parent, dirs, files in os.walk(root, followlinks=False): for parent, dirs, files in os.walk(root, topdown=True, followlinks=False):
parent = os.path.relpath(parent, root) parent = os.path.relpath(parent, root)
if parent == '.': if parent == '.':
parent = '' parent = ''
# If exception rules exist, we can't skip recursing into ignored
# directories, as we need to look for exceptions in them.
#
# It may be possible to optimize this further for exception patterns
# that *couldn't* match within ignored directories.
#
# This matches the current docker logic (as of 2015-11-24):
# https://github.com/docker/docker/blob/37ba67bf636b34dc5c0c0265d62a089d0492088f/pkg/archive/archive.go#L555-L557
if not has_exceptions:
# Remove excluded patterns from the list of directories to traverse
# by mutating the dirs we're iterating over.
# This looks strange, but is considered the correct way to skip
# traversal. See https://docs.python.org/2/library/os.html#os.walk
dirs[:] = [d for d in dirs if
should_include(os.path.join(parent, d),
exclude_patterns, include_patterns)]
for path in dirs: for path in dirs:
paths.append(os.path.join(parent, path)) if should_include(os.path.join(parent, path),
exclude_patterns, include_patterns):
paths.append(os.path.join(parent, path))
for path in files: for path in files:
paths.append(os.path.join(parent, path)) if should_include(os.path.join(parent, path),
exclude_patterns, include_patterns):
paths.append(os.path.join(parent, path))
return paths return paths
......
...@@ -65,6 +65,7 @@ class BuildTest(helpers.BaseTestCase): ...@@ -65,6 +65,7 @@ class BuildTest(helpers.BaseTestCase):
'ignored', 'ignored',
'Dockerfile', 'Dockerfile',
'.dockerignore', '.dockerignore',
'!ignored/subdir/excepted-file',
'', # empty line '', # empty line
])) ]))
...@@ -76,6 +77,9 @@ class BuildTest(helpers.BaseTestCase): ...@@ -76,6 +77,9 @@ class BuildTest(helpers.BaseTestCase):
with open(os.path.join(subdir, 'file'), 'w') as f: with open(os.path.join(subdir, 'file'), 'w') as f:
f.write("this file should be ignored") f.write("this file should be ignored")
with open(os.path.join(subdir, 'excepted-file'), 'w') as f:
f.write("this file should not be ignored")
tag = 'docker-py-test-build-with-dockerignore' tag = 'docker-py-test-build-with-dockerignore'
stream = self.client.build( stream = self.client.build(
path=base_dir, path=base_dir,
...@@ -84,7 +88,7 @@ class BuildTest(helpers.BaseTestCase): ...@@ -84,7 +88,7 @@ class BuildTest(helpers.BaseTestCase):
for chunk in stream: for chunk in stream:
pass pass
c = self.client.create_container(tag, ['ls', '-1A', '/test']) c = self.client.create_container(tag, ['find', '/test', '-type', 'f'])
self.client.start(c) self.client.start(c)
self.client.wait(c) self.client.wait(c)
logs = self.client.logs(c) logs = self.client.logs(c)
...@@ -93,8 +97,9 @@ class BuildTest(helpers.BaseTestCase): ...@@ -93,8 +97,9 @@ class BuildTest(helpers.BaseTestCase):
logs = logs.decode('utf-8') logs = logs.decode('utf-8')
self.assertEqual( self.assertEqual(
list(filter(None, logs.split('\n'))), sorted(list(filter(None, logs.split('\n')))),
['not-ignored'], sorted(['/test/ignored/subdir/excepted-file',
'/test/not-ignored']),
) )
@requires_api_version('1.21') @requires_api_version('1.21')
......
...@@ -671,17 +671,17 @@ class ExcludePathsTest(base.BaseTestCase): ...@@ -671,17 +671,17 @@ class ExcludePathsTest(base.BaseTestCase):
def test_directory_with_single_exception(self): def test_directory_with_single_exception(self):
assert self.exclude(['foo', '!foo/bar/a.py']) == self.all_paths - set([ assert self.exclude(['foo', '!foo/bar/a.py']) == self.all_paths - set([
'foo/a.py', 'foo/b.py', 'foo/a.py', 'foo/b.py', 'foo', 'foo/bar'
]) ])
def test_directory_with_subdir_exception(self): def test_directory_with_subdir_exception(self):
assert self.exclude(['foo', '!foo/bar']) == self.all_paths - set([ assert self.exclude(['foo', '!foo/bar']) == self.all_paths - set([
'foo/a.py', 'foo/b.py', 'foo/a.py', 'foo/b.py', 'foo'
]) ])
def test_directory_with_wildcard_exception(self): def test_directory_with_wildcard_exception(self):
assert self.exclude(['foo', '!foo/*.py']) == self.all_paths - set([ assert self.exclude(['foo', '!foo/*.py']) == self.all_paths - set([
'foo/bar', 'foo/bar/a.py', 'foo/bar', 'foo/bar/a.py', 'foo'
]) ])
def test_subdirectory(self): def test_subdirectory(self):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment