Unverified Kaydet (Commit) cc6e1b12 authored tarafından Joffrey F's avatar Joffrey F Kaydeden (comit) GitHub

Merge pull request #1914 from mefyl/master

Improve .dockerignore compliance
import os import os
import re
from ..constants import IS_WINDOWS_PLATFORM from ..constants import IS_WINDOWS_PLATFORM
from .fnmatch import fnmatch from fnmatch import fnmatch
from itertools import chain
from .utils import create_archive from .utils import create_archive
def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False): def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False):
root = os.path.abspath(path) root = os.path.abspath(path)
exclude = exclude or [] exclude = exclude or []
return create_archive( return create_archive(
files=sorted(exclude_paths(root, exclude, dockerfile=dockerfile)), files=sorted(exclude_paths(root, exclude, dockerfile=dockerfile)),
root=root, fileobj=fileobj, gzip=gzip root=root, fileobj=fileobj, gzip=gzip
) )
_SEP = re.compile('/|\\\\') if IS_WINDOWS_PLATFORM else re.compile('/')
def exclude_paths(root, patterns, dockerfile=None): def exclude_paths(root, patterns, dockerfile=None):
""" """
Given a root directory path and a list of .dockerignore patterns, return Given a root directory path and a list of .dockerignore patterns, return
...@@ -23,127 +27,90 @@ def exclude_paths(root, patterns, dockerfile=None): ...@@ -23,127 +27,90 @@ def exclude_paths(root, patterns, dockerfile=None):
All paths returned are relative to the root. All paths returned are relative to the root.
""" """
if dockerfile is None: if dockerfile is None:
dockerfile = 'Dockerfile' dockerfile = 'Dockerfile'
patterns = [p.lstrip('/') for p in patterns] def normalize(p):
exceptions = [p for p in patterns if p.startswith('!')] # Leading and trailing slashes are not relevant. Yes,
# "foo.py/" must exclude the "foo.py" regular file. "."
include_patterns = [p[1:] for p in exceptions] # components are not relevant either, even if the whole
include_patterns += [dockerfile, '.dockerignore'] # pattern is only ".", as the Docker reference states: "For
# historical reasons, the pattern . is ignored."
exclude_patterns = list(set(patterns) - set(exceptions)) split = [pt for pt in re.split(_SEP, p) if pt and pt != '.']
# ".." component must be cleared with the potential previous
paths = get_paths(root, exclude_patterns, include_patterns, # component, regardless of whether it exists: "A preprocessing
has_exceptions=len(exceptions) > 0) # step [...] eliminates . and .. elements using Go's
# filepath.".
return set(paths).union( i = 0
# If the Dockerfile is in a subdirectory that is excluded, get_paths while i < len(split):
# will not descend into it and the file will be skipped. This ensures if split[i] == '..':
# it doesn't happen. del split[i]
set([dockerfile.replace('/', os.path.sep)]) if i > 0:
if os.path.exists(os.path.join(root, dockerfile)) else set() del split[i - 1]
) i -= 1
else:
i += 1
def should_include(path, exclude_patterns, include_patterns, root): return split
"""
Given a path, a list of exclude patterns, and a list of inclusion patterns: patterns = (
(True, normalize(p[1:]))
1. Returns True if the path doesn't match any exclusion pattern if p.startswith('!') else
2. Returns False if the path matches an exclusion pattern and doesn't match (False, normalize(p))
an inclusion pattern for p in patterns)
3. Returns true if the path matches an exclusion pattern and matches an patterns = list(reversed(list(chain(
inclusion pattern # Exclude empty patterns such as "." or the empty string.
""" filter(lambda p: p[1], patterns),
for pattern in exclude_patterns: # Always include the Dockerfile and .dockerignore
if match_path(path, pattern): [(True, dockerfile.split('/')), (True, ['.dockerignore'])]))))
for pattern in include_patterns: return set(walk(root, patterns))
if match_path(path, pattern):
return True
if os.path.isabs(pattern) and match_path( def walk(root, patterns, default=True):
os.path.join(root, path), pattern):
return True
return False
return True
def should_check_directory(directory_path, exclude_patterns, include_patterns,
root):
""" """
Given a directory path, a list of exclude patterns, and a list of inclusion A collection of files lying below root that should be included according to
patterns: patterns.
1. Returns True if the directory path should be included according to
should_include.
2. Returns True if the directory path is the prefix for an inclusion
pattern
3. Returns False otherwise
""" """
# To account for exception rules, check directories if their path is a def match(p):
# a prefix to an inclusion pattern. This logic conforms with the current if p[1][0] == '**':
# docker logic (2016-10-27): rec = (p[0], p[1][1:])
# https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671 return [p] + (match(rec) if rec[1] else [rec])
elif fnmatch(f, p[1][0]):
def normalize_path(path): return [(p[0], p[1][1:])]
return path.replace(os.path.sep, '/') else:
return []
path_with_slash = normalize_path(directory_path) + '/'
possible_child_patterns = [ for f in os.listdir(root):
pattern for pattern in map(normalize_path, include_patterns) cur = os.path.join(root, f)
if (pattern + '/').startswith(path_with_slash) # The patterns if recursing in that directory.
] sub = list(chain(*(match(p) for p in patterns)))
directory_included = should_include( # Whether this file is explicitly included / excluded.
directory_path, exclude_patterns, include_patterns, root hit = next((p[0] for p in sub if not p[1]), None)
) # Whether this file is implicitly included / excluded.
return directory_included or len(possible_child_patterns) > 0 matched = default if hit is None else hit
sub = list(filter(lambda p: p[1], sub))
if os.path.isdir(cur):
def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False): # Entirely skip directories if there is no chance any subfile will
paths = [] # be included.
if all(not p[0] for p in sub) and not matched:
for parent, dirs, files in os.walk(root, topdown=True, followlinks=False): continue
parent = os.path.relpath(parent, root) # I think this would greatly speed up dockerignore handling by not
if parent == '.': # recursing into directories we are sure would be entirely
parent = '' # included, and only yielding the directory itself, which will be
# recursively archived anyway. However the current unit tests expect
# Remove excluded patterns from the list of directories to traverse # the full list of subfiles and I'm not 100% sure it would make no
# by mutating the dirs we're iterating over. # difference yet.
# This looks strange, but is considered the correct way to skip # if all(p[0] for p in sub) and matched:
# traversal. See https://docs.python.org/2/library/os.html#os.walk # yield f
dirs[:] = [ # continue
d for d in dirs if should_check_directory( children = False
os.path.join(parent, d), exclude_patterns, include_patterns, for r in (os.path.join(f, p) for p in walk(cur, sub, matched)):
root yield r
) children = True
] # The current unit tests expect directories only under those
# conditions. It might be simplifiable though.
for path in dirs: if (not sub or not children) and hit or hit is None and default:
if should_include(os.path.join(parent, path), yield f
exclude_patterns, include_patterns, root): elif matched:
paths.append(os.path.join(parent, path)) yield f
for path in files:
if should_include(os.path.join(parent, path),
exclude_patterns, include_patterns, root):
paths.append(os.path.join(parent, path))
return paths
def match_path(path, pattern):
pattern = pattern.rstrip('/' + os.path.sep)
if pattern and not os.path.isabs(pattern):
pattern = os.path.relpath(pattern)
pattern_components = pattern.split(os.path.sep)
if len(pattern_components) == 1 and IS_WINDOWS_PLATFORM:
pattern_components = pattern.split('/')
if '**' not in pattern:
path_components = path.split(os.path.sep)[:len(pattern_components)]
else:
path_components = path.split(os.path.sep)
return fnmatch('/'.join(path_components), '/'.join(pattern_components))
...@@ -23,7 +23,6 @@ from docker.utils import ( ...@@ -23,7 +23,6 @@ from docker.utils import (
decode_json_header, tar, split_command, parse_devices, update_headers, decode_json_header, tar, split_command, parse_devices, update_headers,
) )
from docker.utils.build import should_check_directory
from docker.utils.ports import build_port_bindings, split_port from docker.utils.ports import build_port_bindings, split_port
from docker.utils.utils import format_environment from docker.utils.utils import format_environment
...@@ -758,6 +757,13 @@ class ExcludePathsTest(unittest.TestCase): ...@@ -758,6 +757,13 @@ class ExcludePathsTest(unittest.TestCase):
self.all_paths - set(['foo/a.py']) self.all_paths - set(['foo/a.py'])
) )
def test_exclude_include_absolute_path(self):
    """Patterns written with a leading slash still apply to the tree.

    '/*' excludes every top-level entry and '!/*.py' re-includes the
    Python files, so both 'a.py' and 'b.py' are expected in the result.
    """
    context_dir = make_tree([], ['a.py', 'b.py'])
    ignore_rules = ['/*', '!/*.py']
    kept = exclude_paths(context_dir, ignore_rules)
    assert kept == {'a.py', 'b.py'}
def test_single_subdir_with_path_traversal(self): def test_single_subdir_with_path_traversal(self):
assert self.exclude(['foo/whoops/../a.py']) == convert_paths( assert self.exclude(['foo/whoops/../a.py']) == convert_paths(
self.all_paths - set(['foo/a.py']) self.all_paths - set(['foo/a.py'])
...@@ -876,12 +882,25 @@ class ExcludePathsTest(unittest.TestCase): ...@@ -876,12 +882,25 @@ class ExcludePathsTest(unittest.TestCase):
) )
) )
def test_exclude_include_absolute_path(self): def test_include_wildcard(self):
base = make_tree([], ['a.py', 'b.py']) base = make_tree(['a'], ['a/b.py'])
assert exclude_paths( assert exclude_paths(
base, base,
['/*', '!' + os.path.join(base, '*.py')] ['*', '!*/b.py']
) == set(['a.py', 'b.py']) ) == convert_paths(['a/b.py'])
def test_last_line_precedence(self):
    """When several patterns match a file, the last one listed decides.

    '*.md' excludes every markdown file, '!README*.md' re-includes the
    READMEs, and the trailing 'README-secret.md' excludes that single
    file again, leaving only the two remaining READMEs.
    """
    markdown_files = [
        'garbage.md',
        'thrash.md',
        'README.md',
        'README-bis.md',
        'README-secret.md',
    ]
    context_dir = make_tree([], markdown_files)
    ignore_rules = ['*.md', '!README*.md', 'README-secret.md']
    kept = exclude_paths(context_dir, ignore_rules)
    assert kept == {'README.md', 'README-bis.md'}
class TarTest(unittest.TestCase): class TarTest(unittest.TestCase):
...@@ -1019,69 +1038,6 @@ class TarTest(unittest.TestCase): ...@@ -1019,69 +1038,6 @@ class TarTest(unittest.TestCase):
assert tar_data.getmember('th.txt').mtime == -3600 assert tar_data.getmember('th.txt').mtime == -3600
class ShouldCheckDirectoryTest(unittest.TestCase):
    """Exercise should_check_directory against a fixed set of patterns.

    Every test passes the same exclude/include pattern lists and '.' as
    the root, and only varies the directory path being queried.
    """

    # Directories excluded by the patterns under test.
    exclude_patterns = [
        'exclude_rather_large_directory',
        'dir/with/subdir_excluded',
        'dir/with/exceptions'
    ]

    # Exceptions that re-include paths below an excluded directory.
    include_patterns = [
        'dir/with/exceptions/like_this_one',
        'dir/with/exceptions/in/descendents'
    ]

    def _check(self, directory_path):
        # Single call site for the function under test; the path is passed
        # through untouched so each test controls its own conversion.
        return should_check_directory(
            directory_path,
            self.exclude_patterns,
            self.include_patterns,
            '.'
        )

    def test_should_check_directory_not_excluded(self):
        assert self._check('not_excluded')
        assert self._check(convert_path('dir/with'))

    def test_shoud_check_parent_directories_of_excluded(self):
        assert self._check('dir')
        assert self._check(convert_path('dir/with'))

    def test_should_not_check_excluded_directories_with_no_exceptions(self):
        assert not self._check('exclude_rather_large_directory')
        assert not self._check(convert_path('dir/with/subdir_excluded'))

    def test_should_check_excluded_directory_with_exceptions(self):
        assert self._check(convert_path('dir/with/exceptions'))
        assert self._check(convert_path('dir/with/exceptions/in'))

    def test_should_not_check_siblings_of_exceptions(self):
        assert not self._check(
            convert_path('dir/with/exceptions/but_not_here')
        )

    def test_should_check_subdirectories_of_exceptions(self):
        assert self._check(
            convert_path('dir/with/exceptions/like_this_one/subdir')
        )
class FormatEnvironmentTest(unittest.TestCase): class FormatEnvironmentTest(unittest.TestCase):
def test_format_env_binary_unicode_value(self): def test_format_env_binary_unicode_value(self):
env_dict = { env_dict = {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment