"""Filename globbing utility."""

import os
import re
import fnmatch

# Names exported by `from glob import *`.  `escape` is a public
# (non-underscore) helper defined in this module, so it is exported together
# with the two globbing entry points.
__all__ = ["glob", "iglob", "escape"]
def glob(pathname):
    """Return a list of paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. However, unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns.

    This is the eager counterpart of `iglob`: it consumes the iterator
    returned by `iglob(pathname)` and returns the results as a list.

    """
    return list(iglob(pathname))

def iglob(pathname):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. However, unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns.

    The pattern is split into a directory part and a basename part.  If the
    directory part itself contains wildcards it is expanded recursively, and
    the basename is then matched inside every resulting directory.

    """
    dirname, basename = os.path.split(pathname)
    if not has_magic(pathname):
        # No wildcard anywhere: only report the literal path if it exists.
        if basename:
            if os.path.lexists(pathname):
                yield pathname
        else:
            # Patterns ending with a slash should match only directories
            if os.path.isdir(dirname):
                yield pathname
        return
    if not dirname:
        # No directory component: glob1 falls back to the current directory
        # when it is given a false dirname, and the bare basenames are the
        # results.
        yield from glob1(None, basename)
        return
    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        dirs = iglob(dirname)
    else:
        dirs = [dirname]
    # A wildcard basename needs a directory listing; a literal basename only
    # needs an existence check.
    if has_magic(basename):
        glob_in_dir = glob1
    else:
        glob_in_dir = glob0
    for parent in dirs:
        for name in glob_in_dir(parent, basename):
            yield os.path.join(parent, name)

# These two helper functions glob non-recursively inside a literal directory.
# Each returns a list of basenames.  `glob1` accepts a pattern, while `glob0`
# takes a literal basename (so it only has to check for its existence).

def glob1(dirname, pattern):
    """Return the basenames in `dirname` that match the wildcard `pattern`.

    A false `dirname` (None or empty) means the current directory; the
    str or bytes form of `os.curdir` is chosen to match the type of
    `pattern`.  If the directory cannot be listed (`OSError`), an empty
    list is returned.  Names starting with a dot are dropped unless the
    pattern itself starts with a dot.
    """
    if not dirname:
        if isinstance(pattern, bytes):
            dirname = bytes(os.curdir, 'ASCII')
        else:
            dirname = os.curdir
    try:
        names = os.listdir(dirname)
    except OSError:
        # Missing or unreadable directory: nothing can match.
        return []
    if not _ishidden(pattern):
        # Hidden entries are only eligible when the pattern asks for them.
        names = [x for x in names if not _ishidden(x)]
    return fnmatch.filter(names, pattern)
def glob0(dirname, basename):
    """Return `[basename]` if the literal entry exists in `dirname`, else `[]`.

    `basename` is taken literally (no wildcard expansion).  An empty
    `basename` stands for a pattern that ended with a directory separator;
    in that case the result is `['']` only when `dirname` is a directory.
    Otherwise the joined path is checked with `os.path.lexists`.
    """
    if not basename:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        if os.path.isdir(dirname):
            return [basename]
    else:
        if os.path.lexists(os.path.join(dirname, basename)):
            return [basename]
    return []

# Patterns matching any one of the glob metacharacters '*', '?' and '['.
# The single capture group lets `sub()` callers refer to the matched
# character as \1.  One pattern per string type (str / bytes).
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')


def has_magic(s):
    """Return True if `s` contains any of the glob metacharacters '*?['.

    `s` may be str or bytes; the pattern of the matching type is used.
    """
    if isinstance(s, bytes):
        match = magic_check_bytes.search(s)
    else:
        match = magic_check.search(s)
    return match is not None

def _ishidden(path):
    return path[0] in ('.', b'.'[0])

def escape(pathname):
    """Return `pathname` with every glob metacharacter neutralised.

    Each of '*', '?' and '[' is wrapped in square brackets so that it
    matches itself literally.  The drive part (as split off by
    `os.path.splitdrive`) is left untouched, because metacharacters do not
    work there and shouldn't be escaped.  Works for both str and bytes.
    """
    drive, rest = os.path.splitdrive(pathname)
    is_bytes = isinstance(rest, bytes)
    checker = magic_check_bytes if is_bytes else magic_check
    wrapped = br'[\1]' if is_bytes else r'[\1]'
    return drive + checker.sub(wrapped, rest)