#!/usr/bin/env python3

"""\
List python source files.

There are three functions to check whether a file is a Python source, listed
here with increasing complexity:

- has_python_ext() checks whether a file name ends in '.py[w]'.
- looks_like_python() checks whether the file is not binary and either has
  the '.py[w]' extension or the first line contains the word 'python'.
- can_be_compiled() checks whether the file can be compiled by compile().

The file also must be of appropriate size - not bigger than a megabyte.

walk_python_files() recursively lists all Python files under the given directories.
"""
__author__ = "Oleg Broytmann, Georg Brandl"

# Public API: the only names exported by a star-import of this module.
__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]


import os
import re

# Matches any byte treated here as a sign of binary content: the control
# characters 0x00-0x08 and 0x0E-0x1F, plus DEL (0x7F). Tab, newline, vertical
# tab, form feed and carriage return (0x09-0x0D) are deliberately allowed.
binary_re = re.compile(br'[\x00-\x08\x0E-\x1F\x7F]')

# Set to True to make print_debug() report why files are accepted or skipped.
debug = False

def print_debug(msg):
    """Print msg to standard output, but only when the module-level
    ``debug`` flag is true; otherwise do nothing."""
    if debug:
        print(msg)


def _open(fullpath):
    """Open fullpath for binary reading, or return None if it should be skipped.

    The file is skipped (and the reason reported through print_debug()) when
    it cannot be stat'ed, when it is bigger than a megabyte, or when it
    cannot be opened.  On success the open binary file object is returned;
    closing it is the caller's responsibility.
    """
    try:
        size = os.stat(fullpath).st_size
    except OSError as err:  # Permission denied - ignore the file
        print_debug("%s: permission denied: %s" % (fullpath, err))
        return None

    if size > 1024*1024:  # too big
        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
        return None

    try:
        return open(fullpath, "rb")
    except OSError as err:  # Access denied, or a special file - ignore it
        print_debug("%s: access denied: %s" % (fullpath, err))
        return None

def has_python_ext(fullpath):
    """Tell whether the file name fullpath ends in '.py' or '.pyw'."""
    python_suffixes = (".py", ".pyw")
    return fullpath.endswith(python_suffixes)

def looks_like_python(fullpath):
    """Guess whether fullpath is a Python source file from its first line.

    Returns False when the file cannot be opened by _open() or when its first
    line contains bytes matched by binary_re.  Otherwise returns True if the
    name ends in '.py' or '.pyw', or if the first line contains the word
    'python' (e.g. a shebang line); False in every other case.
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    # Only the first line is ever inspected, so read just that much.
    with infile:
        line = infile.readline()

    if binary_re.search(line):
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False

    if fullpath.endswith((".py", ".pyw")):
        return True

    # No Python extension: accept a disguised Python script (e.g. CGI)
    # whose first line mentions 'python'.
    return b"python" in line

def can_be_compiled(fullpath):
    """Tell whether the contents of fullpath compile as Python source.

    Returns False when the file cannot be opened by _open() or when compile()
    rejects its contents (the reason is reported through print_debug());
    returns True otherwise.  The code is only compiled, never executed.
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    with infile:
        code = infile.read()

    try:
        # dont_inherit=True: judge the file on its own, without any future
        # statements or compiler flags in effect in this module.
        compile(code, fullpath, "exec", dont_inherit=True)
    except Exception as err:
        # Deliberately broad: compile() reports bad input with more than one
        # exception type, and any failure simply means "not compilable".
        print_debug("%s: cannot compile: %s" % (fullpath, err))
        return False

    return True


def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """\
    Recursively yield all Python source files below the given paths.

    paths: a list of files and/or directories to be checked.
    is_python: a function that takes a file name and checks whether it is a
               Python source file
    exclude_dirs: a list of directory base names that should be excluded in
                  the search

    An entry of paths that is a file is yielded if is_python() accepts it; an
    entry that is a directory is walked with os.walk() and every accepted
    file below it is yielded as a joined path.  Entries that are neither are
    only reported through print_debug().
    """
    # Build the lookup set once instead of rescanning the list per directory.
    excluded = set(exclude_dirs) if exclude_dirs is not None else set()

    for path in paths:
        print_debug("testing: %s" % path)
        if os.path.isfile(path):
            if is_python(path):
                yield path
        elif os.path.isdir(path):
            print_debug("    it is a directory")
            for dirpath, dirnames, filenames in os.walk(path):
                # Prune in place: os.walk() only skips a subdirectory when it
                # is removed from the very list object it handed out.
                dirnames[:] = [name for name in dirnames if name not in excluded]
                for filename in filenames:
                    fullpath = os.path.join(dirpath, filename)
                    print_debug("testing: %s" % fullpath)
                    if is_python(fullpath):
                        yield fullpath
        else:
            print_debug("    unknown type")


if __name__ == "__main__":
    # Two simple examples/tests, both scanning the current directory.
    # First: the default heuristic check (looks_like_python).
    for fullpath in walk_python_files(['.']):
        print(fullpath)
    print("----------")
    # Second: the strict check - only files that actually compile.
    for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
        print(fullpath)