pysource.py 3.76 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
#!/usr/bin/env python

"""\
List python source files.

There are three functions to check whether a file is a Python source, listed
here with increasing complexity:

- has_python_ext() checks whether a file name ends in '.py[w]'.
- look_like_python() checks whether the file is not binary and either has
  the '.py[w]' extension or the first line contains the word 'python'.
- can_be_compiled() checks whether the file can be compiled by compile().

The file also must be of appropriate size - not bigger than a megabyte.

walk_python_files() recursively lists all Python files under the given directories.
"""
Georg Brandl's avatar
Georg Brandl committed
18
__author__ = "Oleg Broytmann, Georg Brandl"
19 20 21 22

__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]


23
import os, re
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59

binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]')

debug = False

def print_debug(msg):
    if debug: print msg


def _open(fullpath):
    try:
        size = os.stat(fullpath).st_size
    except OSError, err: # Permission denied - ignore the file
        print_debug("%s: permission denied: %s" % (fullpath, err))
        return None

    if size > 1024*1024: # too big
        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
        return None

    try:
        return open(fullpath, 'rU')
    except IOError, err: # Access denied, or a special file - ignore it
        print_debug("%s: access denied: %s" % (fullpath, err))
        return None

def has_python_ext(fullpath):
    return fullpath.endswith(".py") or fullpath.endswith(".pyw")

def looks_like_python(fullpath):
    infile = _open(fullpath)
    if infile is None:
        return False

    line = infile.readline()
    infile.close()
60

61 62 63 64
    if binary_re.search(line):
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False
65

66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
    if fullpath.endswith(".py") or fullpath.endswith(".pyw"):
        return True
    elif "python" in line:
        # disguised Python script (e.g. CGI)
        return True

    return False

def can_be_compiled(fullpath):
    infile = _open(fullpath)
    if infile is None:
        return False

    code = infile.read()
    infile.close()

    try:
        compile(code, fullpath, "exec")
    except Exception, err:
        print_debug("%s: cannot compile: %s" % (fullpath, err))
        return False

    return True


def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """\
    Recursively yield all Python source files below the given paths.

    paths: a list of files and/or directories to be checked.
    is_python: a function that takes a file name and checks whether it is a
               Python source file
98
    exclude_dirs: a list of directory base names that should be excluded in
99 100 101 102
                  the search
    """
    if exclude_dirs is None:
        exclude_dirs=[]
103

104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
    for path in paths:
        print_debug("testing: %s" % path)
        if os.path.isfile(path):
            if is_python(path):
                yield path
        elif os.path.isdir(path):
            print_debug("    it is a directory")
            for dirpath, dirnames, filenames in os.walk(path):
                for exclude in exclude_dirs:
                    if exclude in dirnames:
                        dirnames.remove(exclude)
                for filename in filenames:
                    fullpath = os.path.join(dirpath, filename)
                    print_debug("testing: %s" % fullpath)
                    if is_python(fullpath):
                        yield fullpath
        else:
            print_debug("    unknown type")


if __name__ == "__main__":
    # Two simple examples/tests
    for fullpath in walk_python_files(['.']):
        print fullpath
    print "----------"
    for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
        print fullpath