#!/usr/bin/env python

"""List all those Python files that require a coding directive

Usage: findnocoding.py [-cd] dir1 [dir2...]
"""

__author__ = "Oleg Broytmann, Georg Brandl"

import sys, os, re, getopt

# our pysource module finds Python source files
try:
    import pysource
except ImportError:
    # emulate the module with a simple os.walk
    class _FallbackPysource:
        """Minimal stand-in exposing the pysource names this script uses.

        The three predicate attributes are None; walk_python_files accepts
        and ignores any extra arguments and selects files purely by their
        ".py" extension.
        """

        has_python_ext = looks_like_python = can_be_compiled = None

        def walk_python_files(self, paths, *args, **kwargs):
            """Yield the path of every ".py" file named by or below *paths*.

            A path that is a regular file is yielded itself when it ends in
            ".py"; a directory is walked recursively.  Anything else is
            skipped.
            """
            for path in paths:
                if os.path.isfile(path):
                    # yield the path itself, not the result of the test
                    if path.endswith(".py"):
                        yield path
                elif os.path.isdir(path):
                    for root, dirs, files in os.walk(path):
                        for filename in files:
                            if filename.endswith(".py"):
                                yield os.path.join(root, filename)

    pysource = _FallbackPysource()

    sys.stderr.write("The pysource module is not available; "
                     "no sophisticated Python source file search will be done.\n")


# Coding declaration as laid out in PEP 263: it only counts when it sits in a
# comment that is the sole content of its line, so the pattern is anchored to
# optional whitespace followed by '#'.  A leading UTF-8 byte order mark is
# tolerated so that a declaration on a BOM-prefixed first line still matches.
decl_re = re.compile(
    r"^(?:\xef\xbb\xbf)?[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")

def get_declaration(line):
    """Return the encoding named by a coding declaration on *line*.

    Returns the empty string when *line* carries no declaration, which
    includes lines where "coding:" or "coding=" appears outside a
    comment-only line.
    """
    match = decl_re.search(line)
    if match:
        return match.group(1)
    return ''

def has_correct_encoding(text, codec):
    """Report whether *text* decodes cleanly with *codec*.

    Returns True when the decode succeeds and False when it raises
    UnicodeDecodeError; any other exception propagates to the caller.
    """
    decodable = True
    try:
        # only the success or failure of the decode matters, not its result
        unicode(text, codec)
    except UnicodeDecodeError:
        decodable = False
    return decodable

def needs_declaration(fullpath):
    """Decide whether the file at *fullpath* lacks a coding declaration it needs.

    Returns None when the file cannot be opened, False when one of its first
    two lines carries a coding declaration or when its whole content decodes
    as ASCII, and True otherwise.
    """
    try:
        infile = open(fullpath, 'rU')
    except IOError: # Oops, the file was removed - ignore it
        return None

    # try/finally releases the handle on every path, including a failed read
    try:
        line1 = infile.readline()
        line2 = infile.readline()

        if get_declaration(line1) or get_declaration(line2):
            # the file does have an encoding declaration, so trust it
            return False

        # check the whole file for non-ASCII characters
        rest = infile.read()
    finally:
        infile.close()

    return not has_correct_encoding(line1+line2+rest, "ascii")


# Command-line help; also shown whenever the arguments are unusable.
usage = """Usage: %s [-cd] paths...
    -c: recognize Python source files trying to compile them
    -d: debug output""" % sys.argv[0]

try:
    opts, args = getopt.getopt(sys.argv[1:], 'cd')
except getopt.error, msg:
    # bad option: report the problem, show the help and give up
    for text in (msg, usage):
        print >>sys.stderr, text
    sys.exit(1)

# Default file filter; -c swaps it for the compile-based one.
is_python = pysource.looks_like_python
given = [flag for flag, _ in opts]
if '-c' in given:
    is_python = pysource.can_be_compiled
debug = '-d' in given

# At least one path is required.
if not args:
    print >>sys.stderr, usage
    sys.exit(1)

# Print every candidate file that needs a coding declaration.
for fullpath in pysource.walk_python_files(args, is_python):
    if debug:
        print "Testing for coding: %s" % fullpath
    if needs_declaration(fullpath):
        print fullpath