"""Parse a Python module and describe its classes and functions.

Parse enough of a Python file to recognize imports and class and
function definitions, and to find out the superclasses of a class.

The interface consists of a single function:
    readmodule_ex(module, path=None)
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched.  If present,
path is prepended to the system search path sys.path.  The return value
is a dictionary.  The keys of the dictionary are the names of the
classes and functions defined in the module (including classes that are
defined via the from XXX import YYY construct).  The values are
instances of classes Class and Function.  One special key/value pair is
present for packages: the key '__path__' has a list as its value which
contains the package search path.

Classes and Functions have a common superclass: _Object.  Every instance
has the following attributes:
    module  -- name of the module;
    name    -- name of the object;
    file    -- file in which the object is defined;
    lineno  -- line in the file where the object's definition starts;
    parent  -- parent of this object, if any;
    children -- nested objects contained in this object.
The 'children' attribute is a dictionary mapping names to objects.

Instances of Function describe functions with the attributes from _Object.

Instances of Class describe classes with the attributes from _Object,
plus the following:
    super   -- list of super classes (Class instances if possible);
    methods -- mapping of method names to beginning line numbers.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.
"""
40

41
import io
42
import sys
43
import importlib.util
Christian Heimes's avatar
Christian Heimes committed
44 45
import tokenize
from token import NAME, DEDENT, OP
46

47
# Public names exported by "from pyclbr import *".
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

# Cache of modules we've seen: maps a full dotted module name to the
# dictionary built for that module.
_modules = {}
50

51 52 53 54

class _Object:
    """Information about a Python class or function.

    Every instance has the following attributes:
        module   -- name of the module;
        name     -- name of the object;
        file     -- file in which the object is defined;
        lineno   -- line in the file where the object's definition starts;
        parent   -- parent of this object, if any;
        children -- dictionary mapping names to nested objects.
    """
    def __init__(self, module, name, file, lineno, parent):
        self.module = module
        self.name = name
        self.file = file
        self.lineno = lineno
        self.parent = parent
        # Each instance gets its own, initially empty, children mapping.
        self.children = {}

    def _addchild(self, name, obj):
        "Register obj as a nested object under name (replacing any previous one)."
        self.children[name] = obj


class Function(_Object):
    """Description of a Python function (or of a method of a class).

    Carries only the attributes defined by _Object; parent defaults to
    None when it is not given.
    """
    def __init__(self, module, name, file, lineno, parent=None):
        super().__init__(module, name, file, lineno, parent)


class Class(_Object):
    """Information about a Python class.

    In addition to the attributes from _Object, every instance has:
        super   -- list of super classes; the list passed by the caller,
                   or a new empty list when super is None;
        methods -- dictionary mapping method names to line numbers.
    """
    def __init__(self, module, name, super, file, lineno, parent=None):
        # The 'super' parameter shadows the builtin of the same name, so
        # the base initializer is called explicitly.
        _Object.__init__(self, module, name, file, lineno, parent)
        self.super = [] if super is None else super
        self.methods = {}

    def _addmethod(self, name, lineno):
        "Record lineno as the line number of method name."
        self.methods[name] = lineno
81

82 83 84 85 86 87 88 89 90 91 92 93 94 95

def _nest_function(ob, func_name, lineno):
    """Create a Function called func_name inside ob and return it.

    The new Function takes its module and file from ob, has ob as its
    parent and is added to ob's children.  When ob is a Class, the
    function is recorded as one of its methods as well.
    """
    nested = Function(module=ob.module, name=func_name, file=ob.file,
                      lineno=lineno, parent=ob)
    ob._addchild(func_name, nested)
    if not isinstance(ob, Class):
        return nested
    # Classes also keep a name -> line number mapping of their methods.
    ob._addmethod(func_name, lineno)
    return nested

def _nest_class(ob, class_name, lineno, super=None):
    """Create a Class called class_name inside ob and return it.

    The new Class takes its module and file from ob, has ob as its
    parent and is added to ob's children.  super is handed to Class
    unchanged.
    """
    nested = Class(module=ob.module, name=class_name, super=super,
                   file=ob.file, lineno=lineno, parent=ob)
    ob._addchild(class_name, nested)
    return nested
96

Christian Heimes's avatar
Christian Heimes committed
97
def readmodule(module, path=None):
    """Return Class objects for the top-level classes in module.

    This is the original interface, before Functions were added.

    module is the name of the module to read and path is an optional
    list of directories; None (or an empty list) means no extra
    directories.  The result is a new dictionary holding only those
    entries of the module's dictionary whose value is a Class instance.
    """
    return {key: value
            for key, value in _readmodule(module, path or []).items()
            if isinstance(value, Class)}
108

Christian Heimes's avatar
Christian Heimes committed
109
def readmodule_ex(module, path=None):
    """Return a dictionary with all functions and classes in module.

    Search for module in PATH + sys.path.
    If possible, include imported superclasses.
    Do this by reading source, without importing (and executing) it.

    path is an optional list of directories; None is treated as an
    empty list.
    """
    return _readmodule(module, path or [])

def _readmodule(module, path, inpackage=None):
    """Do the hard work for readmodule[_ex].

    If inpackage is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and path is combined with sys.path.

    Return the dictionary describing the module.  Results are cached in
    the global _modules under the full dotted module name.

    Raise ImportError if the module cannot be found, or if the parent of
    a dotted module name is not a package.
    """
    # Compute the full module name (prepending inpackage if set).
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache.
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents.
    tree = {}

    # Check if it is a built-in module; we don't do much for these.
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name.
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module.
    if inpackage is not None:
        search_path = path
    else:
        search_path = path + sys.path
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        # Nothing on the search path provides this module.  Report that
        # clearly, and before caching, so that no empty tree is left
        # behind in _modules for a module that does not exist.
        raise ImportError('No module named {}'.format(fullmodule),
                          name=fullmodule)
    # Cache the tree before parsing so that recursive calls triggered by
    # imports in the source find this entry instead of starting over.
    _modules[fullmodule] = tree
    # Is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations
    try:
        source = spec.loader.get_source(fullmodule)
        if source is None:
            return tree
    except (AttributeError, ImportError):
        # If module is not Python source, we cannot do anything.
        return tree

    fname = spec.loader.get_filename(fullmodule)
    return _create_tree(fullmodule, path, fname, source, tree, inpackage)

178

179 180 181 182 183 184 185 186 187 188 189 190
def _create_tree(fullmodule, path, fname, source, tree, inpackage):
    """Return the tree for a particular module.

    fullmodule (full module name), inpackage+module, becomes o.module.
    path is passed to recursive calls of _readmodule.
    fname becomes o.file.
    source is tokenized.  Imports cause recursive calls to _readmodule.
    tree is {} or {'__path__': <submodule search locations>}.
    inpackage, None or string, is passed to recursive calls of _readmodule.

    The effect of recursive calls is mutation of global _modules.

    tree is filled in place and is also the return value.
    """
    f = io.StringIO(source)

    stack = [] # Initialize stack of (class or function, indent) pairs.

    g = tokenize.generate_tokens(f.readline)
    try:
        for tokentype, token, start, _end, _line in g:
            if tokentype == DEDENT:
                lineno, thisindent = start
                # Close previous nested classes and defs.
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
            elif token == 'def':
                lineno, thisindent = start
                # Close previous nested classes and defs.
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, func_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue  # Skip def with syntax error.
                if stack:
                    cur_obj = stack[-1][0]
                    cur_func = _nest_function(cur_obj, func_name, lineno)
                else:
                    # It is just a function.
                    cur_func = Function(fullmodule, func_name, fname, lineno)
                    tree[func_name] = cur_func
                stack.append((cur_func, thisindent))
            elif token == 'class':
                lineno, thisindent = start
                # Close previous nested classes and defs.
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, class_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue # Skip class with syntax error.
                # Parse what follows the class name.
                tokentype, token, start = next(g)[0:3]
                inherit = None
                if token == '(':
                    names = [] # Initialize list of superclasses.
                    level = 1
                    super_tokens = [] # Tokens making up current superclass.
                    while True:
                        tokentype, token, start = next(g)[0:3]
                        if token in (')', ',') and level == 1:
                            n = "".join(super_tokens)
                            if n in tree:
                                # We know this super class.
                                n = tree[n]
                            else:
                                c = n.split('.')
                                if len(c) > 1:
                                    # Super class form is module.class:
                                    # look in module for class.
                                    m = c[-2]
                                    c = c[-1]
                                    if m in _modules:
                                        d = _modules[m]
                                        if c in d:
                                            n = d[c]
                            names.append(n)
                            super_tokens = []
                        if token == '(':
                            level += 1
                        elif token == ')':
                            level -= 1
                            if level == 0:
                                break
                        elif token == ',' and level == 1:
                            pass
                        # Only use NAME and OP (== dot) tokens for type name.
                        elif tokentype in (NAME, OP) and level == 1:
                            super_tokens.append(token)
                        # Expressions in the base list are not supported.
                    inherit = names
                if stack:
                    cur_obj = stack[-1][0]
                    cur_class = _nest_class(
                            cur_obj, class_name, lineno, inherit)
                else:
                    cur_class = Class(fullmodule, class_name, inherit,
                                      fname, lineno)
                    tree[class_name] = cur_class
                stack.append((cur_class, thisindent))
            elif token == 'import' and start[1] == 0:
                modules = _getnamelist(g)
                for mod, _mod2 in modules:
                    try:
                        # Recursively read the imported module.
                        if inpackage is None:
                            _readmodule(mod, path)
                        else:
                            try:
                                _readmodule(mod, path, inpackage)
                            except ImportError:
                                _readmodule(mod, [])
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.  KeyboardInterrupt
                        # and SystemExit are deliberately not swallowed.
                        pass
            elif token == 'from' and start[1] == 0:
                mod, token = _getname(g)
                if not mod or token != "import":
                    continue
                names = _getnamelist(g)
                try:
                    # Recursively read the imported module.
                    d = _readmodule(mod, path, inpackage)
                except Exception:
                    # If we can't find or parse the imported module,
                    # too bad -- don't die here.  KeyboardInterrupt
                    # and SystemExit are deliberately not swallowed.
                    continue
                # Add any classes that were defined in the imported module
                # to our name space if they were mentioned in the list.
                for n, n2 in names:
                    if n in d:
                        tree[n2 or n] = d[n]
                    elif n == '*':
                        # Don't add names that start with _.
                        for n in d:
                            if n[0] != '_':
                                tree[n] = d[n]
    except StopIteration:
        # A next(g) call ran out of tokens in the middle of a statement;
        # keep whatever has been collected so far.
        pass
    finally:
        # Close the buffer even when tokenizing raises.
        f.close()

    return tree

321

322
def _getnamelist(g):
    """Return list of (dotted-name, as-name or None) tuples for token source g.

    An as-name is the name that follows 'as' in an as clause.

    Tokens are consumed from g until a name cannot be read or the token
    that ends an entry is not a comma.
    """
    names = []
    while True:
        name, token = _getname(g)
        if not name:
            break
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        # Skip ahead to the comma separating entries or to a token
        # containing a newline, whichever comes first.
        while token != "," and "\n" not in token:
            token = next(g)[1]
        if token != ",":
            break
    return names

343

344
def _getname(g):
    """Return (dotted-name or None, next-token) tuple for token source g.

    The first token read from g must be a NAME or '*'; otherwise the
    result is (None, <that token's string>).  Further NAME tokens joined
    by '.' are appended to the dotted name.  The second item is the
    string of the token at which reading stopped.
    """
    parts = []
    tokentype, token = next(g)[0:2]
    if tokentype != NAME and token != '*':
        return (None, token)
    parts.append(token)
    while True:
        tokentype, token = next(g)[0:2]
        if token != '.':
            break
        tokentype, token = next(g)[0:2]
        if tokentype != NAME:
            break
        parts.append(token)
    return (".".join(parts), token)
360

361

362
def _main():
    """Print module output (default this file) for quick visual check.

    The module is taken from sys.argv[1].  If that names an existing
    path, its directory is searched and a trailing ".py" is dropped from
    the base name; otherwise the argument is used as a module name.
    """
    import os
    try:
        mod = sys.argv[1]
    except IndexError:
        # No argument given: describe this file.
        mod = __file__
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    lineno_key = lambda a: getattr(a, 'lineno', 0)
    # Sorted in reverse so that pop() yields objects in line order.
    objs = sorted(tree.values(), key=lineno_key, reverse=True)
    indent_level = 2
    while objs:
        obj = objs.pop()
        if isinstance(obj, list):
            # Value is a __path__ key.
            continue
        if not hasattr(obj, 'indent'):
            obj.indent = 0

        if isinstance(obj, _Object):
            # Push the children so they are printed right after their
            # parent, indented one level deeper.
            new_objs = sorted(obj.children.values(),
                              key=lineno_key, reverse=True)
            for ob in new_objs:
                ob.indent = obj.indent + indent_level
            objs.extend(new_objs)
        if isinstance(obj, Class):
            print("{}class {} {} {}"
                  .format(' ' * obj.indent, obj.name, obj.super, obj.lineno))
        elif isinstance(obj, Function):
            print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno))
399 400 401

# Run the quick visual check only when executed as a script.
if __name__ == "__main__":
    _main()