"""Parse a Python module and describe its classes and methods.

Parse enough of a Python file to recognize imports and class and
method definitions, and to find out the superclasses of a class.

The main interface is the function:
        readmodule_ex(module [, path])
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched.  If present,
path is prepended to the system search path sys.path.  The return
value is a dictionary.  The keys of the dictionary are the names of
the classes and top-level functions defined in the module (including
names that are brought in via the from XXX import YYY construct).  The
values are instances of the classes Class and Function defined here.
One special key/value pair is present for packages: the key '__path__'
has a list as its value which contains the package search path.

The older function readmodule(module [, path]) takes the same
arguments but keeps only the Class entries of that dictionary.

A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
        module -- the module name
        name -- the name of the class
        super -- a list of super classes (Class instances)
        methods -- a dictionary of methods
        file -- the file in which the class was defined
        lineno -- the line in the file on which the class statement occurred
The dictionary of methods uses the method names as keys and the line
numbers on which the method was defined as values.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.

A function is described by the class Function in this module.
Instances of this class have the following instance variables:
        module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line in the file on which the def statement occurred
"""
41

42
import io
43
import sys
44
import importlib.util
Christian Heimes's avatar
Christian Heimes committed
45 46
import tokenize
from token import NAME, DEDENT, OP
47

48
# Names exported by "from pyclbr import *": the two reader functions and
# the two descriptor classes they return instances of.
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
49

50
# Maps a module name (dotted name for submodules) to the dictionary that
# _readmodule() built for it, so a module that was already scanned is
# returned from here instead of being scanned again.
_modules = {}                           # cache of modules we've seen
51 52 53

# each Python class is represented by an instance of this class
class Class:
    '''Class to represent a Python class.

    Instance variables:
        module -- the module name
        name -- the name of the class
        super -- a list of super classes (Class instances, or plain
                 strings for bases that could not be resolved)
        methods -- a dictionary mapping method names to line numbers
        file -- the file in which the class was defined
        lineno -- the line on which the class statement occurred
    '''
    def __init__(self, module, name, super, file, lineno):
        self.module = module
        self.name = name
        # None means "no base-class list"; normalize it to an empty list
        # so callers can always iterate over self.super.
        self.super = [] if super is None else super
        self.methods = {}
        self.file = file
        self.lineno = lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.'''
        self.methods[name] = lineno
67

68
class Function:
    '''Class to represent a top-level Python function.

    Instance variables:
        module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line on which the def statement occurred
    '''
    def __init__(self, module, name, file, lineno):
        self.module = module
        self.name = name
        self.file = file
        self.lineno = lineno
75

Christian Heimes's avatar
Christian Heimes committed
76
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Scan MODULE exactly like readmodule_ex() does, but keep only the
    Class objects from the resulting dictionary (Function objects and
    the special '__path__' entry are dropped).

    PATH is an optional list of directories searched before sys.path.
    '''
    return {key: value
            for key, value in _readmodule(module, path or []).items()
            if isinstance(value, Class)}
87

Christian Heimes's avatar
Christian Heimes committed
88
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary describing it.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    and each top-level function found in the module (values are
    Class and Function instances).  For a package the dictionary
    also has a '__path__' entry holding the package search path.
    '''
    return _readmodule(module, path or [])

def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    If INPACKAGE is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and PATH is combined with sys.path.

    Return the dictionary describing the module: class and top-level
    function names mapped to Class / Function instances, plus a
    '__path__' entry for packages.  The dictionary is also stored in
    the _modules cache under the full module name.

    Raise ImportError if the module (or a parent package of a dotted
    name) cannot be found.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents
    tree = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage is not None:
        search_path = path
    else:
        search_path = path + sys.path
    # XXX This will change once issue19944 lands.
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        # Nothing found.  Fail with ImportError *before* touching the
        # cache: otherwise an empty dict would be cached for a module
        # that does not exist, and callers that fall back on ImportError
        # would see an AttributeError instead.
        raise ImportError('No module named {!r}'.format(fullmodule),
                          name=fullmodule)
    # Cache the (still empty) dict before parsing so that circular
    # imports between scanned modules terminate.
    _modules[fullmodule] = tree
    # is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations
    try:
        source = spec.loader.get_source(fullmodule)
        if source is None:
            return tree
    except (AttributeError, ImportError):
        # not Python source, can't do anything with this module
        return tree

    fname = spec.loader.get_filename(fullmodule)

    f = io.StringIO(source)

    stack = [] # stack of (class, indent) pairs

    g = tokenize.generate_tokens(f.readline)
    try:
        for tokentype, token, start, _end, _line in g:
            if tokentype == DEDENT:
                lineno, thisindent = start
                # close nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
            elif token == 'def':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, meth_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue # Syntax error
                if stack:
                    cur_class = stack[-1][0]
                    if isinstance(cur_class, Class):
                        # it's a method
                        cur_class._addmethod(meth_name, lineno)
                    # else it's a nested def
                else:
                    # it's a function
                    tree[meth_name] = Function(fullmodule, meth_name,
                                               fname, lineno)
                stack.append((None, thisindent)) # Marker for nested fns
            elif token == 'class':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, class_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue # Syntax error
                # parse what follows the class name
                tokentype, token, start = next(g)[0:3]
                inherit = None
                if token == '(':
                    names = [] # List of superclasses
                    # there's a list of superclasses
                    level = 1
                    base_tokens = [] # Tokens making up current superclass
                    while True:
                        tokentype, token, start = next(g)[0:3]
                        if token in (')', ',') and level == 1:
                            n = "".join(base_tokens)
                            if n in tree:
                                # we know this super class
                                n = tree[n]
                            else:
                                c = n.split('.')
                                if len(c) > 1:
                                    # super class is of the form
                                    # module.class: look in module for
                                    # class
                                    m = c[-2]
                                    c = c[-1]
                                    if m in _modules:
                                        d = _modules[m]
                                        if c in d:
                                            n = d[c]
                            names.append(n)
                            base_tokens = []
                        if token == '(':
                            level += 1
                        elif token == ')':
                            level -= 1
                            if level == 0:
                                break
                        elif token == ',' and level == 1:
                            pass
                        # only use NAME and OP (== dot) tokens for type name
                        elif tokentype in (NAME, OP) and level == 1:
                            base_tokens.append(token)
                        # expressions in the base list are not supported
                    inherit = names
                cur_class = Class(fullmodule, class_name, inherit,
                                  fname, lineno)
                if not stack:
                    tree[class_name] = cur_class
                stack.append((cur_class, thisindent))
            elif token == 'import' and start[1] == 0:
                modules = _getnamelist(g)
                for mod, _mod2 in modules:
                    try:
                        # Recursively read the imported module
                        if inpackage is None:
                            _readmodule(mod, path)
                        else:
                            try:
                                # first try it as a sibling in our package
                                _readmodule(mod, path, inpackage)
                            except ImportError:
                                # then as a top-level module
                                _readmodule(mod, [])
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.
                        pass
            elif token == 'from' and start[1] == 0:
                mod, token = _getname(g)
                if not mod or token != "import":
                    continue
                names = _getnamelist(g)
                try:
                    # Recursively read the imported module
                    d = _readmodule(mod, path, inpackage)
                except Exception:
                    # If we can't find or parse the imported module,
                    # too bad -- don't die here.
                    continue
                # add any classes that were defined in the imported module
                # to our name space if they were mentioned in the list
                for n, n2 in names:
                    if n in d:
                        tree[n2 or n] = d[n]
                    elif n == '*':
                        # don't add names that start with _
                        for name in d:
                            if name[0] != '_':
                                tree[name] = d[name]
    except StopIteration:
        # A helper ran off the end of the token stream (truncated or
        # malformed statement); keep what has been collected so far.
        pass
    finally:
        f.close()

    return tree
286

287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
def _getnamelist(g):
    '''Helper to get a comma-separated list of dotted names plus 'as'
    clauses.

    G is the token generator, positioned just after 'import'.  Return a
    list of pairs (name, name2) where name2 is the 'as' name, or None
    if there is no 'as' clause.
    '''
    names = []
    while True:
        name, token = _getname(g)
        if not name:
            break
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        # skip anything up to the next comma or the end of the line
        while token != "," and "\n" not in token:
            token = next(g)[1]
        if token != ",":
            break
    return names

def _getname(g):
    '''Helper to get a dotted name from token generator G.

    Return a pair (name, token) where name is the dotted name, or None
    if there was no dotted name, and token is the next input token.
    A lone '*' is accepted as a name (for "from X import *").
    '''
    parts = []
    tokentype, token = next(g)[0:2]
    if tokentype != NAME and token != '*':
        return (None, token)
    parts.append(token)
    while True:
        tokentype, token = next(g)[0:2]
        if token != '.':
            break
        # a dot must be followed by another name component
        tokentype, token = next(g)[0:2]
        if tokentype != NAME:
            break
        parts.append(token)
    return (".".join(parts), token)
325 326 327 328

def _main():
    '''Main program for testing.

    Takes a module name or a path to a .py file as sys.argv[1] and
    prints the classes (with their methods) and top-level functions
    found in it, ordered by line number.
    '''
    import os
    from operator import itemgetter
    mod = sys.argv[1]
    if os.path.exists(mod):
        # a file path was given: search its directory for the module
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    objs = list(tree.values())
    # the '__path__' entry (a list) has no lineno; sort it first
    objs.sort(key=lambda a: getattr(a, 'lineno', 0))
    for obj in objs:
        if isinstance(obj, Class):
            print("class", obj.name, obj.super, obj.lineno)
            methods = sorted(obj.methods.items(), key=itemgetter(1))
            for name, lineno in methods:
                if name != "__path__":
                    print("  def", name, lineno)
        elif isinstance(obj, Function):
            print("def", obj.name, obj.lineno)

# Run the test driver only when executed as a script, not when imported.
if __name__ == "__main__":
    _main()