modulefinder.py 22.5 KB
Newer Older
1
"""Find modules used by a script, using introspection."""
2

3
import dis
4
import importlib._bootstrap_external
5
import importlib.machinery
6 7 8
import marshal
import os
import sys
9
import types
10 11
import warnings
with warnings.catch_warnings():
12
    warnings.simplefilter('ignore', DeprecationWarning)
13
    import imp
14

15 16 17 18
# Opcode numbers the bytecode scanner compares against.
LOAD_CONST, IMPORT_NAME, STORE_NAME, STORE_GLOBAL = (
    dis.opmap[opname]
    for opname in ('LOAD_CONST', 'IMPORT_NAME', 'STORE_NAME', 'STORE_GLOBAL')
)
# Both opcodes are reported as a "store" by ModuleFinder.scan_opcodes().
STORE_OPS = (STORE_NAME, STORE_GLOBAL)
# Instructions with this opcode are filtered out by scan_opcodes().
EXTENDED_ARG = dis.EXTENDED_ARG
21

22 23 24
# Modulefinder does a good job at simulating Python's import mechanism, but
# it cannot handle __path__ modifications packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and it will be honored.
26 27 28 29 30 31

# Note: this maps a package name to a list of extra paths.
packagePathMap = {}

# A public interface
def AddPackagePath(packagename, path):
    """Register *path* as an extra search path for package *packagename*.

    The registered paths are appended to the package's __path__ by
    ModuleFinder.load_package().
    """
    if packagename not in packagePathMap:
        packagePathMap[packagename] = []
    packagePathMap[packagename].append(path)
33

34 35
# Maps the name a package is loaded under to the name it is recorded as.
replacePackageMap = {}

# This ReplacePackage mechanism allows modulefinder to work around
# situations in which a package injects itself under the name
# of another package into sys.modules at runtime by calling
# ReplacePackage("real_package_name", "faked_package_name")
# before running ModuleFinder.

def ReplacePackage(oldname, newname):
    """Record that package *oldname* should be registered as *newname*.

    ModuleFinder.load_package() consults this map before adding the
    package to its module table.
    """
    replacePackageMap.update({oldname: newname})


46 47 48
class Module:
    """Record describing one module or package found by ModuleFinder."""

    def __init__(self, name, file=None, path=None):
        """Store the module's name, its file and (for packages) its path list."""
        self.__name__, self.__file__, self.__path__ = name, file, path
        self.__code__ = None
        # The set of global names that are assigned to in the module.
        # This includes those names imported through starimports of
        # Python modules.  (A dict is used as a set: name -> 1.)
        self.globalnames = {}
        # The set of starimports this module did that could not be
        # resolved, ie. a starimport from a non-Python module.
        self.starimports = {}

    def __repr__(self):
        """Return "Module(name[, file][, path])", omitting fields that are None."""
        fields = [repr(self.__name__)]
        fields.extend(repr(value)
                      for value in (self.__file__, self.__path__)
                      if value is not None)
        return "Module(" + ", ".join(fields) + ")"
69 70 71

class ModuleFinder:

72
    def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
73 74 75 76 77 78 79
        if path is None:
            path = sys.path
        self.path = path
        self.modules = {}
        self.badmodules = {}
        self.debug = debug
        self.indent = 0
80
        self.excludes = excludes
81 82
        self.replace_paths = replace_paths
        self.processed_paths = []   # Used in debugging only
83 84

    def msg(self, level, str, *args):
85 86
        if level <= self.debug:
            for i in range(self.indent):
87 88
                print("   ", end=' ')
            print(str, end=' ')
89
            for arg in args:
90 91
                print(repr(arg), end=' ')
            print()
92 93

    def msgin(self, *args):
94 95 96
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent + 1
97
            self.msg(*args)
98 99

    def msgout(self, *args):
100 101 102
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent - 1
103
            self.msg(*args)
104 105

    def run_script(self, pathname):
        """Load the script at *pathname* as module '__main__' and scan it."""
        # Local import: tokenize is not among this file's top-level imports.
        import tokenize

        self.msg(2, "run_script", pathname)
        # tokenize.open() decodes using the file's BOM / coding declaration
        # (UTF-8 by default) rather than the locale encoding that a plain
        # open() would use.
        with tokenize.open(pathname) as fp:
            stuff = ("", "r", imp.PY_SOURCE)
            self.load_module('__main__', fp, pathname, stuff)
110 111

    def load_file(self, pathname):
        """Load the source file *pathname* as a module and scan it.

        The module is named after the file's base name with its extension
        removed.
        """
        # Local import: tokenize is not among this file's top-level imports.
        import tokenize

        name, ext = os.path.splitext(os.path.basename(pathname))
        # tokenize.open() decodes using the file's BOM / coding declaration
        # (UTF-8 by default) rather than the locale encoding that a plain
        # open() would use.
        with tokenize.open(pathname) as fp:
            stuff = (ext, "r", imp.PY_SOURCE)
            self.load_module(name, fp, pathname, stuff)
117

118 119 120
    def import_hook(self, name, caller=None, fromlist=None, level=-1):
        self.msg(3, "import_hook", name, caller, fromlist, level)
        parent = self.determine_parent(caller, level=level)
121 122 123 124 125 126
        q, tail = self.find_head_package(parent, name)
        m = self.load_tail(q, tail)
        if not fromlist:
            return q
        if m.__path__:
            self.ensure_fromlist(m, fromlist)
127
        return None
128

129 130 131
    def determine_parent(self, caller, level=-1):
        self.msgin(4, "determine_parent", caller, level)
        if not caller or level == 0:
132 133 134
            self.msgout(4, "determine_parent -> None")
            return None
        pname = caller.__name__
135 136 137 138 139 140 141 142 143
        if level >= 1: # relative import
            if caller.__path__:
                level -= 1
            if level == 0:
                parent = self.modules[pname]
                assert parent is caller
                self.msgout(4, "determine_parent ->", parent)
                return parent
            if pname.count(".") < level:
144
                raise ImportError("relative importpath too deep")
145 146 147 148
            pname = ".".join(pname.split(".")[:-level])
            parent = self.modules[pname]
            self.msgout(4, "determine_parent ->", parent)
            return parent
149 150 151 152 153 154
        if caller.__path__:
            parent = self.modules[pname]
            assert caller is parent
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if '.' in pname:
155
            i = pname.rfind('.')
156 157 158 159 160 161 162
            pname = pname[:i]
            parent = self.modules[pname]
            assert parent.__name__ == pname
            self.msgout(4, "determine_parent ->", parent)
            return parent
        self.msgout(4, "determine_parent -> None")
        return None
163 164

    def find_head_package(self, parent, name):
165 166
        self.msgin(4, "find_head_package", parent, name)
        if '.' in name:
167
            i = name.find('.')
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
            head = name[:i]
            tail = name[i+1:]
        else:
            head = name
            tail = ""
        if parent:
            qname = "%s.%s" % (parent.__name__, head)
        else:
            qname = head
        q = self.import_module(head, qname, parent)
        if q:
            self.msgout(4, "find_head_package ->", (q, tail))
            return q, tail
        if parent:
            qname = head
            parent = None
            q = self.import_module(head, qname, parent)
            if q:
                self.msgout(4, "find_head_package ->", (q, tail))
                return q, tail
        self.msgout(4, "raise ImportError: No module named", qname)
189
        raise ImportError("No module named " + qname)
190 191

    def load_tail(self, q, tail):
192 193 194
        self.msgin(4, "load_tail", q, tail)
        m = q
        while tail:
195
            i = tail.find('.')
196 197 198 199 200 201
            if i < 0: i = len(tail)
            head, tail = tail[:i], tail[i+1:]
            mname = "%s.%s" % (m.__name__, head)
            m = self.import_module(head, mname, m)
            if not m:
                self.msgout(4, "raise ImportError: No module named", mname)
202
                raise ImportError("No module named " + mname)
203 204
        self.msgout(4, "load_tail ->", m)
        return m
205 206

    def ensure_fromlist(self, m, fromlist, recursive=0):
207 208 209 210 211 212 213 214 215 216 217
        self.msg(4, "ensure_fromlist", m, fromlist, recursive)
        for sub in fromlist:
            if sub == "*":
                if not recursive:
                    all = self.find_all_submodules(m)
                    if all:
                        self.ensure_fromlist(m, all, 1)
            elif not hasattr(m, sub):
                subname = "%s.%s" % (m.__name__, sub)
                submod = self.import_module(sub, subname, m)
                if not submod:
218
                    raise ImportError("No module named " + subname)
219 220

    def find_all_submodules(self, m):
221 222 223
        if not m.__path__:
            return
        modules = {}
224
        # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
225 226 227
        # But we must also collect Python extension modules - although
        # we cannot separate normal dlls from Python extensions.
        suffixes = []
228 229 230
        suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
        suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
        suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
231 232 233
        for dir in m.__path__:
            try:
                names = os.listdir(dir)
234
            except OSError:
235 236 237 238 239 240 241 242 243 244 245 246
                self.msg(2, "can't list directory", dir)
                continue
            for name in names:
                mod = None
                for suff in suffixes:
                    n = len(suff)
                    if name[-n:] == suff:
                        mod = name[:-n]
                        break
                if mod and mod != "__init__":
                    modules[mod] = mod
        return modules.keys()
247 248

    def import_module(self, partname, fqname, parent):
249 250 251 252 253 254 255 256
        self.msgin(3, "import_module", partname, fqname, parent)
        try:
            m = self.modules[fqname]
        except KeyError:
            pass
        else:
            self.msgout(3, "import_module ->", m)
            return m
257
        if fqname in self.badmodules:
258 259
            self.msgout(3, "import_module -> None")
            return None
260 261 262
        if parent and parent.__path__ is None:
            self.msgout(3, "import_module -> None")
            return None
263 264
        try:
            fp, pathname, stuff = self.find_module(partname,
265
                                                   parent and parent.__path__, parent)
266 267 268 269 270 271
        except ImportError:
            self.msgout(3, "import_module ->", None)
            return None
        try:
            m = self.load_module(fqname, fp, pathname, stuff)
        finally:
272 273
            if fp:
                fp.close()
274 275 276 277
        if parent:
            setattr(parent, partname, m)
        self.msgout(3, "import_module ->", m)
        return m
278

279 280
    def load_module(self, fqname, fp, pathname, file_info):
        """Register module *fqname* and scan its code for imports.

        file_info is a (suffix, mode, type) triple; the type selects how
        the module is handled: packages are delegated to load_package(),
        source is compiled, bytecode is validated and unmarshalled, and
        anything else is registered without code.  Returns the module
        record.
        """
        suffix, mode, kind = file_info
        self.msgin(2, "load_module", fqname, fp and "fp", pathname)
        if kind == imp.PKG_DIRECTORY:
            m = self.load_package(fqname, pathname)
            self.msgout(2, "load_module ->", m)
            return m
        co = None
        if kind == imp.PY_SOURCE:
            co = compile(fp.read()+'\n', pathname, 'exec')
        elif kind == imp.PY_COMPILED:
            try:
                data = fp.read()
                importlib._bootstrap_external._classify_pyc(data, fqname, {})
            except ImportError as exc:
                self.msgout(2, "raise ImportError: " + str(exc), pathname)
                raise
            # Skip the 16 bytes that precede the marshalled code object.
            co = marshal.loads(memoryview(data)[16:])
        m = self.add_module(fqname)
        m.__file__ = pathname
        if co:
            if self.replace_paths:
                co = self.replace_paths_in_code(co)
            m.__code__ = co
            self.scan_code(co, m)
        self.msgout(2, "load_module ->", m)
        return m
307

308 309 310
    def _add_badmodule(self, name, caller):
        if name not in self.badmodules:
            self.badmodules[name] = {}
311 312 313 314
        if caller:
            self.badmodules[name][caller.__name__] = 1
        else:
            self.badmodules[name]["-"] = 1
315

316
    def _safe_import_hook(self, name, caller, fromlist, level=-1):
317 318 319 320 321
        # wrapper for self.import_hook() that won't raise ImportError
        if name in self.badmodules:
            self._add_badmodule(name, caller)
            return
        try:
322
            self.import_hook(name, caller, level=level)
323
        except ImportError as msg:
324 325 326 327 328 329 330 331 332
            self.msg(2, "ImportError:", str(msg))
            self._add_badmodule(name, caller)
        else:
            if fromlist:
                for sub in fromlist:
                    if sub in self.badmodules:
                        self._add_badmodule(sub, caller)
                        continue
                    try:
333
                        self.import_hook(name, caller, [sub], level=level)
334
                    except ImportError as msg:
335 336 337 338
                        self.msg(2, "ImportError:", str(msg))
                        fullname = name + "." + sub
                        self._add_badmodule(fullname, caller)

339
    def scan_opcodes(self, co):
        """Scan the code, and yield 'interesting' opcode combinations.

        Yields ("store", (name,)) for each STORE_NAME/STORE_GLOBAL, and for
        each IMPORT_NAME directly preceded by two LOAD_CONSTs either
        ("absolute_import", (fromlist, name)) or
        ("relative_import", (level, fromlist, name)).
        """
        names = co.co_names
        consts = co.co_consts
        # EXTENDED_ARG entries are dropped so that the two instructions in
        # front of an IMPORT_NAME can be inspected by position.
        instructions = [(op, arg)
                        for _, op, arg in dis._unpack_opargs(co.co_code)
                        if op != EXTENDED_ARG]
        for index, (op, oparg) in enumerate(instructions):
            if op in STORE_OPS:
                yield "store", (names[oparg],)
            elif op == IMPORT_NAME and index >= 2:
                (level_op, level_arg), (from_op, from_arg) = \
                    instructions[index-2:index]
                if from_op == level_op == LOAD_CONST:
                    level = consts[level_arg]
                    fromlist = consts[from_arg]
                    if level == 0: # absolute import
                        yield "absolute_import", (fromlist, names[oparg])
                    else: # relative import
                        yield "relative_import", (level, fromlist, names[oparg])

360 361
    def scan_code(self, co, m):
        """Record the stores and imports found in code object *co* on module *m*.

        Stores are added to m.globalnames, imports are followed through
        _safe_import_hook(), and code objects nested in co.co_consts are
        scanned recursively.
        """
        for what, args in self.scan_opcodes(co):
            if what == "store":
                (name,) = args
                m.globalnames[name] = 1
            elif what == "absolute_import":
                fromlist, name = args
                have_star = False
                if fromlist is not None:
                    have_star = "*" in fromlist
                    fromlist = [f for f in fromlist if f != "*"]
                self._safe_import_hook(name, m, fromlist, level=0)
                if not have_star:
                    continue
                # We've encountered an "import *". If it is a Python module,
                # the code has already been parsed and we can suck out the
                # global names.
                mm = None
                if m.__path__:
                    # At this point we don't know whether 'name' is a
                    # submodule of 'm' or a global module. Let's just try
                    # the full name first.
                    mm = self.modules.get(m.__name__ + "." + name)
                if mm is None:
                    mm = self.modules.get(name)
                if mm is None:
                    m.starimports[name] = 1
                else:
                    m.globalnames.update(mm.globalnames)
                    m.starimports.update(mm.starimports)
                    if mm.__code__ is None:
                        m.starimports[name] = 1
            elif what == "relative_import":
                level, fromlist, name = args
                if name:
                    self._safe_import_hook(name, m, fromlist, level=level)
                else:
                    # "from . import x": resolve the package first, then
                    # import from it by absolute name.
                    parent = self.determine_parent(m, level=level)
                    self._safe_import_hook(parent.__name__, None, fromlist, level=0)
            else:
                # We don't expect anything else from the generator.
                raise RuntimeError(what)

        code_type = type(co)
        for const in co.co_consts:
            if isinstance(const, code_type):
                self.scan_code(const, m)

409
    def load_package(self, fqname, pathname):
        """Register the package in directory *pathname* and load its __init__.

        The name is first translated through replacePackageMap, and the
        package's __path__ is extended with any paths registered for it in
        packagePathMap.  Returns the package's module record.
        """
        self.msgin(2, "load_package", fqname, pathname)
        fqname = replacePackageMap.get(fqname) or fqname
        m = self.add_module(fqname)
        m.__file__ = pathname

        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = [pathname] + packagePathMap.get(fqname, [])

        fp, buf, stuff = self.find_module("__init__", m.__path__)
        try:
            self.load_module(fqname, fp, buf, stuff)
            self.msgout(2, "load_package ->", m)
            return m
        finally:
            if fp:
                fp.close()
429 430

    def add_module(self, fqname):
431
        if fqname in self.modules:
432 433 434
            return self.modules[fqname]
        self.modules[fqname] = m = Module(fqname)
        return m
435

436 437
    def find_module(self, name, path, parent=None):
        if parent is not None:
438
            # assert path is not None
439
            fullname = parent.__name__+'.'+name
440 441 442 443
        else:
            fullname = name
        if fullname in self.excludes:
            self.msgout(3, "find_module -> Excluded", fullname)
444
            raise ImportError(name)
445

446 447 448
        if path is None:
            if name in sys.builtin_module_names:
                return (None, None, ("", "", imp.C_BUILTIN))
449

450 451
            path = self.path
        return imp.find_module(name, path)
452 453

    def report(self):
454 455 456
        """Print a report to stdout, listing the found modules with their
        paths, as well as modules that are missing, or seem to be missing.
        """
457 458 459
        print()
        print("  %-25s %s" % ("Name", "File"))
        print("  %-25s %s" % ("----", "----"))
460
        # Print modules found
461
        keys = sorted(self.modules.keys())
462 463 464
        for key in keys:
            m = self.modules[key]
            if m.__path__:
465
                print("P", end=' ')
466
            else:
467 468
                print("m", end=' ')
            print("%-25s" % key, m.__file__ or "")
469 470

        # Print missing modules
471 472
        missing, maybe = self.any_missing_maybe()
        if missing:
473 474
            print()
            print("Missing modules:")
475
            for name in missing:
476
                mods = sorted(self.badmodules[name].keys())
477
                print("?", name, "imported from", ', '.join(mods))
478 479
        # Print modules that may be missing, but then again, maybe not...
        if maybe:
480
            print()
481
            print("Submodules that appear to be missing, but could also be", end=' ')
482
            print("global names in the parent package:")
483
            for name in maybe:
484
                mods = sorted(self.badmodules[name].keys())
485
                print("?", name, "imported from", ', '.join(mods))
486

487
    def any_missing(self):
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503
        """Return a list of modules that appear to be missing. Use
        any_missing_maybe() if you want to know which modules are
        certain to be missing, and which *may* be missing.
        """
        missing, maybe = self.any_missing_maybe()
        return missing + maybe

    def any_missing_maybe(self):
        """Return two lists, one with modules that are certainly missing
        and one with modules that *may* be missing. The latter names could
        either be submodules *or* just global names in the package.

        The reason it can't always be determined is that it's impossible to
        tell which names are imported when "from module import *" is done
        with an extension module, short of actually importing it.
        """
504
        missing = []
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539
        maybe = []
        for name in self.badmodules:
            if name in self.excludes:
                continue
            i = name.rfind(".")
            if i < 0:
                missing.append(name)
                continue
            subname = name[i+1:]
            pkgname = name[:i]
            pkg = self.modules.get(pkgname)
            if pkg is not None:
                if pkgname in self.badmodules[name]:
                    # The package tried to import this module itself and
                    # failed. It's definitely missing.
                    missing.append(name)
                elif subname in pkg.globalnames:
                    # It's a global in the package: definitely not missing.
                    pass
                elif pkg.starimports:
                    # It could be missing, but the package did an "import *"
                    # from a non-Python module, so we simply can't be sure.
                    maybe.append(name)
                else:
                    # It's not a global in the package, the package didn't
                    # do funny star imports, it's very likely to be missing.
                    # The symbol could be inserted into the package from the
                    # outside, but since that's not good style we simply list
                    # it missing.
                    missing.append(name)
            else:
                missing.append(name)
        missing.sort()
        maybe.sort()
        return missing, maybe
540

541 542
    def replace_paths_in_code(self, co):
        new_filename = original_filename = os.path.normpath(co.co_filename)
543
        for f, r in self.replace_paths:
544
            if original_filename.startswith(f):
545
                new_filename = r + original_filename[len(f):]
546 547 548
                break

        if self.debug and original_filename not in self.processed_paths:
549
            if new_filename != original_filename:
550 551 552 553 554 555 556 557 558 559 560 561
                self.msgout(2, "co_filename %r changed to %r" \
                                    % (original_filename,new_filename,))
            else:
                self.msgout(2, "co_filename %r remains unchanged" \
                                    % (original_filename,))
            self.processed_paths.append(original_filename)

        consts = list(co.co_consts)
        for i in range(len(consts)):
            if isinstance(consts[i], type(co)):
                consts[i] = self.replace_paths_in_code(consts[i])

562 563 564 565 566 567
        return types.CodeType(co.co_argcount, co.co_kwonlyargcount,
                              co.co_nlocals, co.co_stacksize, co.co_flags,
                              co.co_code, tuple(consts), co.co_names,
                              co.co_varnames, new_filename, co.co_name,
                              co.co_firstlineno, co.co_lnotab, co.co_freevars,
                              co.co_cellvars)
568

569 570 571 572 573

def test():
    """Command-line driver: analyse a script and print the report.

    Options: -d raises the debug level, -q silences it, -m makes the
    remaining arguments module names instead of files, -p adds
    os.pathsep-separated directories to the search path, and -x excludes
    a module name.  Returns the ModuleFinder, or None when option
    parsing fails.
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error as msg:
        print(msg)
        return

    # Process options
    debug, domods = 1, 0
    addpath, exclude = [], []
    for o, a in opts:
        if o == '-d':
            debug += 1
        elif o == '-m':
            domods = 1
        elif o == '-p':
            addpath.extend(a.split(os.pathsep))
        elif o == '-q':
            debug = 0
        elif o == '-x':
            exclude.append(a)

    # Provide default arguments
    script = args[0] if args else "hello.py"

    # Set the path based on sys.path and the script directory
    path = sys.path[:]
    path[0] = os.path.dirname(script)
    path = addpath + path
    if debug > 1:
        print("path:")
        for item in path:
            print("   ", repr(item))

    # Create the module finder and turn its crank
    mf = ModuleFinder(path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            domods = 1
            continue
        if not domods:
            mf.load_file(arg)
        elif arg.endswith('.*'):
            mf.import_hook(arg[:-2], None, ["*"])
        else:
            mf.import_hook(arg)
    mf.run_script(script)
    mf.report()
    return mf  # for -i debugging
627 628 629 630


if __name__ == '__main__':
    # The finder is bound to a module-level name so that it can be
    # inspected afterwards when running with "python -i".
    try:
        mf = test()
    except KeyboardInterrupt:
        print("\n[interrupted]")