"""Find modules used by a script, using introspection."""

3 4 5 6 7
import dis
import imp
import marshal
import os
import sys
8
import types
9
import struct
10

11
# XXX Clean up once str8's cstor matches bytes.
# Every opcode is stored as a length-1 bytes object so that it can be
# compared directly with one-byte pieces of a code object's co_code.
LOAD_CONST = bytes([dis.opmap['LOAD_CONST']])
IMPORT_NAME = bytes([dis.opmap['IMPORT_NAME']])
STORE_NAME = bytes([dis.opmap['STORE_NAME']])
STORE_GLOBAL = bytes([dis.opmap['STORE_GLOBAL']])
# Opcodes that bind a name at module/global level.
STORE_OPS = [STORE_NAME, STORE_GLOBAL]
# Opcodes comparing >= this value carry an argument.
HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT])
18

19 20 21
# Modulefinder does a good job at simulating Python's import machinery, but
# it cannot handle __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.

# Note this is a mapping from package names to lists of paths.
packagePathMap = {}

# A Public interface
def AddPackagePath(packagename, path):
    """Register an extra search directory for a package.

    *path* is appended to the list stored under *packagename* in
    packagePathMap; the list is created on first use.  Calling this more
    than once for the same package accumulates paths in call order.
    """
    packagePathMap.setdefault(packagename, []).append(path)
30

31 32
replacePackageMap = {}

# This ReplacePackage mechanism allows modulefinder to work around
# situations in which a package injects itself under the name
# of another package into sys.modules at runtime by calling
# ReplacePackage("real_package_name", "faked_package_name")
# before running ModuleFinder.

def ReplacePackage(oldname, newname):
    """Record that package *oldname* should be known as *newname*.

    The pair is stored in replacePackageMap; a later call with the same
    *oldname* overwrites the earlier entry.
    """
    replacePackageMap[oldname] = newname


43 44 45
class Module:
    """Lightweight record describing one module found during a scan.

    Attributes mirror the dunder names of a real module object:
    ``__name__``, ``__file__``, ``__path__`` (None unless set by the code
    that created the record) and ``__code__`` (None until a code object is
    attached).
    """

    def __init__(self, name, file=None, path=None):
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        # The set of global names that are assigned to in the module.
        # This includes those names imported through starimports of
        # Python modules.  (Stored as a dict used as a set: name -> 1.)
        self.globalnames = {}
        # The set of starimports this module did that could not be
        # resolved, ie. a starimport from a non-Python module.
        # (Also a dict used as a set.)
        self.starimports = {}

    def __repr__(self):
        """Return ``Module(name[, file[, path]])``, omitting None fields."""
        parts = [repr(self.__name__)]
        if self.__file__ is not None:
            parts.append(repr(self.__file__))
        if self.__path__ is not None:
            parts.append(repr(self.__path__))
        return "Module(%s)" % ", ".join(parts)
66 67 68

class ModuleFinder:
    """Work out which modules a script imports, without running it.

    Source files are compiled (compiled files are unmarshalled) and the
    resulting code objects are scanned for import and name-store opcodes.
    Modules that were located end up in ``self.modules`` (fully qualified
    name -> Module); names whose import failed end up in
    ``self.badmodules`` (name -> {importer name: 1}).
    """

    def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
        """Set up an empty finder.

        path          -- list of directories to search (default: sys.path)
        debug         -- verbosity; messages with level <= debug are printed
        excludes      -- fully qualified names that must not be looked up
        replace_paths -- sequence of (old_prefix, new_prefix) pairs applied
                         to co_filename of every loaded code object

        The list defaults are created per instance, so mutating one
        finder's ``excludes`` or ``replace_paths`` never affects another.
        """
        if path is None:
            path = sys.path
        self.path = path
        self.modules = {}
        self.badmodules = {}
        self.debug = debug
        self.indent = 0
        self.excludes = excludes if excludes is not None else []
        self.replace_paths = replace_paths if replace_paths is not None else []
        self.processed_paths = []   # Used in debugging only

    def msg(self, level, str, *args):
        """Print *str* and the repr of each arg if level <= self.debug.

        The line is prefixed according to the current indent depth.
        """
        if level <= self.debug:
            for i in range(self.indent):
                print("   ", end=' ')
            print(str, end=' ')
            for arg in args:
                print(repr(arg), end=' ')
            print()

    def msgin(self, *args):
        """Like msg(), but first increase the indent depth by one."""
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent + 1
            self.msg(*args)

    def msgout(self, *args):
        """Like msg(), but first decrease the indent depth by one."""
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent - 1
            self.msg(*args)

    def run_script(self, pathname):
        """Load the file at *pathname* as the ``__main__`` module."""
        self.msg(2, "run_script", pathname)
        with open(pathname) as fp:
            stuff = ("", "r", imp.PY_SOURCE)
            self.load_module('__main__', fp, pathname, stuff)

    def load_file(self, pathname):
        """Load the source file at *pathname* under its base name."""
        name, ext = os.path.splitext(os.path.basename(pathname))
        with open(pathname) as fp:
            stuff = (ext, "r", imp.PY_SOURCE)
            self.load_module(name, fp, pathname, stuff)

    def import_hook(self, name, caller=None, fromlist=None, level=-1):
        """Simulate ``import name`` / ``from name import fromlist``.

        Returns the head package when *fromlist* is empty, otherwise None.
        Raises ImportError when a component of *name* (or a fromlist entry
        of a package) cannot be found.
        """
        self.msg(3, "import_hook", name, caller, fromlist, level)
        parent = self.determine_parent(caller, level=level)
        q, tail = self.find_head_package(parent, name)
        m = self.load_tail(q, tail)
        if not fromlist:
            return q
        if m.__path__:
            self.ensure_fromlist(m, fromlist)
        return None

    def determine_parent(self, caller, level=-1):
        """Return the package an import issued by *caller* is relative to.

        Returns None when there is no caller, when *level* is 0 (absolute
        import), or when the caller is a top-level non-package module.
        Raises ImportError when *level* climbs above the top-level package.
        """
        self.msgin(4, "determine_parent", caller, level)
        if not caller or level == 0:
            self.msgout(4, "determine_parent -> None")
            return None
        pname = caller.__name__
        if level >= 1: # relative import
            if caller.__path__:
                # A package is its own first level.
                level -= 1
            if level == 0:
                parent = self.modules[pname]
                assert parent is caller
                self.msgout(4, "determine_parent ->", parent)
                return parent
            if pname.count(".") < level:
                raise ImportError("relative importpath too deep")
            pname = ".".join(pname.split(".")[:-level])
            parent = self.modules[pname]
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if caller.__path__:
            parent = self.modules[pname]
            assert caller is parent
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if '.' in pname:
            pname = pname.rpartition('.')[0]
            parent = self.modules[pname]
            assert parent.__name__ == pname
            self.msgout(4, "determine_parent ->", parent)
            return parent
        self.msgout(4, "determine_parent -> None")
        return None

    def find_head_package(self, parent, name):
        """Import the first dotted component of *name*.

        The component is first tried as a submodule of *parent* (if any),
        then as a top-level module.  Returns ``(module, remaining_tail)``;
        raises ImportError if neither attempt succeeds.
        """
        self.msgin(4, "find_head_package", parent, name)
        head, _, tail = name.partition('.')
        if parent:
            qname = "%s.%s" % (parent.__name__, head)
        else:
            qname = head
        q = self.import_module(head, qname, parent)
        if q:
            self.msgout(4, "find_head_package ->", (q, tail))
            return q, tail
        if parent:
            # Fall back to a top-level lookup.
            qname = head
            parent = None
            q = self.import_module(head, qname, parent)
            if q:
                self.msgout(4, "find_head_package ->", (q, tail))
                return q, tail
        self.msgout(4, "raise ImportError: No module named", qname)
        raise ImportError("No module named " + qname)

    def load_tail(self, q, tail):
        """Import each remaining dotted component of *tail* below *q*.

        Returns the innermost module; raises ImportError on the first
        component that cannot be imported.
        """
        self.msgin(4, "load_tail", q, tail)
        m = q
        while tail:
            head, _, tail = tail.partition('.')
            mname = "%s.%s" % (m.__name__, head)
            m = self.import_module(head, mname, m)
            if not m:
                self.msgout(4, "raise ImportError: No module named", mname)
                raise ImportError("No module named " + mname)
        self.msgout(4, "load_tail ->", m)
        return m

    def ensure_fromlist(self, m, fromlist, recursive=0):
        """Import every *fromlist* entry that is not yet an attribute of *m*.

        A ``"*"`` entry expands to all submodules found on disk (one level
        only; *recursive* guards against re-expansion).  Raises ImportError
        for an entry that cannot be imported as a submodule.
        """
        self.msg(4, "ensure_fromlist", m, fromlist, recursive)
        for sub in fromlist:
            if sub == "*":
                if not recursive:
                    submodules = self.find_all_submodules(m)
                    if submodules:
                        self.ensure_fromlist(m, submodules, 1)
            elif not hasattr(m, sub):
                subname = "%s.%s" % (m.__name__, sub)
                submod = self.import_module(sub, subname, m)
                if not submod:
                    raise ImportError("No module named " + subname)

    def find_all_submodules(self, m):
        """Return the names of candidate submodules in m.__path__.

        Returns None when *m* has no __path__.  Directories that cannot be
        listed are skipped.
        """
        if not m.__path__:
            return
        modules = {}
        # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
        # But we must also collect Python extension modules - although
        # we cannot separate normal dlls from Python extensions.
        suffixes = [triple[0] for triple in imp.get_suffixes()]
        for dir in m.__path__:
            try:
                names = os.listdir(dir)
            except os.error:
                self.msg(2, "can't list directory", dir)
                continue
            for name in names:
                mod = None
                for suff in suffixes:
                    n = len(suff)
                    if name[-n:] == suff:
                        mod = name[:-n]
                        break
                if mod and mod != "__init__":
                    modules[mod] = mod
        return modules.keys()

    def import_module(self, partname, fqname, parent):
        """Return the module *fqname*, loading it if necessary.

        Returns None (never raises ImportError) when the module is already
        known to be bad, when *parent* is not a package, or when it cannot
        be found.  On success the module is also set as attribute
        *partname* of *parent*.
        """
        self.msgin(3, "import_module", partname, fqname, parent)
        try:
            m = self.modules[fqname]
        except KeyError:
            pass
        else:
            self.msgout(3, "import_module ->", m)
            return m
        if fqname in self.badmodules:
            self.msgout(3, "import_module -> None")
            return None
        if parent and parent.__path__ is None:
            self.msgout(3, "import_module -> None")
            return None
        try:
            fp, pathname, stuff = self.find_module(partname,
                                                   parent and parent.__path__, parent)
        except ImportError:
            self.msgout(3, "import_module ->", None)
            return None
        try:
            m = self.load_module(fqname, fp, pathname, stuff)
        finally:
            if fp:
                fp.close()
        if parent:
            setattr(parent, partname, m)
        self.msgout(3, "import_module ->", m)
        return m

    def load_module(self, fqname, fp, pathname, file_info):
        """Register module *fqname* and scan its code, if it has any.

        *file_info* is a ``(suffix, mode, type)`` triple; *type* selects
        how *fp* is turned into a code object.  Raises ImportError for a
        compiled file whose magic number does not match.
        """
        suffix, mode, type = file_info
        self.msgin(2, "load_module", fqname, fp and "fp", pathname)
        if type == imp.PKG_DIRECTORY:
            m = self.load_package(fqname, pathname)
            self.msgout(2, "load_module ->", m)
            return m
        if type == imp.PY_SOURCE:
            co = compile(fp.read()+'\n', pathname, 'exec')
        elif type == imp.PY_COMPILED:
            if fp.read(4) != imp.get_magic():
                self.msgout(2, "raise ImportError: Bad magic number", pathname)
                raise ImportError("Bad magic number in %s" % pathname)
            # Skip the 4 header bytes that follow the magic number
            # (presumably the source timestamp -- confirm for the target
            # interpreter version).
            fp.read(4)
            co = marshal.load(fp)
        else:
            co = None
        m = self.add_module(fqname)
        m.__file__ = pathname
        if co:
            if self.replace_paths:
                co = self.replace_paths_in_code(co)
            m.__code__ = co
            self.scan_code(co, m)
        self.msgout(2, "load_module ->", m)
        return m

    def _add_badmodule(self, name, caller):
        """Record that importing *name* failed, noting who asked for it.

        The importer is keyed by caller.__name__, or "-" without a caller.
        """
        if name not in self.badmodules:
            self.badmodules[name] = {}
        if caller:
            self.badmodules[name][caller.__name__] = 1
        else:
            self.badmodules[name]["-"] = 1

    def _safe_import_hook(self, name, caller, fromlist, level=-1):
        """Wrapper for self.import_hook() that won't raise ImportError.

        Failures are recorded in self.badmodules instead.
        """
        if name in self.badmodules:
            self._add_badmodule(name, caller)
            return
        try:
            self.import_hook(name, caller, level=level)
        except ImportError as msg:
            self.msg(2, "ImportError:", str(msg))
            self._add_badmodule(name, caller)
        else:
            if fromlist:
                for sub in fromlist:
                    # Bad submodules are recorded under their fully
                    # qualified name, so that is the key to look up.
                    fullname = name + "." + sub
                    if fullname in self.badmodules:
                        self._add_badmodule(fullname, caller)
                        continue
                    try:
                        self.import_hook(name, caller, [sub], level=level)
                    except ImportError as msg:
                        self.msg(2, "ImportError:", str(msg))
                        self._add_badmodule(fullname, caller)

    def scan_opcodes(self, co,
                     unpack = struct.unpack):
        """Yield ("store", ...) and ("import", ...) events found in *co*."""
        # Scan the code, and yield 'interesting' opcode combinations
        # Version for Python 2.4 and older
        code = co.co_code
        names = co.co_names
        consts = co.co_consts
        while code:
            # Indexing bytes gives an int; wrap it so it compares with the
            # length-1 bytes opcode constants.
            c = bytes([code[0]])
            if c in STORE_OPS:
                oparg, = unpack('<H', code[1:3])
                yield "store", (names[oparg],)
                code = code[3:]
                continue
            # Slice (not index) so a short tail cannot raise IndexError.
            if c == LOAD_CONST and code[3:4] == IMPORT_NAME:
                oparg_1, oparg_2 = unpack('<xHxH', code[:6])
                yield "import", (consts[oparg_1], names[oparg_2])
                code = code[6:]
                continue
            if c >= HAVE_ARGUMENT:
                code = code[3:]
            else:
                code = code[1:]

    def scan_opcodes_25(self, co,
                     unpack = struct.unpack):
        """Yield "store", "absolute_import" and "relative_import" events."""
        # Scan the code, and yield 'interesting' opcode combinations
        # Python 2.5 version (has absolute and relative imports)
        code = co.co_code
        names = co.co_names
        consts = co.co_consts
        LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
        while code:
            c = bytes([code[0]])
            if c in STORE_OPS:
                oparg, = unpack('<H', code[1:3])
                yield "store", (names[oparg],)
                code = code[3:]
                continue
            # Every third byte of the next nine is the opcode of one of
            # three consecutive 3-byte instructions.
            if code[:9:3] == LOAD_LOAD_AND_IMPORT:
                oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9])
                level = consts[oparg_1]
                if level == 0: # absolute import
                    yield "absolute_import", (consts[oparg_2], names[oparg_3])
                else: # relative import
                    yield "relative_import", (level, consts[oparg_2], names[oparg_3])
                code = code[9:]
                continue
            if c >= HAVE_ARGUMENT:
                code = code[3:]
            else:
                code = code[1:]

    def scan_code(self, co, m):
        """Process the imports and global stores of *co* on behalf of *m*.

        Nested code objects found in co.co_consts are scanned recursively.
        Raises RuntimeError for an event kind this method does not handle.
        """
        if sys.version_info >= (2, 5):
            scanner = self.scan_opcodes_25
        else:
            scanner = self.scan_opcodes
        for what, args in scanner(co):
            if what == "store":
                name, = args
                m.globalnames[name] = 1
            elif what == "absolute_import":
                fromlist, name = args
                have_star = 0
                if fromlist is not None:
                    if "*" in fromlist:
                        have_star = 1
                    fromlist = [f for f in fromlist if f != "*"]
                self._safe_import_hook(name, m, fromlist, level=0)
                if have_star:
                    # We've encountered an "import *". If it is a Python module,
                    # the code has already been parsed and we can suck out the
                    # global names.
                    mm = None
                    if m.__path__:
                        # At this point we don't know whether 'name' is a
                        # submodule of 'm' or a global module. Let's just try
                        # the full name first.
                        mm = self.modules.get(m.__name__ + "." + name)
                    if mm is None:
                        mm = self.modules.get(name)
                    if mm is not None:
                        m.globalnames.update(mm.globalnames)
                        m.starimports.update(mm.starimports)
                        if mm.__code__ is None:
                            m.starimports[name] = 1
                    else:
                        m.starimports[name] = 1
            elif what == "relative_import":
                level, fromlist, name = args
                if name:
                    self._safe_import_hook(name, m, fromlist, level=level)
                else:
                    parent = self.determine_parent(m, level=level)
                    self._safe_import_hook(parent.__name__, None, fromlist, level=0)
            else:
                # We don't expect anything else from the generator.
                raise RuntimeError(what)

        for c in co.co_consts:
            if isinstance(c, type(co)):
                self.scan_code(c, m)

    def load_package(self, fqname, pathname):
        """Register the package in directory *pathname* and load its __init__.

        The name is first translated through replacePackageMap, and extra
        search paths from packagePathMap are appended to __path__.
        """
        self.msgin(2, "load_package", fqname, pathname)
        newname = replacePackageMap.get(fqname)
        if newname:
            fqname = newname
        m = self.add_module(fqname)
        m.__file__ = pathname
        m.__path__ = [pathname]

        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = m.__path__ + packagePathMap.get(fqname, [])

        fp, buf, stuff = self.find_module("__init__", m.__path__)
        try:
            self.load_module(fqname, fp, buf, stuff)
            self.msgout(2, "load_package ->", m)
            return m
        finally:
            if fp:
                fp.close()

    def add_module(self, fqname):
        """Return the Module registered as *fqname*, creating it if needed."""
        if fqname in self.modules:
            return self.modules[fqname]
        self.modules[fqname] = m = Module(fqname)
        return m

    def find_module(self, name, path, parent=None):
        """Locate *name* and return ``(file, pathname, description)``.

        Raises ImportError when the fully qualified name is excluded.
        With *path* None, builtin modules are reported without a file and
        everything else is searched for on self.path.
        """
        if parent is not None:
            # assert path is not None
            fullname = parent.__name__+'.'+name
        else:
            fullname = name
        if fullname in self.excludes:
            self.msgout(3, "find_module -> Excluded", fullname)
            raise ImportError(name)

        if path is None:
            if name in sys.builtin_module_names:
                return (None, None, ("", "", imp.C_BUILTIN))

            path = self.path
        return imp.find_module(name, path)

    def report(self):
        """Print a report to stdout, listing the found modules with their
        paths, as well as modules that are missing, or seem to be missing.
        """
        print()
        print("  %-25s %s" % ("Name", "File"))
        print("  %-25s %s" % ("----", "----"))
        # Print modules found
        keys = sorted(self.modules.keys())
        for key in keys:
            m = self.modules[key]
            if m.__path__:
                print("P", end=' ')
            else:
                print("m", end=' ')
            print("%-25s" % key, m.__file__ or "")

        # Print missing modules
        missing, maybe = self.any_missing_maybe()
        if missing:
            print()
            print("Missing modules:")
            for name in missing:
                mods = sorted(self.badmodules[name].keys())
                print("?", name, "imported from", ', '.join(mods))
        # Print modules that may be missing, but then again, maybe not...
        if maybe:
            print()
            print("Submodules that appear to be missing, but could also be", end=' ')
            print("global names in the parent package:")
            for name in maybe:
                mods = sorted(self.badmodules[name].keys())
                print("?", name, "imported from", ', '.join(mods))

    def any_missing(self):
        """Return a list of modules that appear to be missing. Use
        any_missing_maybe() if you want to know which modules are
        certain to be missing, and which *may* be missing.
        """
        missing, maybe = self.any_missing_maybe()
        return missing + maybe

    def any_missing_maybe(self):
        """Return two lists, one with modules that are certainly missing
        and one with modules that *may* be missing. The latter names could
        either be submodules *or* just global names in the package.

        The reason it can't always be determined is that it's impossible to
        tell which names are imported when "from module import *" is done
        with an extension module, short of actually importing it.
        """
        missing = []
        maybe = []
        for name in self.badmodules:
            if name in self.excludes:
                continue
            i = name.rfind(".")
            if i < 0:
                missing.append(name)
                continue
            subname = name[i+1:]
            pkgname = name[:i]
            pkg = self.modules.get(pkgname)
            if pkg is not None:
                if pkgname in self.badmodules[name]:
                    # The package tried to import this module itself and
                    # failed. It's definitely missing.
                    missing.append(name)
                elif subname in pkg.globalnames:
                    # It's a global in the package: definitely not missing.
                    pass
                elif pkg.starimports:
                    # It could be missing, but the package did an "import *"
                    # from a non-Python module, so we simply can't be sure.
                    maybe.append(name)
                else:
                    # It's not a global in the package, the package didn't
                    # do funny star imports, it's very likely to be missing.
                    # The symbol could be inserted into the package from the
                    # outside, but since that's not good style we simply list
                    # it missing.
                    missing.append(name)
            else:
                missing.append(name)
        missing.sort()
        maybe.sort()
        return missing, maybe

    def replace_paths_in_code(self, co):
        """Return a copy of *co* with co_filename rewritten.

        The first ``(old, new)`` pair in self.replace_paths whose *old* is
        a prefix of the normalized filename is applied.  Code objects
        nested in co.co_consts are rewritten recursively.
        """
        new_filename = original_filename = os.path.normpath(co.co_filename)
        for f, r in self.replace_paths:
            if original_filename.startswith(f):
                new_filename = r + original_filename[len(f):]
                break

        if self.debug and original_filename not in self.processed_paths:
            if new_filename != original_filename:
                self.msgout(2, "co_filename %r changed to %r" \
                                    % (original_filename,new_filename,))
            else:
                self.msgout(2, "co_filename %r remains unchanged" \
                                    % (original_filename,))
            self.processed_paths.append(original_filename)

        consts = list(co.co_consts)
        for i, const in enumerate(consts):
            if isinstance(const, type(co)):
                consts[i] = self.replace_paths_in_code(const)

        if hasattr(co, "replace"):
            # The positional signature of types.CodeType differs between
            # interpreter versions; where the code object can copy itself,
            # let it do so and override only the two fields that change.
            return co.replace(co_consts=tuple(consts), co_filename=new_filename)
        return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize,
                         co.co_flags, co.co_code, tuple(consts), co.co_names,
                         co.co_varnames, new_filename, co.co_name,
                         co.co_firstlineno, co.co_lnotab,
                         co.co_freevars, co.co_cellvars)
596

597 598 599 600 601

def test():
    """Command-line driver: scan a script and print a module report.

    Options: -d (more debug output), -q (no debug output), -m (treat the
    remaining arguments as module names rather than files), -p PATH (extra
    search directories, os.pathsep-separated), -x NAME (exclude a module).
    The first positional argument is the script (default "hello.py").

    Returns the ModuleFinder instance, or None if option parsing failed.
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error as msg:
        print(msg)
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for o, a in opts:
        if o == '-d':
            debug += 1
        elif o == '-m':
            domods = 1
        elif o == '-p':
            addpath = addpath + a.split(os.pathsep)
        elif o == '-q':
            debug = 0
        elif o == '-x':
            exclude.append(a)

    # Provide default arguments
    script = args[0] if args else "hello.py"

    # Set the path based on sys.path and the script directory
    path = sys.path[:]
    path[0] = os.path.dirname(script)
    path = addpath + path
    if debug > 1:
        print("path:")
        for item in path:
            print("   ", repr(item))

    # Create the module finder and turn its crank
    mf = ModuleFinder(path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            domods = 1
            continue
        if domods:
            if arg.endswith('.*'):
                mf.import_hook(arg[:-2], None, ["*"])
            else:
                mf.import_hook(arg)
        else:
            mf.load_file(arg)
    mf.run_script(script)
    mf.report()
    return mf  # for -i debugging
655 656 657 658


if __name__ == '__main__':
    # Keep the finder in a module-level name so it can be inspected when
    # the script is run with "python -i".
    try:
        mf = test()
    except KeyboardInterrupt:
        print("\n[interrupted]")