diff options
author | Just van Rossum <just@letterror.com> | 2002-12-31 16:33:00 (GMT) |
---|---|---|
committer | Just van Rossum <just@letterror.com> | 2002-12-31 16:33:00 (GMT) |
commit | e29310a2b3e50958b00c0e92b142992599cf7ef1 (patch) | |
tree | 00e5356a13d849e24917891c66c4098ebf3762e0 | |
parent | cbd6cd2312ee99d78bc3250cfdbfaff16dccb021 (diff) | |
download | cpython-e29310a2b3e50958b00c0e92b142992599cf7ef1.zip cpython-e29310a2b3e50958b00c0e92b142992599cf7ef1.tar.gz cpython-e29310a2b3e50958b00c0e92b142992599cf7ef1.tar.bz2 |
patch attached to sf item #643711:
any_missing() returns less bogus missing modules.
- I've rewritten scan_code() more or less from scratch,
factored bits and pieces out for readability.
- keep track of global assignments and failed imports per
module; use this to determine whether the Y in "from X
import Y" is a submodule or just a global name. This is not
100% doable: you can't tell which symbols are imported when
doing a star import of a non-Python module short of actually
importing it.
- added a new method to ModuleFinder: any_missing_maybe(),
which returns *two* lists, one with certain misses, one with
possible misses. The possible misses are *very* often false
alarms, so it's useful to keep this list separate.
any_misses() now simply returns the union of
any_missing_maybe().
TODO: documentation, test_modulefinder.py
-rw-r--r-- | Lib/modulefinder.py | 199 |
1 files changed, 150 insertions, 49 deletions
diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py index cd03db4..9d524c2 100644 --- a/Lib/modulefinder.py +++ b/Lib/modulefinder.py @@ -15,12 +15,11 @@ else: # remain compatible with Python < 2.3 READ_MODE = "r" +LOAD_CONST = dis.opname.index('LOAD_CONST') IMPORT_NAME = dis.opname.index('IMPORT_NAME') -IMPORT_FROM = dis.opname.index('IMPORT_FROM') STORE_NAME = dis.opname.index('STORE_NAME') -STORE_FAST = dis.opname.index('STORE_FAST') STORE_GLOBAL = dis.opname.index('STORE_GLOBAL') -STORE_OPS = [STORE_NAME, STORE_FAST, STORE_GLOBAL] +STORE_OPS = [STORE_NAME, STORE_GLOBAL] # Modulefinder does a good job at simulating Python's, but it can not # handle __path__ modifications packages make at runtime. Therefore there @@ -54,6 +53,13 @@ class Module: self.__file__ = file self.__path__ = path self.__code__ = None + # The set of global names that are assigned to in the module. + # This includes those names imported through starimports of + # Python modules. + self.globalnames = {} + # The set of starimports this module did that could not be + # resolved, ie. a starimport from a non-Python module. + self.starimports = {} def __repr__(self): s = "Module(%s" % `self.__name__` @@ -66,7 +72,7 @@ class Module: class ModuleFinder: - def __init__(self, path=None, debug=0, excludes = [], replace_paths = []): + def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]): if path is None: path = sys.path self.path = path @@ -233,8 +239,6 @@ class ModuleFinder: return m if self.badmodules.has_key(fqname): self.msgout(3, "import_module -> None") - if parent: - self.badmodules[fqname][parent.__name__] = None return None try: fp, pathname, stuff = self.find_module(partname, @@ -277,11 +281,39 @@ class ModuleFinder: self.msgout(2, "load_module ->", m) return m + def _add_badmodule(self, name, caller): + if name not in self.badmodules: + self.badmodules[name] = {} + self.badmodules[name][caller.__name__] = 1 + + def _safe_import_hook(self, name, caller, fromlist): + # wrapper for self.import_hook() that won't raise ImportError + if name in self.badmodules: + self._add_badmodule(name, caller) + return + try: + self.import_hook(name, caller) + except ImportError, msg: + self.msg(2, "ImportError:", str(msg)) + self._add_badmodule(name, caller) + else: + if fromlist: + for sub in fromlist: + if sub in self.badmodules: + self._add_badmodule(sub, caller) + continue + try: + self.import_hook(name, caller, [sub]) + except ImportError, msg: + self.msg(2, "ImportError:", str(msg)) + fullname = name + "." + sub + self._add_badmodule(fullname, caller) + def scan_code(self, co, m): code = co.co_code n = len(code) i = 0 - lastname = None + fromlist = None while i < n: c = code[i] i = i+1 @@ -289,33 +321,43 @@ class ModuleFinder: if op >= dis.HAVE_ARGUMENT: oparg = ord(code[i]) + ord(code[i+1])*256 i = i+2 - if op == IMPORT_NAME: - name = lastname = co.co_names[oparg] - if not self.badmodules.has_key(lastname): - try: - self.import_hook(name, m) - except ImportError, msg: - self.msg(2, "ImportError:", str(msg)) - if not self.badmodules.has_key(name): - self.badmodules[name] = {} - self.badmodules[name][m.__name__] = None - elif op == IMPORT_FROM: + if op == LOAD_CONST: + # An IMPORT_NAME is always preceded by a LOAD_CONST, it's + # a tuple of "from" names, or None for a regular import. + # The tuple may contain "*" for "from <mod> import *" + fromlist = co.co_consts[oparg] + elif op == IMPORT_NAME: + assert fromlist is None or type(fromlist) is tuple name = co.co_names[oparg] - assert lastname is not None - if not self.badmodules.has_key(lastname): - try: - self.import_hook(lastname, m, [name]) - except ImportError, msg: - self.msg(2, "ImportError:", str(msg)) - fullname = lastname + "." + name - if not self.badmodules.has_key(fullname): - self.badmodules[fullname] = {} - self.badmodules[fullname][m.__name__] = None + have_star = 0 + if fromlist is not None: + if "*" in fromlist: + have_star = 1 + fromlist = [f for f in fromlist if f != "*"] + self._safe_import_hook(name, m, fromlist) + if have_star: + # We've encountered an "import *". If it is a Python module, + # the code has already been parsed and we can suck out the + # global names. + mm = None + if m.__path__: + # At this point we don't know whether 'name' is a + # submodule of 'm' or a global module. Let's just try + # the full name first. + mm = self.modules.get(m.__name__ + "." + name) + if mm is None: + mm = self.modules.get(name) + if mm is not None: + m.globalnames.update(mm.globalnames) + m.starimports.update(mm.starimports) + if mm.__code__ is None: + m.starimports[name] = 1 + else: + m.starimports[name] = 1 elif op in STORE_OPS: - # Skip; each IMPORT_FROM is followed by a STORE_* opcode - pass - else: - lastname = None + # keep track of all global names that are assigned to + name = co.co_names[oparg] + m.globalnames[name] = 1 for c in co.co_consts: if isinstance(c, type(co)): self.scan_code(c, m) @@ -360,6 +402,9 @@ class ModuleFinder: return imp.find_module(name, path) def report(self): + """Print a report to stdout, listing the found modules with their + paths, as well as modules that are missing, or seem to be missing. + """ print print " %-25s %s" % ("Name", "File") print " %-25s %s" % ("----", "----") @@ -367,6 +412,7 @@ class ModuleFinder: keys = self.modules.keys() keys.sort() for key in keys: + continue m = self.modules[key] if m.__path__: print "P", @@ -375,33 +421,87 @@ class ModuleFinder: print "%-25s" % key, m.__file__ or "" # Print missing modules - keys = self.badmodules.keys() - keys.sort() - for key in keys: - # ... but not if they were explicitly excluded. - if key not in self.excludes: - mods = self.badmodules[key].keys() + missing, maybe = self.any_missing_maybe() + if missing: + print + print "Missing modules:" + for name in missing: + mods = self.badmodules[name].keys() mods.sort() - print "?", key, "from", ', '.join(mods) + print "?", name, "imported from", ', '.join(mods) + # Print modules that may be missing, but then again, maybe not... + if maybe: + print + print "Submodules thay appear to be missing, but could also be", + print "global names in the parent package:" + for name in maybe: + mods = self.badmodules[name].keys() + mods.sort() + print "?", name, "imported from", ', '.join(mods) def any_missing(self): - keys = self.badmodules.keys() + """Return a list of modules that appear to be missing. Use + any_missing_maybe() if you want to know which modules are + certain to be missing, and which *may* be missing. + """ + missing, maybe = self.any_missing_maybe() + return missing + maybe + + def any_missing_maybe(self): + """Return two lists, one with modules that are certainly missing + and one with modules that *may* be missing. The latter names could + either be submodules *or* just global names in the package. + + The reason it can't always be determined is that it's impossible to + tell which names are imported when "from module import *" is done + with an extension module, short of actually importing it. + """ missing = [] - for key in keys: - if key not in self.excludes: - # Missing, and its not supposed to be - missing.append(key) - return missing + maybe = [] + for name in self.badmodules: + if name in self.excludes: + continue + i = name.rfind(".") + if i < 0: + missing.append(name) + continue + subname = name[i+1:] + pkgname = name[:i] + pkg = self.modules.get(pkgname) + if pkg is not None: + if pkgname in self.badmodules[name]: + # The package tried to import this module itself and + # failed. It's definitely missing. + missing.append(name) + elif subname in pkg.globalnames: + # It's a global in the package: definitely not missing. + pass + elif pkg.starimports: + # It could be missing, but the package did an "import *" + # from a non-Python module, so we simply can't be sure. + maybe.append(name) + else: + # It's not a global in the package, the package didn't + # do funny star imports, it's very likely to be missing. + # The symbol could be inserted into the package from the + # outside, but since that's not good style we simply list + # it missing. + missing.append(name) + else: + missing.append(name) + missing.sort() + maybe.sort() + return missing, maybe def replace_paths_in_code(self, co): new_filename = original_filename = os.path.normpath(co.co_filename) - for f,r in self.replace_paths: + for f, r in self.replace_paths: if original_filename.startswith(f): - new_filename = r+original_filename[len(f):] + new_filename = r + original_filename[len(f):] break if self.debug and original_filename not in self.processed_paths: - if new_filename!=original_filename: + if new_filename != original_filename: self.msgout(2, "co_filename %r changed to %r" \ % (original_filename,new_filename,)) else: @@ -477,10 +577,11 @@ def test(): mf.load_file(arg) mf.run_script(script) mf.report() + return mf # for -i debugging if __name__ == '__main__': try: - test() + mf = test() except KeyboardInterrupt: print "\n[interrupt]" |