summaryrefslogtreecommitdiffstats
path: root/Lib/modulefinder.py
diff options
context:
space:
mode:
authorJust van Rossum <just@letterror.com>2002-12-31 16:27:33 (GMT)
committerJust van Rossum <just@letterror.com>2002-12-31 16:27:33 (GMT)
commit41c554fbec1be5412aea2b388f0952657a2f07e7 (patch)
treebce957259469d38a7297bb5395afdecd2116274a /Lib/modulefinder.py
parent6700e47b3c278dce4e4f12ed9e85964efa46788c (diff)
downloadcpython-41c554fbec1be5412aea2b388f0952657a2f07e7.zip
cpython-41c554fbec1be5412aea2b388f0952657a2f07e7.tar.gz
cpython-41c554fbec1be5412aea2b388f0952657a2f07e7.tar.bz2
moving modulefinder.py to the standard library
Diffstat (limited to 'Lib/modulefinder.py')
-rw-r--r--Lib/modulefinder.py486
1 files changed, 486 insertions, 0 deletions
diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py
new file mode 100644
index 0000000..cd03db4
--- /dev/null
+++ b/Lib/modulefinder.py
@@ -0,0 +1,486 @@
+"""Find modules used by a script, using introspection."""
+
+# This module should be kept compatible with Python 1.5.2, see PEP 291.
+
+import dis
+import imp
+import marshal
+import os
+import sys
+import new
+
+if hasattr(sys.__stdout__, "newlines"):
+ READ_MODE = "U" # universal line endings
+else:
+ # remain compatible with Python < 2.3
+ READ_MODE = "r"
+
+IMPORT_NAME = dis.opname.index('IMPORT_NAME')
+IMPORT_FROM = dis.opname.index('IMPORT_FROM')
+STORE_NAME = dis.opname.index('STORE_NAME')
+STORE_FAST = dis.opname.index('STORE_FAST')
+STORE_GLOBAL = dis.opname.index('STORE_GLOBAL')
+STORE_OPS = [STORE_NAME, STORE_FAST, STORE_GLOBAL]
+
+# Modulefinder does a good job at simulating Python's, but it can not
+# handle __path__ modifications packages make at runtime. Therefore there
+# is a mechanism whereby you can register extra paths in this map for a
+# package, and it will be honored.
+
+# Note this is a mapping is lists of paths.
+packagePathMap = {}
+
+# A Public interface
+def AddPackagePath(packagename, path):
+ paths = packagePathMap.get(packagename, [])
+ paths.append(path)
+ packagePathMap[packagename] = paths
+
+replacePackageMap = {}
+
+# This ReplacePackage mechanism allows modulefinder to work around the
+# way the _xmlplus package injects itself under the name "xml" into
+# sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml")
+# before running ModuleFinder.
+
+def ReplacePackage(oldname, newname):
+ replacePackageMap[oldname] = newname
+
+
+class Module:
+
+ def __init__(self, name, file=None, path=None):
+ self.__name__ = name
+ self.__file__ = file
+ self.__path__ = path
+ self.__code__ = None
+
+ def __repr__(self):
+ s = "Module(%s" % `self.__name__`
+ if self.__file__ is not None:
+ s = s + ", %s" % `self.__file__`
+ if self.__path__ is not None:
+ s = s + ", %s" % `self.__path__`
+ s = s + ")"
+ return s
+
+class ModuleFinder:
+
+ def __init__(self, path=None, debug=0, excludes = [], replace_paths = []):
+ if path is None:
+ path = sys.path
+ self.path = path
+ self.modules = {}
+ self.badmodules = {}
+ self.debug = debug
+ self.indent = 0
+ self.excludes = excludes
+ self.replace_paths = replace_paths
+ self.processed_paths = [] # Used in debugging only
+
+ def msg(self, level, str, *args):
+ if level <= self.debug:
+ for i in range(self.indent):
+ print " ",
+ print str,
+ for arg in args:
+ print repr(arg),
+ print
+
+ def msgin(self, *args):
+ level = args[0]
+ if level <= self.debug:
+ self.indent = self.indent + 1
+ apply(self.msg, args)
+
+ def msgout(self, *args):
+ level = args[0]
+ if level <= self.debug:
+ self.indent = self.indent - 1
+ apply(self.msg, args)
+
+ def run_script(self, pathname):
+ self.msg(2, "run_script", pathname)
+ fp = open(pathname, READ_MODE)
+ stuff = ("", "r", imp.PY_SOURCE)
+ self.load_module('__main__', fp, pathname, stuff)
+
+ def load_file(self, pathname):
+ dir, name = os.path.split(pathname)
+ name, ext = os.path.splitext(name)
+ fp = open(pathname, READ_MODE)
+ stuff = (ext, "r", imp.PY_SOURCE)
+ self.load_module(name, fp, pathname, stuff)
+
+ def import_hook(self, name, caller=None, fromlist=None):
+ self.msg(3, "import_hook", name, caller, fromlist)
+ parent = self.determine_parent(caller)
+ q, tail = self.find_head_package(parent, name)
+ m = self.load_tail(q, tail)
+ if not fromlist:
+ return q
+ if m.__path__:
+ self.ensure_fromlist(m, fromlist)
+ return None
+
+ def determine_parent(self, caller):
+ self.msgin(4, "determine_parent", caller)
+ if not caller:
+ self.msgout(4, "determine_parent -> None")
+ return None
+ pname = caller.__name__
+ if caller.__path__:
+ parent = self.modules[pname]
+ assert caller is parent
+ self.msgout(4, "determine_parent ->", parent)
+ return parent
+ if '.' in pname:
+ i = pname.rfind('.')
+ pname = pname[:i]
+ parent = self.modules[pname]
+ assert parent.__name__ == pname
+ self.msgout(4, "determine_parent ->", parent)
+ return parent
+ self.msgout(4, "determine_parent -> None")
+ return None
+
+ def find_head_package(self, parent, name):
+ self.msgin(4, "find_head_package", parent, name)
+ if '.' in name:
+ i = name.find('.')
+ head = name[:i]
+ tail = name[i+1:]
+ else:
+ head = name
+ tail = ""
+ if parent:
+ qname = "%s.%s" % (parent.__name__, head)
+ else:
+ qname = head
+ q = self.import_module(head, qname, parent)
+ if q:
+ self.msgout(4, "find_head_package ->", (q, tail))
+ return q, tail
+ if parent:
+ qname = head
+ parent = None
+ q = self.import_module(head, qname, parent)
+ if q:
+ self.msgout(4, "find_head_package ->", (q, tail))
+ return q, tail
+ self.msgout(4, "raise ImportError: No module named", qname)
+ raise ImportError, "No module named " + qname
+
+ def load_tail(self, q, tail):
+ self.msgin(4, "load_tail", q, tail)
+ m = q
+ while tail:
+ i = tail.find('.')
+ if i < 0: i = len(tail)
+ head, tail = tail[:i], tail[i+1:]
+ mname = "%s.%s" % (m.__name__, head)
+ m = self.import_module(head, mname, m)
+ if not m:
+ self.msgout(4, "raise ImportError: No module named", mname)
+ raise ImportError, "No module named " + mname
+ self.msgout(4, "load_tail ->", m)
+ return m
+
+ def ensure_fromlist(self, m, fromlist, recursive=0):
+ self.msg(4, "ensure_fromlist", m, fromlist, recursive)
+ for sub in fromlist:
+ if sub == "*":
+ if not recursive:
+ all = self.find_all_submodules(m)
+ if all:
+ self.ensure_fromlist(m, all, 1)
+ elif not hasattr(m, sub):
+ subname = "%s.%s" % (m.__name__, sub)
+ submod = self.import_module(sub, subname, m)
+ if not submod:
+ raise ImportError, "No module named " + subname
+
+ def find_all_submodules(self, m):
+ if not m.__path__:
+ return
+ modules = {}
+ suffixes = [".py", ".pyc", ".pyo"]
+ for dir in m.__path__:
+ try:
+ names = os.listdir(dir)
+ except os.error:
+ self.msg(2, "can't list directory", dir)
+ continue
+ for name in names:
+ mod = None
+ for suff in suffixes:
+ n = len(suff)
+ if name[-n:] == suff:
+ mod = name[:-n]
+ break
+ if mod and mod != "__init__":
+ modules[mod] = mod
+ return modules.keys()
+
+ def import_module(self, partname, fqname, parent):
+ self.msgin(3, "import_module", partname, fqname, parent)
+ try:
+ m = self.modules[fqname]
+ except KeyError:
+ pass
+ else:
+ self.msgout(3, "import_module ->", m)
+ return m
+ if self.badmodules.has_key(fqname):
+ self.msgout(3, "import_module -> None")
+ if parent:
+ self.badmodules[fqname][parent.__name__] = None
+ return None
+ try:
+ fp, pathname, stuff = self.find_module(partname,
+ parent and parent.__path__)
+ except ImportError:
+ self.msgout(3, "import_module ->", None)
+ return None
+ try:
+ m = self.load_module(fqname, fp, pathname, stuff)
+ finally:
+ if fp: fp.close()
+ if parent:
+ setattr(parent, partname, m)
+ self.msgout(3, "import_module ->", m)
+ return m
+
+ def load_module(self, fqname, fp, pathname, (suffix, mode, type)):
+ self.msgin(2, "load_module", fqname, fp and "fp", pathname)
+ if type == imp.PKG_DIRECTORY:
+ m = self.load_package(fqname, pathname)
+ self.msgout(2, "load_module ->", m)
+ return m
+ if type == imp.PY_SOURCE:
+ co = compile(fp.read()+'\n', pathname, 'exec')
+ elif type == imp.PY_COMPILED:
+ if fp.read(4) != imp.get_magic():
+ self.msgout(2, "raise ImportError: Bad magic number", pathname)
+ raise ImportError, "Bad magic number in %s" % pathname
+ fp.read(4)
+ co = marshal.load(fp)
+ else:
+ co = None
+ m = self.add_module(fqname)
+ m.__file__ = pathname
+ if co:
+ if self.replace_paths:
+ co = self.replace_paths_in_code(co)
+ m.__code__ = co
+ self.scan_code(co, m)
+ self.msgout(2, "load_module ->", m)
+ return m
+
+ def scan_code(self, co, m):
+ code = co.co_code
+ n = len(code)
+ i = 0
+ lastname = None
+ while i < n:
+ c = code[i]
+ i = i+1
+ op = ord(c)
+ if op >= dis.HAVE_ARGUMENT:
+ oparg = ord(code[i]) + ord(code[i+1])*256
+ i = i+2
+ if op == IMPORT_NAME:
+ name = lastname = co.co_names[oparg]
+ if not self.badmodules.has_key(lastname):
+ try:
+ self.import_hook(name, m)
+ except ImportError, msg:
+ self.msg(2, "ImportError:", str(msg))
+ if not self.badmodules.has_key(name):
+ self.badmodules[name] = {}
+ self.badmodules[name][m.__name__] = None
+ elif op == IMPORT_FROM:
+ name = co.co_names[oparg]
+ assert lastname is not None
+ if not self.badmodules.has_key(lastname):
+ try:
+ self.import_hook(lastname, m, [name])
+ except ImportError, msg:
+ self.msg(2, "ImportError:", str(msg))
+ fullname = lastname + "." + name
+ if not self.badmodules.has_key(fullname):
+ self.badmodules[fullname] = {}
+ self.badmodules[fullname][m.__name__] = None
+ elif op in STORE_OPS:
+ # Skip; each IMPORT_FROM is followed by a STORE_* opcode
+ pass
+ else:
+ lastname = None
+ for c in co.co_consts:
+ if isinstance(c, type(co)):
+ self.scan_code(c, m)
+
+ def load_package(self, fqname, pathname):
+ self.msgin(2, "load_package", fqname, pathname)
+ newname = replacePackageMap.get(fqname)
+ if newname:
+ fqname = newname
+ m = self.add_module(fqname)
+ m.__file__ = pathname
+ m.__path__ = [pathname]
+
+ # As per comment at top of file, simulate runtime __path__ additions.
+ m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
+
+ fp, buf, stuff = self.find_module("__init__", m.__path__)
+ self.load_module(fqname, fp, buf, stuff)
+ self.msgout(2, "load_package ->", m)
+ return m
+
+ def add_module(self, fqname):
+ if self.modules.has_key(fqname):
+ return self.modules[fqname]
+ self.modules[fqname] = m = Module(fqname)
+ return m
+
+ def find_module(self, name, path):
+ if path:
+ fullname = '.'.join(path)+'.'+name
+ else:
+ fullname = name
+ if fullname in self.excludes:
+ self.msgout(3, "find_module -> Excluded", fullname)
+ raise ImportError, name
+
+ if path is None:
+ if name in sys.builtin_module_names:
+ return (None, None, ("", "", imp.C_BUILTIN))
+
+ path = self.path
+ return imp.find_module(name, path)
+
+ def report(self):
+ print
+ print " %-25s %s" % ("Name", "File")
+ print " %-25s %s" % ("----", "----")
+ # Print modules found
+ keys = self.modules.keys()
+ keys.sort()
+ for key in keys:
+ m = self.modules[key]
+ if m.__path__:
+ print "P",
+ else:
+ print "m",
+ print "%-25s" % key, m.__file__ or ""
+
+ # Print missing modules
+ keys = self.badmodules.keys()
+ keys.sort()
+ for key in keys:
+ # ... but not if they were explicitly excluded.
+ if key not in self.excludes:
+ mods = self.badmodules[key].keys()
+ mods.sort()
+ print "?", key, "from", ', '.join(mods)
+
+ def any_missing(self):
+ keys = self.badmodules.keys()
+ missing = []
+ for key in keys:
+ if key not in self.excludes:
+ # Missing, and its not supposed to be
+ missing.append(key)
+ return missing
+
+ def replace_paths_in_code(self, co):
+ new_filename = original_filename = os.path.normpath(co.co_filename)
+ for f,r in self.replace_paths:
+ if original_filename.startswith(f):
+ new_filename = r+original_filename[len(f):]
+ break
+
+ if self.debug and original_filename not in self.processed_paths:
+ if new_filename!=original_filename:
+ self.msgout(2, "co_filename %r changed to %r" \
+ % (original_filename,new_filename,))
+ else:
+ self.msgout(2, "co_filename %r remains unchanged" \
+ % (original_filename,))
+ self.processed_paths.append(original_filename)
+
+ consts = list(co.co_consts)
+ for i in range(len(consts)):
+ if isinstance(consts[i], type(co)):
+ consts[i] = self.replace_paths_in_code(consts[i])
+
+ return new.code(co.co_argcount, co.co_nlocals, co.co_stacksize,
+ co.co_flags, co.co_code, tuple(consts), co.co_names,
+ co.co_varnames, new_filename, co.co_name,
+ co.co_firstlineno, co.co_lnotab,
+ co.co_freevars, co.co_cellvars)
+
+
+def test():
+ # Parse command line
+ import getopt
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
+ except getopt.error, msg:
+ print msg
+ return
+
+ # Process options
+ debug = 1
+ domods = 0
+ addpath = []
+ exclude = []
+ for o, a in opts:
+ if o == '-d':
+ debug = debug + 1
+ if o == '-m':
+ domods = 1
+ if o == '-p':
+ addpath = addpath + a.split(os.pathsep)
+ if o == '-q':
+ debug = 0
+ if o == '-x':
+ exclude.append(a)
+
+ # Provide default arguments
+ if not args:
+ script = "hello.py"
+ else:
+ script = args[0]
+
+ # Set the path based on sys.path and the script directory
+ path = sys.path[:]
+ path[0] = os.path.dirname(script)
+ path = addpath + path
+ if debug > 1:
+ print "path:"
+ for item in path:
+ print " ", `item`
+
+ # Create the module finder and turn its crank
+ mf = ModuleFinder(path, debug, exclude)
+ for arg in args[1:]:
+ if arg == '-m':
+ domods = 1
+ continue
+ if domods:
+ if arg[-2:] == '.*':
+ mf.import_hook(arg[:-2], None, ["*"])
+ else:
+ mf.import_hook(arg)
+ else:
+ mf.load_file(arg)
+ mf.run_script(script)
+ mf.report()
+
+
+if __name__ == '__main__':
+ try:
+ test()
+ except KeyboardInterrupt:
+ print "\n[interrupt]"