From ceb3087e1c6456ab3c6db533bb7bc1b5c4ca97a9 Mon Sep 17 00:00:00 2001 From: "Phillip J. Eby" Date: Tue, 18 Apr 2006 00:59:55 +0000 Subject: Second phase of refactoring for runpy, pkgutil, pydoc, and setuptools to share common PEP 302 support code, as described here: http://mail.python.org/pipermail/python-dev/2006-April/063724.html pydoc now supports PEP 302 importers, by way of utility functions in pkgutil, such as 'walk_packages()'. It will properly document modules that are in zip files, and is backward compatible to Python 2.3 (setuptools installs for Python <2.5 will bundle it so pydoc doesn't break when used with eggs.) What has not changed is that pydoc command line options do not support zip paths or other importer paths, and the webserver index does not support sys.meta_path. Those are probably okay as limitations. Tasks remaining: write docs and Misc/NEWS for pkgutil/pydoc changes, and update setuptools to use pkgutil wherever possible, then add it to the stdlib. --- Lib/pkgutil.py | 171 +++++++++++++++++++++++++++++++++++++++++++++++++++-- Lib/pydoc.py | 182 +++++++++++++++++++++++---------------------------------- 2 files changed, 239 insertions(+), 114 deletions(-) diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py index d4fe6ca..24de5d1 100644 --- a/Lib/pkgutil.py +++ b/Lib/pkgutil.py @@ -11,6 +11,7 @@ from types import ModuleType __all__ = [ 'get_importer', 'iter_importers', 'get_loader', 'find_loader', + 'walk_packages', 'iter_modules', 'ImpImporter', 'ImpLoader', 'read_code', 'extend_path', ] @@ -27,6 +28,95 @@ def read_code(stream): return marshal.load(stream) +def simplegeneric(func): + """Make a trivial single-dispatch generic function""" + registry = {} + def wrapper(*args,**kw): + ob = args[0] + try: + cls = ob.__class__ + except AttributeError: + cls = type(ob) + try: + mro = cls.__mro__ + except AttributeError: + try: + class cls(cls,object): pass + mro = cls.__mro__[1:] + except TypeError: + mro = object, # must be an ExtensionClass or some such :( + for t in mro: + if t in registry: + return registry[t](*args,**kw) + else: + return func(*args,**kw) + try: + wrapper.__name__ = func.__name__ + except (TypeError,AttributeError): + pass # Python 2.3 doesn't allow functions to be renamed + + def register(typ, func=None): + if func is None: + return lambda f: register(typ, f) + registry[typ] = func + return func + + wrapper.__dict__ = func.__dict__ + wrapper.__doc__ = func.__doc__ + wrapper.register = register + return wrapper + + +def walk_packages(path=None, prefix='', onerror=None): + """Yield submodule names+loaders recursively, for path or sys.path""" + + def seen(p,m={}): + if p in m: return True + m[p] = True + + for importer, name, ispkg in iter_modules(path, prefix): + yield importer, name, ispkg + + if ispkg: + try: + __import__(name) + except ImportError: + if onerror is not None: + onerror() + else: + path = getattr(sys.modules[name], '__path__', None) or [] + + # don't traverse path items we've seen before + path = [p for p in path if not seen(p)] + + for item in walk_packages(path, name+'.'): + yield item + + +def iter_modules(path=None, prefix=''): + """Yield submodule names+loaders for path or sys.path""" + if path is None: + importers = iter_importers() + else: + importers = map(get_importer, path) + + yielded = {} + for i in importers: + for name, ispkg in iter_importer_modules(i, prefix): + if name not in yielded: + yielded[name] = 1 + yield i, name, ispkg + + +#@simplegeneric +def iter_importer_modules(importer, prefix=''): + if not hasattr(importer,'iter_modules'): + return [] + return importer.iter_modules(prefix) + +iter_importer_modules = simplegeneric(iter_importer_modules) + + class ImpImporter: """PEP 302 Importer that wraps Python's "classic" import algorithm @@ -49,13 +139,45 @@ class ImpImporter: if self.path is None: path = None else: - path = [self.path] + path = [os.path.realpath(self.path)] try: file, filename, etc = imp.find_module(subname, path) except ImportError: return None return ImpLoader(fullname, file, filename, etc) + def iter_modules(self, prefix=''): + if self.path is None or not os.path.isdir(self.path): + return + + yielded = {} + import inspect + + filenames = os.listdir(self.path) + filenames.sort() # handle packages before same-named modules + + for fn in filenames: + modname = inspect.getmodulename(fn) + if modname=='__init__' or modname in yielded: + continue + + path = os.path.join(self.path, fn) + ispkg = False + + if not modname and os.path.isdir(path) and '.' not in fn: + modname = fn + for fn in os.listdir(path): + subname = inspect.getmodulename(fn) + if subname=='__init__': + ispkg = True + break + else: + continue # not a package + + if modname and '.' not in modname: + yielded[modname] = 1 + yield prefix + modname, ispkg + class ImpLoader: """PEP 302 Loader that wraps Python's "classic" import algorithm @@ -97,7 +219,8 @@ class ImpLoader: "module %s" % (self.fullname, fullname)) return fullname - def is_package(self): + def is_package(self, fullname): + fullname = self._fix_name(fullname) return self.etc[2]==imp.PKG_DIRECTORY def get_code(self, fullname=None): @@ -136,6 +259,7 @@ class ImpLoader: self.source = self._get_delegate().get_source() return self.source + def _get_delegate(self): return ImpImporter(self.filename).find_module('__init__') @@ -149,6 +273,45 @@ class ImpLoader: return None +try: + import zipimport + from zipimport import zipimporter + + def iter_zipimport_modules(importer, prefix=''): + dirlist = zipimport._zip_directory_cache[importer.archive].keys() + dirlist.sort() + _prefix = importer.prefix + plen = len(_prefix) + yielded = {} + import inspect + for fn in dirlist: + if not fn.startswith(_prefix): + continue + + fn = fn[plen:].split(os.sep) + + if len(fn)==2 and fn[1].startswith('__init__.py'): + if fn[0] not in yielded: + yielded[fn[0]] = 1 + yield fn[0], True + + if len(fn)!=1: + continue + + modname = inspect.getmodulename(fn[0]) + if modname=='__init__': + continue + + if modname and '.' not in modname and modname not in yielded: + yielded[modname] = 1 + yield prefix + modname, False + + iter_importer_modules.register(zipimporter, iter_zipimport_modules) + +except ImportError: + pass + + def get_importer(path_item): """Retrieve a PEP 302 importer for the given path item @@ -183,7 +346,7 @@ def get_importer(path_item): return importer -def iter_importers(fullname): +def iter_importers(fullname=""): """Yield PEP 302 importers for the given module name If fullname contains a '.', the importers will be for the package @@ -224,7 +387,6 @@ def iter_importers(fullname): if '.' not in fullname: yield ImpImporter() - def get_loader(module_or_name): """Get a PEP 302 "loader" object for module_or_name @@ -250,7 +412,6 @@ def get_loader(module_or_name): fullname = module_or_name return find_loader(fullname) - def find_loader(fullname): """Find a PEP 302 "loader" object for fullname diff --git a/Lib/pydoc.py b/Lib/pydoc.py index ee45643..ff6e7ca 100755 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -52,10 +52,16 @@ Richard Chamberlain, for the first implementation of textdoc. # the current directory is changed with os.chdir(), an incorrect # path will be displayed. -import sys, imp, os, re, types, inspect, __builtin__ +import sys, imp, os, re, types, inspect, __builtin__, pkgutil from repr import Repr from string import expandtabs, find, join, lower, split, strip, rfind, rstrip -from collections import deque +try: + from collections import deque +except ImportError: + # Python 2.3 compatibility + class deque(list): + def popleft(self): + return self.pop(0) # --------------------------------------------------------- common routines @@ -182,6 +188,23 @@ def ispackage(path): return True return False +def source_synopsis(file): + line = file.readline() + while line[:1] == '#' or not strip(line): + line = file.readline() + if not line: break + line = strip(line) + if line[:4] == 'r"""': line = line[1:] + if line[:3] == '"""': + line = line[3:] + if line[-1:] == '\\': line = line[:-1] + while not strip(line): + line = file.readline() + if not line: break + result = strip(split(line, '"""')[0]) + else: result = None + return result + def synopsis(filename, cache={}): """Get the one-line summary out of a module file.""" mtime = os.stat(filename).st_mtime @@ -196,24 +219,11 @@ def synopsis(filename, cache={}): if info and 'b' in info[2]: # binary modules have to be imported try: module = imp.load_module('__temp__', file, filename, info[1:]) except: return None - result = split(module.__doc__ or '', '\n')[0] + result = (module.__doc__ or '').splitlines()[0] del sys.modules['__temp__'] else: # text modules can be directly examined - line = file.readline() - while line[:1] == '#' or not strip(line): - line = file.readline() - if not line: break - line = strip(line) - if line[:4] == 'r"""': line = line[1:] - if line[:3] == '"""': - line = line[3:] - if line[-1:] == '\\': line = line[:-1] - while not strip(line): - line = file.readline() - if not line: break - result = strip(split(line, '"""')[0]) - else: result = None - file.close() + result = source_synopsis(file) + file.close() cache[filename] = (mtime, result) return result @@ -643,16 +653,8 @@ class HTMLDoc(Doc): if hasattr(object, '__path__'): modpkgs = [] - modnames = [] - for file in os.listdir(object.__path__[0]): - path = os.path.join(object.__path__[0], file) - modname = inspect.getmodulename(file) - if modname != '__init__': - if modname and modname not in modnames: - modpkgs.append((modname, name, 0, 0)) - modnames.append(modname) - elif ispackage(path): - modpkgs.append((file, name, 1, 0)) + for importer, modname, ispkg in pkgutil.iter_modules(object.__path__): + modpkgs.append((modname, name, ispkg, 0)) modpkgs.sort() contents = self.multicolumn(modpkgs, self.modpkglink) result = result + self.bigsection( @@ -796,7 +798,10 @@ class HTMLDoc(Doc): tag += ':
\n' # Sort attrs by name. - attrs.sort(key=lambda t: t[0]) + try: + attrs.sort(key=lambda t: t[0]) + except TypeError: + attrs.sort(lambda t1, t2: cmp(t1[0], t2[0])) # 2.3 compat # Pump out the attrs, segregated by kind. attrs = spill('Methods %s' % tag, attrs, @@ -914,25 +919,9 @@ class HTMLDoc(Doc): """Generate an HTML index for a directory of modules.""" modpkgs = [] if shadowed is None: shadowed = {} - seen = {} - files = os.listdir(dir) - - def found(name, ispackage, - modpkgs=modpkgs, shadowed=shadowed, seen=seen): - if name not in seen: - modpkgs.append((name, '', ispackage, name in shadowed)) - seen[name] = 1 - shadowed[name] = 1 - - # Package spam/__init__.py takes precedence over module spam.py. - for file in files: - path = os.path.join(dir, file) - if ispackage(path): found(file, 1) - for file in files: - path = os.path.join(dir, file) - if os.path.isfile(path): - modname = inspect.getmodulename(file) - if modname: found(modname, 0) + for importer, name, ispkg in pkgutil.iter_modules([dir]): + modpkgs.append((name, '', ispkg, name in shadowed)) + shadowed[name] = 1 modpkgs.sort() contents = self.multicolumn(modpkgs, self.modpkglink) @@ -1059,14 +1048,12 @@ class TextDoc(Doc): if hasattr(object, '__path__'): modpkgs = [] - for file in os.listdir(object.__path__[0]): - path = os.path.join(object.__path__[0], file) - modname = inspect.getmodulename(file) - if modname != '__init__': - if modname and modname not in modpkgs: - modpkgs.append(modname) - elif ispackage(path): - modpkgs.append(file + ' (package)') + for importer, modname, ispkg in pkgutil.iter_modules(object.__path__): + if ispkg: + modpkgs.append(modname + ' (package)') + else: + modpkgs.append(modname) + modpkgs.sort() result = result + self.section( 'PACKAGE CONTENTS', join(modpkgs, '\n')) @@ -1490,20 +1477,9 @@ def writedoc(thing, forceload=0): def writedocs(dir, pkgpath='', done=None): """Write out HTML documentation for all modules in a directory tree.""" if done is None: done = {} - for file in os.listdir(dir): - path = os.path.join(dir, file) - if ispackage(path): - writedocs(path, pkgpath + file + '.', done) - elif os.path.isfile(path): - modname = inspect.getmodulename(path) - if modname: - if modname == '__init__': - modname = pkgpath[:-1] # remove trailing period - else: - modname = pkgpath + modname - if modname not in done: - done[modname] = 1 - writedoc(modname) + for importer, modname, ispkg in pkgutil.walk_packages([dir], pkgpath): + writedoc(modname) + return class Helper: keywords = { @@ -1830,30 +1806,9 @@ class Scanner: self.state.append((child, self.children(child))) return child -class ModuleScanner(Scanner): - """An interruptible scanner that searches module synopses.""" - def __init__(self): - roots = map(lambda dir: (dir, ''), pathdirs()) - Scanner.__init__(self, roots, self.submodules, self.isnewpackage) - self.inodes = map(lambda (dir, pkg): os.stat(dir).st_ino, roots) - - def submodules(self, (dir, package)): - children = [] - for file in os.listdir(dir): - path = os.path.join(dir, file) - if ispackage(path): - children.append((path, package + (package and '.') + file)) - else: - children.append((path, package)) - children.sort() # so that spam.py comes before spam.pyc or spam.pyo - return children - def isnewpackage(self, (dir, package)): - inode = os.path.exists(dir) and os.stat(dir).st_ino - if not (os.path.islink(dir) and inode in self.inodes): - self.inodes.append(inode) # detect circular symbolic links - return ispackage(dir) - return False +class ModuleScanner: + """An interruptible scanner that searches module synopses.""" def run(self, callback, key=None, completer=None): if key: key = lower(key) @@ -1870,22 +1825,31 @@ class ModuleScanner(Scanner): if find(lower(modname + ' - ' + desc), key) >= 0: callback(None, modname, desc) - while not self.quit: - node = self.next() - if not node: break - path, package = node - modname = inspect.getmodulename(path) - if os.path.isfile(path) and modname: - modname = package + (package and '.') + modname - if not modname in seen: - seen[modname] = 1 # if we see spam.py, skip spam.pyc - if key is None: - callback(path, modname, '') + for importer, modname, ispkg in pkgutil.walk_packages(): + if self.quit: + break + if key is None: + callback(None, modname, '') + else: + loader = importer.find_module(modname) + if hasattr(loader,'get_source'): + import StringIO + desc = source_synopsis( + StringIO.StringIO(loader.get_source(modname)) + ) or '' + if hasattr(loader,'get_filename'): + path = loader.get_filename(modname) else: - desc = synopsis(path) or '' - if find(lower(modname + ' - ' + desc), key) >= 0: - callback(path, modname, desc) - if completer: completer() + path = None + else: + module = loader.load_module(modname) + desc = (module.__doc__ or '').splitlines()[0] + path = getattr(module,'__file__',None) + if find(lower(modname + ' - ' + desc), key) >= 0: + callback(path, modname, desc) + + if completer: + completer() def apropos(key): """Print all the one-line module summaries that contain a substring.""" @@ -1950,7 +1914,7 @@ def serve(port, callback=None, completer=None): 'Built-in Modules', '#ffffff', '#ee77aa', contents)] seen = {} - for dir in pathdirs(): + for dir in sys.path: indices.append(html.index(dir, seen)) contents = heading + join(indices) + '''

-- cgit v0.12