From 91cf882b367644ece7f121cd22fc43c2f439a2d5 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Sat, 21 Feb 2009 05:41:15 +0000 Subject: Refactor source and bytecode file loaders in importlib so that there are source-only and source/bytecode loaders. --- Lib/importlib/NOTES | 28 +-- Lib/importlib/_bootstrap.py | 260 +++++++++++++--------- Lib/importlib/test/source/test_loader.py | 22 +- Lib/importlib/test/source/test_source_encoding.py | 4 +- 4 files changed, 164 insertions(+), 150 deletions(-) diff --git a/Lib/importlib/NOTES b/Lib/importlib/NOTES index 35c8fd3..22103a1 100644 --- a/Lib/importlib/NOTES +++ b/Lib/importlib/NOTES @@ -1,31 +1,6 @@ to do ///// -* Refactor source/bytecode finder/loader code such that bytecode support is a - subclass of source support (makes it nicer for VMs that don't use CPython - bytecode). - - + PyLoader (for ABC) - - - load_module for source only - - get_code for source only - - + PyFileLoader(PyLoader) - - - get_data - - source_mtime - - source_path - - +PyPycLoader (PyLoader, for ABC) - - - load_module for source and bytecode - - get_code for source and bytecode - - + PyPycFileLoader(PyPycLoader, PyFileLoader) - - - bytecode_path - - write_bytecode - * Implement PEP 302 protocol for loaders (should just be a matter of testing). + Source/bytecode. @@ -42,7 +17,6 @@ to do * load_module - - (?) Importer(Finder, Loader) - ResourceLoader(Loader) * get_data @@ -89,6 +63,8 @@ to do * Add leading underscores to all objects in importlib._bootstrap that are not publicly exposed. +* Reorder importlib/_bootstrap.py so definitions are not in inverted order. + * Make sure that there is documentation *somewhere* fully explaining the semantics of import that can be referenced from the package's documentation (even if it is in the package documentation itself, although it might be best diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index 97e3f54..45c1b05 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -315,17 +315,124 @@ def module_for_loader(fxn): return decorated -class _PyFileLoader: - # XXX Still smart to have this as a separate class? Or would it work - # better to integrate with PyFileFinder? Could cache _is_pkg info. - # FileFinder can be changed to return self instead of a specific loader - # call. Otherwise _base_path can be calculated on the fly without issue if - # it is known whether a module should be treated as a path or package to - # minimize stat calls. Could even go as far as to stat the directory the - # importer is in to detect changes and then cache all the info about what - # files were found (if stating directories is platform-dependent). - - """Load a Python source or bytecode file.""" +class PyLoader: + + """Loader base class for Python source. + + Requires implementing the optional PEP 302 protocols as well as + source_mtime and source_path. + + """ + + @module_for_loader + def load_module(self, module): + """Load a source module.""" + return _load_module(module) + + def _load_module(self, module): + """Initialize a module from source.""" + name = module.__name__ + source_path = self.source_path(name) + code_object = self.get_code(module.__name__) + if not hasattr(module, '__file__'): + module.__file__ = source_path + if self.is_package(name): + module.__path__ = [module.__file__.rsplit(path_sep, 1)[0]] + module.__package__ = module.__name__ + if not hasattr(module, '__path__'): + module.__package__ = module.__package__.rpartition('.')[0] + exec(code_object, module.__dict__) + return module + + def get_code(self, fullname): + """Get a code object from source.""" + source_path = self.source_path(fullname) + source = self.get_data(source_path) + # Convert to universal newlines. + line_endings = b'\n' + for index, c in enumerate(source): + if c == ord(b'\n'): + break + elif c == ord(b'\r'): + line_endings = b'\r' + try: + if source[index+1] == ord(b'\n'): + line_endings += b'\n' + except IndexError: + pass + break + if line_endings != b'\n': + source = source.replace(line_endings, b'\n') + return compile(source, source_path, 'exec', dont_inherit=True) + + +class PyPycLoader(PyLoader): + + """Loader base class for Python source and bytecode. + + Requires implementing the methods needed for PyLoader as well as + bytecode_path and write_bytecode. + + """ + + @module_for_loader + def load_module(self, module): + """Load a module from source or bytecode.""" + name = module.__name__ + source_path = self.source_path(name) + bytecode_path = self.bytecode_path(name) + module.__file__ = source_path if source_path else bytecode_path + return self._load_module(module) + + def get_code(self, fullname): + """Get a code object from source or bytecode.""" + # XXX Care enough to make sure this call does not happen if the magic + # number is bad? + source_timestamp = self.source_mtime(fullname) + # Try to use bytecode if it is available. + bytecode_path = self.bytecode_path(fullname) + if bytecode_path: + data = self.get_data(bytecode_path) + magic = data[:4] + pyc_timestamp = marshal._r_long(data[4:8]) + bytecode = data[8:] + try: + # Verify that the magic number is valid. + if imp.get_magic() != magic: + raise ImportError("bad magic number") + # Verify that the bytecode is not stale (only matters when + # there is source to fall back on. + if source_timestamp: + if pyc_timestamp < source_timestamp: + raise ImportError("bytecode is stale") + except ImportError: + # If source is available give it a shot. + if source_timestamp is not None: + pass + else: + raise + else: + # Bytecode seems fine, so try to use it. + # XXX If the bytecode is ill-formed, would it be beneficial to + # try for using source if available and issue a warning? + return marshal.loads(bytecode) + elif source_timestamp is None: + raise ImportError("no source or bytecode available to create code " + "object for {0!r}".format(fullname)) + # Use the source. + code_object = super().get_code(fullname) + # Generate bytecode and write it out. + if not sys.dont_write_bytecode: + data = bytearray(imp.get_magic()) + data.extend(marshal._w_long(source_timestamp)) + data.extend(marshal.dumps(code_object)) + self.write_bytecode(fullname, data) + return code_object + + +class PyFileLoader(PyLoader): + + """Load a Python source file.""" def __init__(self, name, path, is_pkg): self._name = name @@ -354,29 +461,6 @@ class _PyFileLoader: # Not a property so that it is easy to override. return self._find_path(imp.PY_SOURCE) - @check_name - def bytecode_path(self, fullname): - """Return the path to a bytecode file, or None if one does not - exist.""" - # Not a property for easy overriding. - return self._find_path(imp.PY_COMPILED) - - @module_for_loader - def load_module(self, module): - """Load a Python source or bytecode module.""" - name = module.__name__ - source_path = self.source_path(name) - bytecode_path = self.bytecode_path(name) - code_object = self.get_code(module.__name__) - module.__file__ = source_path if source_path else bytecode_path - module.__loader__ = self - if self.is_package(name): - module.__path__ = [module.__file__.rsplit(path_sep, 1)[0]] - module.__package__ = module.__name__ - if not hasattr(module, '__path__'): - module.__package__ = module.__package__.rpartition('.')[0] - exec(code_object, module.__dict__) - return module @check_name def source_mtime(self, name): @@ -405,6 +489,34 @@ class _PyFileLoader: # anything other than UTF-8. return open(source_path, encoding=encoding).read() + + def get_data(self, path): + """Return the data from path as raw bytes.""" + return _fileio._FileIO(path, 'r').read() + + @check_name + def is_package(self, fullname): + """Return a boolean based on whether the module is a package. + + Raises ImportError (like get_source) if the loader cannot handle the + package. + + """ + return self._is_pkg + + +# XXX Rename _PyFileLoader throughout +class PyPycFileLoader(PyPycLoader, PyFileLoader): + + """Load a module from a source or bytecode file.""" + + @check_name + def bytecode_path(self, fullname): + """Return the path to a bytecode file, or None if one does not + exist.""" + # Not a property for easy overriding. + return self._find_path(imp.PY_COMPILED) + @check_name def write_bytecode(self, name, data): """Write out 'data' for the specified module, returning a boolean @@ -428,82 +540,6 @@ class _PyFileLoader: else: raise - def get_code(self, name): - """Return the code object for the module.""" - # XXX Care enough to make sure this call does not happen if the magic - # number is bad? - source_timestamp = self.source_mtime(name) - # Try to use bytecode if it is available. - bytecode_path = self.bytecode_path(name) - if bytecode_path: - data = self.get_data(bytecode_path) - magic = data[:4] - pyc_timestamp = marshal._r_long(data[4:8]) - bytecode = data[8:] - try: - # Verify that the magic number is valid. - if imp.get_magic() != magic: - raise ImportError("bad magic number") - # Verify that the bytecode is not stale (only matters when - # there is source to fall back on. - if source_timestamp: - if pyc_timestamp < source_timestamp: - raise ImportError("bytcode is stale") - except ImportError: - # If source is available give it a shot. - if source_timestamp is not None: - pass - else: - raise - else: - # Bytecode seems fine, so try to use it. - # XXX If the bytecode is ill-formed, would it be beneficial to - # try for using source if available and issue a warning? - return marshal.loads(bytecode) - elif source_timestamp is None: - raise ImportError("no source or bytecode available to create code " - "object for {0!r}".format(name)) - # Use the source. - source_path = self.source_path(name) - source = self.get_data(source_path) - # Convert to universal newlines. - line_endings = b'\n' - for index, c in enumerate(source): - if c == ord(b'\n'): - break - elif c == ord(b'\r'): - line_endings = b'\r' - try: - if source[index+1] == ord(b'\n'): - line_endings += b'\n' - except IndexError: - pass - break - if line_endings != b'\n': - source = source.replace(line_endings, b'\n') - code_object = compile(source, source_path, 'exec', dont_inherit=True) - # Generate bytecode and write it out. - if not sys.dont_write_bytecode: - data = bytearray(imp.get_magic()) - data.extend(marshal._w_long(source_timestamp)) - data.extend(marshal.dumps(code_object)) - self.write_bytecode(name, data) - return code_object - - def get_data(self, path): - """Return the data from path as raw bytes.""" - return _fileio._FileIO(path, 'r').read() - - @check_name - def is_package(self, fullname): - """Return a boolean based on whether the module is a package. - - Raises ImportError (like get_source) if the loader cannot handle the - package. - - """ - return self._is_pkg - class FileFinder: @@ -583,7 +619,7 @@ class PyFileFinder(FileFinder): """Importer for source/bytecode files.""" _possible_package = True - _loader = _PyFileLoader + _loader = PyFileLoader def __init__(self, path_entry): # Lack of imp during class creation means _suffixes is set here. @@ -597,6 +633,8 @@ class PyPycFileFinder(PyFileFinder): """Finder for source and bytecode files.""" + _loader = PyPycFileLoader + def __init__(self, path_entry): super().__init__(path_entry) self._suffixes += suffix_list(imp.PY_COMPILED) diff --git a/Lib/importlib/test/source/test_loader.py b/Lib/importlib/test/source/test_loader.py index 4ca9af1..960210f 100644 --- a/Lib/importlib/test/source/test_loader.py +++ b/Lib/importlib/test/source/test_loader.py @@ -19,7 +19,7 @@ class SimpleTest(unittest.TestCase): # [basic] def test_module(self): with source_util.create_modules('_temp') as mapping: - loader = importlib._PyFileLoader('_temp', mapping['_temp'], False) + loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False) module = loader.load_module('_temp') self.assert_('_temp' in sys.modules) check = {'__name__': '_temp', '__file__': mapping['_temp'], @@ -29,7 +29,7 @@ class SimpleTest(unittest.TestCase): def test_package(self): with source_util.create_modules('_pkg.__init__') as mapping: - loader = importlib._PyFileLoader('_pkg', mapping['_pkg.__init__'], + loader = importlib.PyPycFileLoader('_pkg', mapping['_pkg.__init__'], True) module = loader.load_module('_pkg') self.assert_('_pkg' in sys.modules) @@ -42,7 +42,7 @@ class SimpleTest(unittest.TestCase): def test_lacking_parent(self): with source_util.create_modules('_pkg.__init__', '_pkg.mod')as mapping: - loader = importlib._PyFileLoader('_pkg.mod', mapping['_pkg.mod'], + loader = importlib.PyPycFileLoader('_pkg.mod', mapping['_pkg.mod'], False) module = loader.load_module('_pkg.mod') self.assert_('_pkg.mod' in sys.modules) @@ -57,7 +57,7 @@ class SimpleTest(unittest.TestCase): def test_module_reuse(self): with source_util.create_modules('_temp') as mapping: - loader = importlib._PyFileLoader('_temp', mapping['_temp'], False) + loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False) module = loader.load_module('_temp') module_id = id(module) module_dict_id = id(module.__dict__) @@ -87,7 +87,7 @@ class SimpleTest(unittest.TestCase): setattr(orig_module, attr, value) with open(mapping[name], 'w') as file: file.write('+++ bad syntax +++') - loader = importlib._PyFileLoader('_temp', mapping['_temp'], False) + loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False) self.assertRaises(SyntaxError, loader.load_module, name) for attr in attributes: self.assertEqual(getattr(orig_module, attr), value) @@ -97,7 +97,7 @@ class SimpleTest(unittest.TestCase): with source_util.create_modules('_temp') as mapping: with open(mapping['_temp'], 'w') as file: file.write('=') - loader = importlib._PyFileLoader('_temp', mapping['_temp'], False) + loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False) self.assertRaises(SyntaxError, loader.load_module, '_temp') self.assert_('_temp' not in sys.modules) @@ -112,7 +112,7 @@ class DontWriteBytecodeTest(unittest.TestCase): @source_util.writes_bytecode def run_test(self, assertion): with source_util.create_modules('_temp') as mapping: - loader = importlib._PyFileLoader('_temp', mapping['_temp'], False) + loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False) loader.load_module('_temp') bytecode_path = source_util.bytecode_path(mapping['_temp']) assertion(bytecode_path) @@ -144,7 +144,7 @@ class BadDataTest(unittest.TestCase): with open(bytecode_path, 'r+b') as file: file.seek(0) file.write(b'\x00\x00\x00\x00') - loader = importlib._PyFileLoader('_temp', mapping['_temp'], False) + loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False) self.assertRaises(ImportError, loader.load_module, '_temp') self.assert_('_temp' not in sys.modules) @@ -159,7 +159,7 @@ class SourceBytecodeInteraction(unittest.TestCase): """ def import_(self, file, module, *, pkg=False): - loader = importlib._PyFileLoader(module, file, pkg) + loader = importlib.PyPycFileLoader(module, file, pkg) return loader.load_module(module) def run_test(self, test, *create, pkg=False): @@ -171,7 +171,7 @@ class SourceBytecodeInteraction(unittest.TestCase): import_name = test.rsplit('.', 1)[0] else: import_name = test - loader = importlib._PyFileLoader(import_name, mapping[test], pkg) + loader = importlib.PyPycFileLoader(import_name, mapping[test], pkg) # Because some platforms only have a granularity to the second for # atime you can't check the physical files. Instead just make it an # exception trigger if source was read. @@ -212,7 +212,7 @@ class BadBytecodeTest(unittest.TestCase): """ def import_(self, file, module_name): - loader = importlib._PyFileLoader(module_name, file, False) + loader = importlib.PyPycFileLoader(module_name, file, False) module = loader.load_module(module_name) self.assert_(module_name in sys.modules) diff --git a/Lib/importlib/test/source/test_source_encoding.py b/Lib/importlib/test/source/test_source_encoding.py index 5ffdf8f..9e9c7e8 100644 --- a/Lib/importlib/test/source/test_source_encoding.py +++ b/Lib/importlib/test/source/test_source_encoding.py @@ -35,7 +35,7 @@ class EncodingTest(unittest.TestCase): with source_util.create_modules(self.module_name) as mapping: with open(mapping[self.module_name], 'wb')as file: file.write(source) - loader = importlib._PyFileLoader(self.module_name, + loader = importlib.PyPycFileLoader(self.module_name, mapping[self.module_name], False) return loader.load_module(self.module_name) @@ -96,7 +96,7 @@ class LineEndingTest(unittest.TestCase): with source_util.create_modules(module_name) as mapping: with open(mapping[module_name], 'wb') as file: file.write(source) - loader = importlib._PyFileLoader(module_name, mapping[module_name], + loader = importlib.PyPycFileLoader(module_name, mapping[module_name], False) return loader.load_module(module_name) -- cgit v0.12