diff options
author | Brett Cannon <brett@python.org> | 2015-03-13 14:40:49 (GMT) |
---|---|---|
committer | Brett Cannon <brett@python.org> | 2015-03-13 14:40:49 (GMT) |
commit | cc4dfc1b75e68863781beae49fc8cac5982c1c25 (patch) | |
tree | fcc63e3a9edc2d2777a9a8b681386c02c382d0d3 /Lib | |
parent | ff2a661ef0b660588523d5cd3764c377d7078c63 (diff) | |
download | cpython-cc4dfc1b75e68863781beae49fc8cac5982c1c25.zip cpython-cc4dfc1b75e68863781beae49fc8cac5982c1c25.tar.gz cpython-cc4dfc1b75e68863781beae49fc8cac5982c1c25.tar.bz2 |
Issue #23491: Implement PEP 441: Improving Python Zip Application Support
Thanks to Paul Moore for the PEP and implementation.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_zipapp.py | 250 | ||||
-rw-r--r-- | Lib/zipapp.py | 179 |
2 files changed, 429 insertions, 0 deletions
diff --git a/Lib/test/test_zipapp.py b/Lib/test/test_zipapp.py new file mode 100644 index 0000000..e85c93b --- /dev/null +++ b/Lib/test/test_zipapp.py @@ -0,0 +1,250 @@ +"""Test harness for the zipapp module.""" + +import io +import pathlib +import stat +import sys +import tempfile +import unittest +import zipapp +import zipfile + + +class ZipAppTest(unittest.TestCase): + + """Test zipapp module functionality.""" + + def setUp(self): + tmpdir = tempfile.TemporaryDirectory() + self.addCleanup(tmpdir.cleanup) + self.tmpdir = pathlib.Path(tmpdir.name) + + def test_create_archive(self): + # Test packing a directory. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target)) + self.assertTrue(target.is_file()) + + def test_create_archive_with_subdirs(self): + # Test packing a directory includes entries for subdirectories. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + (source / 'foo').mkdir() + (source / 'bar').mkdir() + (source / 'foo' / '__init__.py').touch() + target = io.BytesIO() + zipapp.create_archive(str(source), target) + target.seek(0) + with zipfile.ZipFile(target, 'r') as z: + self.assertIn('foo/', z.namelist()) + self.assertIn('bar/', z.namelist()) + + def test_create_archive_default_target(self): + # Test packing a directory to the default name. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + zipapp.create_archive(str(source)) + expected_target = self.tmpdir / 'source.pyz' + self.assertTrue(expected_target.is_file()) + + def test_no_main(self): + # Test that packing a directory with no __main__.py fails. + source = self.tmpdir / 'source' + source.mkdir() + (source / 'foo.py').touch() + target = self.tmpdir / 'source.pyz' + with self.assertRaises(zipapp.ZipAppError): + zipapp.create_archive(str(source), str(target)) + + def test_main_and_main_py(self): + # Test that supplying a main argument with __main__.py fails. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + with self.assertRaises(zipapp.ZipAppError): + zipapp.create_archive(str(source), str(target), main='pkg.mod:fn') + + def test_main_written(self): + # Test that the __main__.py is written correctly. + source = self.tmpdir / 'source' + source.mkdir() + (source / 'foo.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), main='pkg.mod:fn') + with zipfile.ZipFile(str(target), 'r') as z: + self.assertIn('__main__.py', z.namelist()) + self.assertIn(b'pkg.mod.fn()', z.read('__main__.py')) + + def test_main_only_written_once(self): + # Test that we don't write multiple __main__.py files. + # The initial implementation had this bug; zip files allow + # multiple entries with the same name + source = self.tmpdir / 'source' + source.mkdir() + # Write 2 files, as the original bug wrote __main__.py + # once for each file written :-( + # See http://bugs.python.org/review/23491/diff/13982/Lib/zipapp.py#newcode67Lib/zipapp.py:67 + # (line 67) + (source / 'foo.py').touch() + (source / 'bar.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), main='pkg.mod:fn') + with zipfile.ZipFile(str(target), 'r') as z: + self.assertEqual(1, z.namelist().count('__main__.py')) + + def test_main_validation(self): + # Test that invalid values for main are rejected. + source = self.tmpdir / 'source' + source.mkdir() + target = self.tmpdir / 'source.pyz' + problems = [ + '', 'foo', 'foo:', ':bar', '12:bar', 'a.b.c.:d', + '.a:b', 'a:b.', 'a:.b', 'a:silly name' + ] + for main in problems: + with self.subTest(main=main): + with self.assertRaises(zipapp.ZipAppError): + zipapp.create_archive(str(source), str(target), main=main) + + def test_default_no_shebang(self): + # Test that no shebang line is written to the target by default. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target)) + with target.open('rb') as f: + self.assertNotEqual(f.read(2), b'#!') + + def test_custom_interpreter(self): + # Test that a shebang line with a custom interpreter is written + # correctly. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter='python') + with target.open('rb') as f: + self.assertEqual(f.read(2), b'#!') + self.assertEqual(b'python\n', f.readline()) + + def test_pack_to_fileobj(self): + # Test that we can pack to a file object. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = io.BytesIO() + zipapp.create_archive(str(source), target, interpreter='python') + self.assertTrue(target.getvalue().startswith(b'#!python\n')) + + def test_read_shebang(self): + # Test that we can read the shebang line correctly. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter='python') + self.assertEqual(zipapp.get_interpreter(str(target)), 'python') + + def test_read_missing_shebang(self): + # Test that reading the shebang line of a file without one returns None. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target)) + self.assertEqual(zipapp.get_interpreter(str(target)), None) + + def test_modify_shebang(self): + # Test that we can change the shebang of a file. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter='python') + new_target = self.tmpdir / 'changed.pyz' + zipapp.create_archive(str(target), str(new_target), interpreter='python2.7') + self.assertEqual(zipapp.get_interpreter(str(new_target)), 'python2.7') + + def test_write_shebang_to_fileobj(self): + # Test that we can change the shebang of a file, writing the result to a + # file object. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter='python') + new_target = io.BytesIO() + zipapp.create_archive(str(target), new_target, interpreter='python2.7') + self.assertTrue(new_target.getvalue().startswith(b'#!python2.7\n')) + + def test_read_from_fileobj(self): + # Test that we can copy an archive using an open file object. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + temp_archive = io.BytesIO() + zipapp.create_archive(str(source), temp_archive, interpreter='python') + new_target = io.BytesIO() + temp_archive.seek(0) + zipapp.create_archive(temp_archive, new_target, interpreter='python2.7') + self.assertTrue(new_target.getvalue().startswith(b'#!python2.7\n')) + + def test_remove_shebang(self): + # Test that we can remove the shebang from a file. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter='python') + new_target = self.tmpdir / 'changed.pyz' + zipapp.create_archive(str(target), str(new_target), interpreter=None) + self.assertEqual(zipapp.get_interpreter(str(new_target)), None) + + def test_content_of_copied_archive(self): + # Test that copying an archive doesn't corrupt it. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = io.BytesIO() + zipapp.create_archive(str(source), target, interpreter='python') + new_target = io.BytesIO() + target.seek(0) + zipapp.create_archive(target, new_target, interpreter=None) + new_target.seek(0) + with zipfile.ZipFile(new_target, 'r') as z: + self.assertEqual(set(z.namelist()), {'__main__.py'}) + + # (Unix only) tests that archives with shebang lines are made executable + @unittest.skipIf(sys.platform == 'win32', + 'Windows does not support an executable bit') + def test_shebang_is_executable(self): + # Test that an archive with a shebang line is made executable. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter='python') + self.assertTrue(target.stat().st_mode & stat.S_IEXEC) + + @unittest.skipIf(sys.platform == 'win32', + 'Windows does not support an executable bit') + def test_no_shebang_is_not_executable(self): + # Test that an archive with no shebang line is not made executable. + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter=None) + self.assertFalse(target.stat().st_mode & stat.S_IEXEC) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/zipapp.py b/Lib/zipapp.py new file mode 100644 index 0000000..3b8f9bf --- /dev/null +++ b/Lib/zipapp.py @@ -0,0 +1,179 @@ +import contextlib +import os +import pathlib +import shutil +import stat +import sys +import zipfile + +__all__ = ['ZipAppError', 'create_archive', 'get_interpreter'] + + +# The __main__.py used if the users specifies "-m module:fn". +# Note that this will always be written as UTF-8 (module and +# function names can be non-ASCII in Python 3). +# We add a coding cookie even though UTF-8 is the default in Python 3 +# because the resulting archive may be intended to be run under Python 2. +MAIN_TEMPLATE = """\ +# -*- coding: utf-8 -*- +import {module} +{module}.{fn}() +""" + + +# The Windows launcher defaults to UTF-8 when parsing shebang lines if the +# file has no BOM. So use UTF-8 on Windows. +# On Unix, use the filesystem encoding. +if sys.platform.startswith('win'): + shebang_encoding = 'utf-8' +else: + shebang_encoding = sys.getfilesystemencoding() + + +class ZipAppError(ValueError): + pass + + +@contextlib.contextmanager +def _maybe_open(archive, mode): + if isinstance(archive, str): + with open(archive, mode) as f: + yield f + else: + yield archive + + +def _write_file_prefix(f, interpreter): + """Write a shebang line.""" + if interpreter: + shebang = b'#!%b\n' % (interpreter.encode(shebang_encoding),) + f.write(shebang) + + +def _copy_archive(archive, new_archive, interpreter=None): + """Copy an application archive, modifying the shebang line.""" + with _maybe_open(archive, 'rb') as src: + # Skip the shebang line from the source. + # Read 2 bytes of the source and check if they are #!. + first_2 = src.read(2) + if first_2 == b'#!': + # Discard the initial 2 bytes and the rest of the shebang line. + first_2 = b'' + src.readline() + + with _maybe_open(new_archive, 'wb') as dst: + _write_file_prefix(dst, interpreter) + # If there was no shebang, "first_2" contains the first 2 bytes + # of the source file, so write them before copying the rest + # of the file. + dst.write(first_2) + shutil.copyfileobj(src, dst) + + if interpreter and isinstance(new_archive, str): + os.chmod(new_archive, os.stat(new_archive).st_mode | stat.S_IEXEC) + + +def create_archive(source, target=None, interpreter=None, main=None): + """Create an application archive from SOURCE. + + The SOURCE can be the name of a directory, or a filename or a file-like + object referring to an existing archive. + + The content of SOURCE is packed into an application archive in TARGET, + which can be a filename or a file-like object. If SOURCE is a directory, + TARGET can be omitted and will default to the name of SOURCE with .pyz + appended. + + The created application archive will have a shebang line specifying + that it should run with INTERPRETER (there will be no shebang line if + INTERPRETER is None), and a __main__.py which runs MAIN (if MAIN is + not specified, an existing __main__.py will be used). It is an to specify + MAIN for anything other than a directory source with no __main__.py, and it + is an error to omit MAIN if the directory has no __main__.py. + """ + # Are we copying an existing archive? + if not (isinstance(source, str) and os.path.isdir(source)): + _copy_archive(source, target, interpreter) + return + + # We are creating a new archive from a directory + has_main = os.path.exists(os.path.join(source, '__main__.py')) + if main and has_main: + raise ZipAppError( + "Cannot specify entry point if the source has __main__.py") + if not (main or has_main): + raise ZipAppError("Archive has no entry point") + + main_py = None + if main: + # Check that main has the right format + mod, sep, fn = main.partition(':') + mod_ok = all(part.isidentifier() for part in mod.split('.')) + fn_ok = all(part.isidentifier() for part in fn.split('.')) + if not (sep == ':' and mod_ok and fn_ok): + raise ZipAppError("Invalid entry point: " + main) + main_py = MAIN_TEMPLATE.format(module=mod, fn=fn) + + if target is None: + target = source + '.pyz' + + with _maybe_open(target, 'wb') as fd: + _write_file_prefix(fd, interpreter) + with zipfile.ZipFile(fd, 'w') as z: + root = pathlib.Path(source) + for child in root.rglob('*'): + arcname = str(child.relative_to(root)) + z.write(str(child), arcname) + if main_py: + z.writestr('__main__.py', main_py.encode('utf-8')) + + if interpreter and isinstance(target, str): + os.chmod(target, os.stat(target).st_mode | stat.S_IEXEC) + + +def get_interpreter(archive): + with _maybe_open(archive, 'rb') as f: + if f.read(2) == b'#!': + return f.readline().strip().decode(shebang_encoding) + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument('--output', '-o', default=None, + help="The name of the output archive. " + "Required if SOURCE is an archive.") + parser.add_argument('--python', '-p', default=None, + help="The name of the Python interpreter to use " + "(default: no shebang line).") + parser.add_argument('--main', '-m', default=None, + help="The main function of the application " + "(default: use an existing __main__.py).") + parser.add_argument('--info', default=False, action='store_true', + help="Display the interpreter from the archive.") + parser.add_argument('source', + help="Source directory (or existing archive).") + + args = parser.parse_args() + + # Handle `python -m zipapp archive.pyz --info`. + if args.info: + if not os.path.isfile(args.source): + raise SystemExit("Can only get info for an archive file") + interpreter = get_interpreter(args.source) + print("Interpreter: {}".format(interpreter or "<none>")) + sys.exit(0) + + if os.path.isfile(args.source): + if args.output is None or os.path.samefile(args.source, args.output): + raise SystemExit("In-place editing of archives is not supported") + if args.main: + raise SystemExit("Cannot change the main function when copying") + + create_archive(args.source, args.output, + interpreter=args.python, main=args.main) + + +if __name__ == '__main__': + main() |