summary | refs | log | tree | commit | diff | stats
path: root/Lib/compileall.py
diff options
context:
space:
mode:
author: Lumír 'Frenzy' Balhar <lbalhar@redhat.com>, 2020-05-14 14:17:22 (GMT)
committer: GitHub <noreply@github.com>, 2020-05-14 14:17:22 (GMT)
commit: e77d428856fbd339faee44ff47214eda5fb51d57 (patch)
tree: f6da10d4c4b9d438107f665760a0ba79810f4e49 /Lib/compileall.py
parent: 7443d42021d433da0497f8ba651daa47e7dc1991 (diff)
download: cpython-e77d428856fbd339faee44ff47214eda5fb51d57.zip
cpython-e77d428856fbd339faee44ff47214eda5fb51d57.tar.gz
cpython-e77d428856fbd339faee44ff47214eda5fb51d57.tar.bz2
bpo-40495: compileall option to hardlink duplicate pyc files (GH-19901)
compileall can now use hard links to avoid duplicate files when the .pyc files for different optimization levels have identical content.

Co-authored-by: Miro Hrončok <miro@hroncok.cz>
Co-authored-by: Victor Stinner <vstinner@python.org>
Diffstat (limited to 'Lib/compileall.py')
-rw-r--r-- Lib/compileall.py 42
1 files changed, 35 insertions, 7 deletions
diff --git a/Lib/compileall.py b/Lib/compileall.py
index abe6cff..fe7f450 100644
--- a/Lib/compileall.py
+++ b/Lib/compileall.py
@@ -15,6 +15,7 @@ import sys
import importlib.util
import py_compile
import struct
+import filecmp
from functools import partial
from pathlib import Path
@@ -47,7 +48,7 @@ def _walk_dir(dir, maxlevels, quiet=0):
def compile_dir(dir, maxlevels=None, ddir=None, force=False,
rx=None, quiet=0, legacy=False, optimize=-1, workers=1,
invalidation_mode=None, *, stripdir=None,
- prependdir=None, limit_sl_dest=None):
+ prependdir=None, limit_sl_dest=None, hardlink_dupes=False):
"""Byte-compile all modules in the given directory tree.
Arguments (only dir is required):
@@ -70,6 +71,7 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
after stripdir
limit_sl_dest: ignore symlinks if they are pointing outside of
the defined path
+ hardlink_dupes: hardlink duplicated pyc files
"""
ProcessPoolExecutor = None
if ddir is not None and (stripdir is not None or prependdir is not None):
@@ -104,7 +106,8 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
invalidation_mode=invalidation_mode,
stripdir=stripdir,
prependdir=prependdir,
- limit_sl_dest=limit_sl_dest),
+ limit_sl_dest=limit_sl_dest,
+ hardlink_dupes=hardlink_dupes),
files)
success = min(results, default=True)
else:
@@ -112,14 +115,15 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
if not compile_file(file, ddir, force, rx, quiet,
legacy, optimize, invalidation_mode,
stripdir=stripdir, prependdir=prependdir,
- limit_sl_dest=limit_sl_dest):
+ limit_sl_dest=limit_sl_dest,
+ hardlink_dupes=hardlink_dupes):
success = False
return success
def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
legacy=False, optimize=-1,
invalidation_mode=None, *, stripdir=None, prependdir=None,
- limit_sl_dest=None):
+ limit_sl_dest=None, hardlink_dupes=False):
"""Byte-compile one file.
Arguments (only fullname is required):
@@ -140,6 +144,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
after stripdir
limit_sl_dest: ignore symlinks if they are pointing outside of
the defined path.
+ hardlink_dupes: hardlink duplicated pyc files
"""
if ddir is not None and (stripdir is not None or prependdir is not None):
@@ -176,6 +181,14 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
if isinstance(optimize, int):
optimize = [optimize]
+ # Use set() to remove duplicates.
+ # Use sorted() to create pyc files in a deterministic order.
+ optimize = sorted(set(optimize))
+
+ if hardlink_dupes and len(optimize) < 2:
+ raise ValueError("Hardlinking of duplicated bytecode makes sense "
+ "only for more than one optimization level")
+
if rx is not None:
mo = rx.search(fullname)
if mo:
@@ -220,10 +233,16 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
if not quiet:
print('Compiling {!r}...'.format(fullname))
try:
- for opt_level, cfile in opt_cfiles.items():
+ for index, opt_level in enumerate(optimize):
+ cfile = opt_cfiles[opt_level]
ok = py_compile.compile(fullname, cfile, dfile, True,
optimize=opt_level,
invalidation_mode=invalidation_mode)
+ if index > 0 and hardlink_dupes:
+ previous_cfile = opt_cfiles[optimize[index - 1]]
+ if filecmp.cmp(cfile, previous_cfile, shallow=False):
+ os.unlink(cfile)
+ os.link(previous_cfile, cfile)
except py_compile.PyCompileError as err:
success = False
if quiet >= 2:
@@ -352,6 +371,9 @@ def main():
'Python interpreter itself (specified by -O).'))
parser.add_argument('-e', metavar='DIR', dest='limit_sl_dest',
help='Ignore symlinks pointing outsite of the DIR')
+ parser.add_argument('--hardlink-dupes', action='store_true',
+ dest='hardlink_dupes',
+ help='Hardlink duplicated pyc files')
args = parser.parse_args()
compile_dests = args.compile_dest
@@ -371,6 +393,10 @@ def main():
if args.opt_levels is None:
args.opt_levels = [-1]
+ if len(args.opt_levels) == 1 and args.hardlink_dupes:
+ parser.error(("Hardlinking of duplicated bytecode makes sense "
+ "only for more than one optimization level."))
+
if args.ddir is not None and (
args.stripdir is not None or args.prependdir is not None
):
@@ -404,7 +430,8 @@ def main():
stripdir=args.stripdir,
prependdir=args.prependdir,
optimize=args.opt_levels,
- limit_sl_dest=args.limit_sl_dest):
+ limit_sl_dest=args.limit_sl_dest,
+ hardlink_dupes=args.hardlink_dupes):
success = False
else:
if not compile_dir(dest, maxlevels, args.ddir,
@@ -414,7 +441,8 @@ def main():
stripdir=args.stripdir,
prependdir=args.prependdir,
optimize=args.opt_levels,
- limit_sl_dest=args.limit_sl_dest):
+ limit_sl_dest=args.limit_sl_dest,
+ hardlink_dupes=args.hardlink_dupes):
success = False
return success
else: