summaryrefslogtreecommitdiffstats
path: root/Lib/distutils/command/build_scripts.py
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@haypocalc.com>2011-05-10 22:14:28 (GMT)
committerVictor Stinner <victor.stinner@haypocalc.com>2011-05-10 22:14:28 (GMT)
commit1947477072c22760ad324cf571d36cccc3fc1213 (patch)
tree676583f93b3ad00e590cd760169e2f9cb4b59166 /Lib/distutils/command/build_scripts.py
parentcd9dd3797462b8827f5289e84cd9daa1488783eb (diff)
downloadcpython-1947477072c22760ad324cf571d36cccc3fc1213.zip
cpython-1947477072c22760ad324cf571d36cccc3fc1213.tar.gz
cpython-1947477072c22760ad324cf571d36cccc3fc1213.tar.bz2
Close #10419, issue #6011: the build_scripts command of distutils now correctly handles
a non-ASCII path (the path to the Python executable). Open and write the script in binary mode, but ensure that the shebang is decodable both from UTF-8 and from the encoding of the script.
Diffstat (limited to 'Lib/distutils/command/build_scripts.py')
-rw-r--r--Lib/distutils/command/build_scripts.py45
1 files changed, 34 insertions, 11 deletions
diff --git a/Lib/distutils/command/build_scripts.py b/Lib/distutils/command/build_scripts.py
index 8b08bfe..a43a7c3 100644
--- a/Lib/distutils/command/build_scripts.py
+++ b/Lib/distutils/command/build_scripts.py
@@ -11,9 +11,10 @@ from distutils.core import Command
from distutils.dep_util import newer
from distutils.util import convert_path, Mixin2to3
from distutils import log
+import tokenize
# check if Python is called on the first line with this expression
-first_line_re = re.compile('^#!.*python[0-9.]*([ \t].*)?$')
+first_line_re = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$')
class build_scripts(Command):
@@ -74,12 +75,14 @@ class build_scripts(Command):
# that way, we'll get accurate feedback if we can read the
# script.
try:
- f = open(script, "r")
+ f = open(script, "rb")
except IOError:
if not self.dry_run:
raise
f = None
else:
+ encoding, lines = tokenize.detect_encoding(f.readline)
+ f.seek(0)
first_line = f.readline()
if not first_line:
self.warn("%s is an empty file (skipping)" % script)
@@ -88,25 +91,45 @@ class build_scripts(Command):
match = first_line_re.match(first_line)
if match:
adjust = True
- post_interp = match.group(1) or ''
+ post_interp = match.group(1) or b''
if adjust:
log.info("copying and adjusting %s -> %s", script,
self.build_dir)
updated_files.append(outfile)
if not self.dry_run:
- outf = open(outfile, "w")
if not sysconfig.python_build:
- outf.write("#!%s%s\n" %
- (self.executable,
- post_interp))
+ executable = self.executable
else:
- outf.write("#!%s%s\n" %
- (os.path.join(
+ executable = os.path.join(
sysconfig.get_config_var("BINDIR"),
"python%s%s" % (sysconfig.get_config_var("VERSION"),
- sysconfig.get_config_var("EXE"))),
- post_interp))
+ sysconfig.get_config_var("EXE")))
+ executable = os.fsencode(executable)
+ shebang = b"#!" + executable + post_interp + b"\n"
+ # Python parser starts to read a script using UTF-8 until
+ # it gets a #coding:xxx cookie. The shebang has to be the
+ # first line of a file, the #coding:xxx cookie cannot be
+ # written before. So the shebang has to be decodable from
+ # UTF-8.
+ try:
+ shebang.decode('utf-8')
+ except UnicodeDecodeError:
+ raise ValueError(
+ "The shebang ({!r}) is not decodable "
+ "from utf-8".format(shebang))
+ # If the script is encoded to a custom encoding (use a
+ # #coding:xxx cookie), the shebang has to be decodable from
+ # the script encoding too.
+ try:
+ shebang.decode(encoding)
+ except UnicodeDecodeError:
+ raise ValueError(
+ "The shebang ({!r}) is not decodable "
+ "from the script encoding ({})"
+ .format(shebang, encoding))
+ outf = open(outfile, "wb")
+ outf.write(shebang)
outf.writelines(f.readlines())
outf.close()
if f: