diff options
author | Benjamin Peterson <benjamin@python.org> | 2018-07-07 03:41:06 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-07-07 03:41:06 (GMT) |
commit | b0274f2cddd36b49fe5080efbe160277ef546471 (patch) | |
tree | 6a13b9a5c333d79953a0d14af161c9b278c3b78c /Lib/imp.py | |
parent | e25399b40cd15620e77c9ad2ed24549006ae9b47 (diff) | |
download | cpython-b0274f2cddd36b49fe5080efbe160277ef546471.zip cpython-b0274f2cddd36b49fe5080efbe160277ef546471.tar.gz cpython-b0274f2cddd36b49fe5080efbe160277ef546471.tar.bz2 |
closes bpo-34056: Always return bytes from _HackedGetData.get_data(). (GH-8130)
* Always return bytes from _HackedGetData.get_data().
Ensure the imp.load_source shim always returns bytes by reopening the file in
binary mode if needed. Hash-based pycs have to receive the source code in bytes.
It's tempting to change imp.get_suffixes() to always return 'rb' as a mode, but
that breaks some stdlib tests and likely third-party code, too.
Diffstat (limited to 'Lib/imp.py')
-rw-r--r-- | Lib/imp.py | 13 |
1 files changed, 6 insertions, 7 deletions
```diff
@@ -142,17 +142,16 @@ class _HackedGetData:
     def get_data(self, path):
         """Gross hack to contort loader to deal w/ load_*()'s bad API."""
         if self.file and path == self.path:
+            # The contract of get_data() requires us to return bytes. Reopen the
+            # file in binary mode if needed.
             if not self.file.closed:
                 file = self.file
-            else:
-                self.file = file = open(self.path, 'r')
+                if 'b' not in file.mode:
+                    file.close()
+            if self.file.closed:
+                self.file = file = open(self.path, 'rb')
 
             with file:
-                # Technically should be returning bytes, but
-                # SourceLoader.get_code() just passed what is returned to
-                # compile() which can handle str. And converting to bytes would
-                # require figuring out the encoding to decode to and
-                # tokenize.detect_encoding() only accepts bytes.
                 return file.read()
         else:
             return super().get_data(path)
```