summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorBrett Cannon <brett@python.org>2013-06-16 22:37:53 (GMT)
committerBrett Cannon <brett@python.org>2013-06-16 22:37:53 (GMT)
commitf24fecd4ac9050799d02a8354b7acfa12b65b1d3 (patch)
tree332e7e70c73bfbe61754e95a173aadbbe6e004a4 /Lib
parentf4375ef4d458bf24610ffef591f8197a3dbf0b35 (diff)
downloadcpython-f24fecd4ac9050799d02a8354b7acfa12b65b1d3.zip
cpython-f24fecd4ac9050799d02a8354b7acfa12b65b1d3.tar.gz
cpython-f24fecd4ac9050799d02a8354b7acfa12b65b1d3.tar.bz2
Issue #18076: Introduce importlib.util.decode_source().
The helper function makes it easier to implement importlib.abc.InspectLoader.get_source() by making that function require just the raw bytes for source code and handling all other details.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/importlib/_bootstrap.py18
-rw-r--r--Lib/importlib/util.py1
-rw-r--r--Lib/test/test_importlib/test_util.py21
3 files changed, 35 insertions, 5 deletions
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py
index 0668dbd..37fe083 100644
--- a/Lib/importlib/_bootstrap.py
+++ b/Lib/importlib/_bootstrap.py
@@ -723,6 +723,18 @@ def _code_to_bytecode(code, mtime=0, source_size=0):
return data
+def decode_source(source_bytes):
+ """Decode bytes representing source code and return the string.
+
+ Universal newline support is used in the decoding.
+ """
+ import tokenize # To avoid bootstrap issues.
+ source_bytes_readline = _io.BytesIO(source_bytes).readline
+ encoding = tokenize.detect_encoding(source_bytes_readline)
+ newline_decoder = _io.IncrementalNewlineDecoder(None, True)
+ return newline_decoder.decode(source_bytes.decode(encoding[0]))
+
+
# Loaders #####################################################################
class BuiltinImporter:
@@ -965,11 +977,7 @@ class SourceLoader(_LoaderBasics):
except OSError as exc:
raise ImportError("source not available through get_data()",
name=fullname) from exc
- import tokenize
- readsource = _io.BytesIO(source_bytes).readline
- encoding = tokenize.detect_encoding(readsource)
- newline_decoder = _io.IncrementalNewlineDecoder(None, True)
- return newline_decoder.decode(source_bytes.decode(encoding[0]))
+ return decode_source(source_bytes)
def source_to_code(self, data, path, *, _optimize=-1):
"""Return the code object compiled from source.
diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py
index 06f4d2f..7727f9d 100644
--- a/Lib/importlib/util.py
+++ b/Lib/importlib/util.py
@@ -2,6 +2,7 @@
from ._bootstrap import MAGIC_NUMBER
from ._bootstrap import cache_from_source
+from ._bootstrap import decode_source
from ._bootstrap import module_to_load
from ._bootstrap import set_loader
from ._bootstrap import set_package
diff --git a/Lib/test/test_importlib/test_util.py b/Lib/test/test_importlib/test_util.py
index 111607b..5fcbdae 100644
--- a/Lib/test/test_importlib/test_util.py
+++ b/Lib/test/test_importlib/test_util.py
@@ -9,6 +9,27 @@ import unittest
import warnings
+class DecodeSourceBytesTests(unittest.TestCase):
+
+ source = "string ='ü'"
+
+ def test_ut8_default(self):
+ source_bytes = self.source.encode('utf-8')
+ self.assertEqual(util.decode_source(source_bytes), self.source)
+
+ def test_specified_encoding(self):
+ source = '# coding=latin-1\n' + self.source
+ source_bytes = source.encode('latin-1')
+ assert source_bytes != source.encode('utf-8')
+ self.assertEqual(util.decode_source(source_bytes), source)
+
+ def test_universal_newlines(self):
+ source = '\r\n'.join([self.source, self.source])
+ source_bytes = source.encode('utf-8')
+ self.assertEqual(util.decode_source(source_bytes),
+ '\n'.join([self.source, self.source]))
+
+
class ModuleToLoadTests(unittest.TestCase):
module_name = 'ModuleManagerTest_module'