diff options
author | Brett Cannon <brett@python.org> | 2013-06-16 22:37:53 (GMT) |
---|---|---|
committer | Brett Cannon <brett@python.org> | 2013-06-16 22:37:53 (GMT) |
commit | f24fecd4ac9050799d02a8354b7acfa12b65b1d3 (patch) | |
tree | 332e7e70c73bfbe61754e95a173aadbbe6e004a4 /Lib | |
parent | f4375ef4d458bf24610ffef591f8197a3dbf0b35 (diff) | |
download | cpython-f24fecd4ac9050799d02a8354b7acfa12b65b1d3.zip cpython-f24fecd4ac9050799d02a8354b7acfa12b65b1d3.tar.gz cpython-f24fecd4ac9050799d02a8354b7acfa12b65b1d3.tar.bz2 |
Issue #18076: Introduce importlib.util.decode_source().
The helper function makes it easier to implement
importlib.abc.InspectLoader.get_source() by making that function
require just the raw bytes for source code and handling all other
details.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/importlib/_bootstrap.py | 18 | ||||
-rw-r--r-- | Lib/importlib/util.py | 1 | ||||
-rw-r--r-- | Lib/test/test_importlib/test_util.py | 21 |
3 files changed, 35 insertions, 5 deletions
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index 0668dbd..37fe083 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -723,6 +723,18 @@ def _code_to_bytecode(code, mtime=0, source_size=0): return data +def decode_source(source_bytes): + """Decode bytes representing source code and return the string. + + Universal newline support is used in the decoding. + """ + import tokenize # To avoid bootstrap issues. + source_bytes_readline = _io.BytesIO(source_bytes).readline + encoding = tokenize.detect_encoding(source_bytes_readline) + newline_decoder = _io.IncrementalNewlineDecoder(None, True) + return newline_decoder.decode(source_bytes.decode(encoding[0])) + + # Loaders ##################################################################### class BuiltinImporter: @@ -965,11 +977,7 @@ class SourceLoader(_LoaderBasics): except OSError as exc: raise ImportError("source not available through get_data()", name=fullname) from exc - import tokenize - readsource = _io.BytesIO(source_bytes).readline - encoding = tokenize.detect_encoding(readsource) - newline_decoder = _io.IncrementalNewlineDecoder(None, True) - return newline_decoder.decode(source_bytes.decode(encoding[0])) + return decode_source(source_bytes) def source_to_code(self, data, path, *, _optimize=-1): """Return the code object compiled from source. 
diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py index 06f4d2f..7727f9d 100644 --- a/Lib/importlib/util.py +++ b/Lib/importlib/util.py @@ -2,6 +2,7 @@ from ._bootstrap import MAGIC_NUMBER from ._bootstrap import cache_from_source +from ._bootstrap import decode_source from ._bootstrap import module_to_load from ._bootstrap import set_loader from ._bootstrap import set_package diff --git a/Lib/test/test_importlib/test_util.py b/Lib/test/test_importlib/test_util.py index 111607b..5fcbdae 100644 --- a/Lib/test/test_importlib/test_util.py +++ b/Lib/test/test_importlib/test_util.py @@ -9,6 +9,27 @@ import unittest import warnings +class DecodeSourceBytesTests(unittest.TestCase): + + source = "string ='ü'" + + def test_ut8_default(self): + source_bytes = self.source.encode('utf-8') + self.assertEqual(util.decode_source(source_bytes), self.source) + + def test_specified_encoding(self): + source = '# coding=latin-1\n' + self.source + source_bytes = source.encode('latin-1') + assert source_bytes != source.encode('utf-8') + self.assertEqual(util.decode_source(source_bytes), source) + + def test_universal_newlines(self): + source = '\r\n'.join([self.source, self.source]) + source_bytes = source.encode('utf-8') + self.assertEqual(util.decode_source(source_bytes), + '\n'.join([self.source, self.source])) + + class ModuleToLoadTests(unittest.TestCase): module_name = 'ModuleManagerTest_module' |