summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCyberSaxosTiGER <cybersaxostiger@gmail.com>2020-05-18 18:41:35 (GMT)
committerGitHub <noreply@github.com>2020-05-18 18:41:35 (GMT)
commitd71a6492dbd5434dfa6a0ad95e3ad98aa690887a (patch)
tree5d356a79c8e84a44ec2944b940b584625e9a6310
parentdc31800f86fbcd40ee616984820b885d8adaa6a7 (diff)
downloadcpython-d71a6492dbd5434dfa6a0ad95e3ad98aa690887a.zip
cpython-d71a6492dbd5434dfa6a0ad95e3ad98aa690887a.tar.gz
cpython-d71a6492dbd5434dfa6a0ad95e3ad98aa690887a.tar.bz2
bpo-38870: correctly escape unprintable characters on ast.unparse (GH-20166)
Unprintable characters such as `\x00` weren't correctly roundtripped because the default string repr was not used when generating docstrings. This patch correctly escapes all unprintable characters (except `\n` and `\t`, which are commonly used for formatting and are usually found unescaped in source). Co-authored-by: Pablo Galindo <Pablogsal@gmail.com> Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
-rw-r--r--Lib/ast.py14
-rw-r--r--Lib/test/test_unparse.py6
2 files changed, 16 insertions, 4 deletions
diff --git a/Lib/ast.py b/Lib/ast.py
index 0d3b19d..2edb717 100644
--- a/Lib/ast.py
+++ b/Lib/ast.py
@@ -1090,6 +1090,15 @@ class _Unparser(NodeVisitor):
self.write(node.id)
def _write_docstring(self, node):
+ def esc_char(c):
+ if c in ("\n", "\t"):
+ # In the AST form, we don't know the author's intention
+ # about how this should be displayed. We leave \n and \t
+ # unescaped, because they are more likely to be unescaped
+ # in the source
+ return c
+ return c.encode('unicode_escape').decode('ascii')
+
self.fill()
if node.kind == "u":
self.write("u")
@@ -1097,11 +1106,10 @@ class _Unparser(NodeVisitor):
value = node.value
if value:
# Preserve quotes in the docstring by escaping them
- value = value.replace("\\", "\\\\")
- value = value.replace('"""', '""\"')
- value = value.replace("\r", "\\r")
+ value = "".join(map(esc_char, value))
if value[-1] == '"':
value = value.replace('"', '\\"', -1)
+ value = value.replace('"""', '""\\"')
self.write(f'"""{value}"""')
diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py
index 67dcb1d..6d82872 100644
--- a/Lib/test/test_unparse.py
+++ b/Lib/test/test_unparse.py
@@ -324,7 +324,11 @@ class UnparseTestCase(ASTTestCase):
'\\t',
'\n',
'\\n',
- '\r\\r\t\\t\n\\n'
+ '\r\\r\t\\t\n\\n',
+ '""">>> content = \"\"\"blabla\"\"\" <<<"""',
+ r'foo\n\x00',
+ '🐍⛎𩸽üéş^\X\BB\N{LONG RIGHTWARDS SQUIGGLE ARROW}'
+
)
for docstring in docstrings:
# check as Module docstrings for easy testing