 Lib/test/test_tokenize.py                          | 40 ++++++++++++++++-
 Lib/tokenize.py                                    | 53 +++++++++++++-----
 .../2024-02-08-16-01-18.gh-issue-115154.ji96FV.rst |  2 +
 3 files changed, 85 insertions(+), 10 deletions(-)
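This commit stops tokenize.untokenize from doubling the braces of unicode named
escapes (\N{...}) inside FSTRING_MIDDLE tokens, which used to turn valid
f-strings into invalid source, and it keeps adjacent string/f-string tokens
separated. A minimal sketch of the fixed round trip, assuming Python 3.12+
(PEP 701 f-string tokens) with this patch applied:

    import io
    import tokenize

    src = "f'\\N{SNAKE}'"  # \N{SNAKE} is a named escape, not a replacement field
    tokens = tokenize.generate_tokens(io.StringIO(src).readline)
    print(tokenize.untokenize(tokens))  # f'\N{SNAKE}', braces preserved
    # Before this fix, every brace in FSTRING_MIDDLE was doubled, producing
    # f'\N{{SNAKE}}', which no longer compiles (malformed \N escape).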
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 21e8637..4428e8c 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1877,6 +1877,43 @@ class TestRoundtrip(TestCase):
" print('Can not import' # comment2\n)"
"else: print('Loaded')\n")
+ self.check_roundtrip("f'\\N{EXCLAMATION MARK}'")
+ self.check_roundtrip(r"f'\\N{SNAKE}'")
+ self.check_roundtrip(r"f'\\N{{SNAKE}}'")
+ self.check_roundtrip(r"f'\N{SNAKE}'")
+ self.check_roundtrip(r"f'\\\N{SNAKE}'")
+ self.check_roundtrip(r"f'\\\\\N{SNAKE}'")
+ self.check_roundtrip(r"f'\\\\\\\N{SNAKE}'")
+
+ self.check_roundtrip(r"f'\\N{1}'")
+ self.check_roundtrip(r"f'\\\\N{2}'")
+ self.check_roundtrip(r"f'\\\\\\N{3}'")
+ self.check_roundtrip(r"f'\\\\\\\\N{4}'")
+
+ self.check_roundtrip(r"f'\\N{{'")
+ self.check_roundtrip(r"f'\\\\N{{'")
+ self.check_roundtrip(r"f'\\\\\\N{{'")
+ self.check_roundtrip(r"f'\\\\\\\\N{{'")
+ cases = [
+ """
+if 1:
+ "foo"
+"bar"
+""",
+ """
+if 1:
+ ("foo"
+ "bar")
+""",
+ """
+if 1:
+ "foo"
+ "bar"
+""" ]
+ for case in cases:
+ self.check_roundtrip(case)
+
+
def test_continuation(self):
# Balancing continuation
self.check_roundtrip("a = (3,4, \n"
@@ -1911,9 +1948,6 @@ class TestRoundtrip(TestCase):
tempdir = os.path.dirname(__file__) or os.curdir
testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))
- # TODO: Remove this once we can untokenize PEP 701 syntax
- testfiles.remove(os.path.join(tempdir, "test_fstring.py"))
-
if not support.is_resource_enabled("cpu"):
testfiles = random.sample(testfiles, 10)
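Note that check_roundtrip does not require byte-identical output: roughly, it
tokenizes the source to 5-tuples and to (type, string) pairs, untokenizes both,
and asserts that each result re-tokenizes to the same pairs. A simplified
sketch of that invariant (the real helper in this file also accepts bytes and
open files):

    import io
    import tokenize

    def roundtrip_invariant(source):
        tokens5 = list(tokenize.generate_tokens(io.StringIO(source).readline))
        pairs = [t[:2] for t in tokens5]
        for toks in (tokens5, pairs):
            out = tokenize.untokenize(toks)
            again = tokenize.generate_tokens(io.StringIO(out).readline)
            assert [t[:2] for t in again] == pairs

    roundtrip_invariant(r"f'\N{SNAKE}'")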
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 0ab1893..7f418bb 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -168,6 +168,7 @@ class Untokenizer:
self.tokens = []
self.prev_row = 1
self.prev_col = 0
+ self.prev_type = None
self.encoding = None
def add_whitespace(self, start):
@@ -183,6 +184,29 @@ class Untokenizer:
if col_offset:
self.tokens.append(" " * col_offset)
+ def escape_brackets(self, token):
+ characters = []
+ consume_until_next_bracket = False
+ for character in token:
+ if character == "}":
+ if consume_until_next_bracket:
+ consume_until_next_bracket = False
+ else:
+ characters.append(character)
+ if character == "{":
+ n_backslashes = sum(
+ 1 for char in _itertools.takewhile(
+ "\\".__eq__,
+ characters[-2::-1]
+ )
+ )
+ if n_backslashes % 2 == 0:
+ characters.append(character)
+ else:
+ consume_until_next_bracket = True
+ characters.append(character)
+ return "".join(characters)
+
def untokenize(self, iterable):
it = iter(iterable)
indents = []
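The rule escape_brackets implements: a "{" must be left alone (and its matching
"}" skipped) exactly when it opens a \N{...} escape, that is, when the "N"
right before it is preceded by an odd number of backslashes; an even number of
backslashes pairs up into literal backslashes, so the brace is a literal one
and gets doubled. A hypothetical standalone restatement of that parity test
(the method itself scans the already-emitted characters, so its indices shift
as braces get doubled):

    def brace_opens_named_escape(text, brace_index):
        # True if text[brace_index] == "{" belongs to a \N{...} escape.
        if brace_index < 2 or text[brace_index - 1] != "N":
            return False
        i = brace_index - 2  # first character before the "N"
        n_backslashes = 0
        while i >= 0 and text[i] == "\\":
            n_backslashes += 1
            i -= 1
        return n_backslashes % 2 == 1

    assert brace_opens_named_escape("\\N{", 2)        # \N{   -> escape
    assert not brace_opens_named_escape("\\\\N{", 3)  # \\N{  -> literal \, then N{
    assert brace_opens_named_escape("\\\\\\N{", 4)    # \\\N{ -> literal \, then \N{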
@@ -214,11 +238,13 @@ class Untokenizer:
startline = False
elif tok_type == FSTRING_MIDDLE:
if '{' in token or '}' in token:
+ token = self.escape_brackets(token)
+ last_line = token.splitlines()[-1]
end_line, end_col = end
- end = (end_line, end_col + token.count('{') + token.count('}'))
- token = re.sub('{', '{{', token)
- token = re.sub('}', '}}', token)
-
+ extra_chars = last_line.count("{{") + last_line.count("}}")
+ end = (end_line, end_col + extra_chars)
+ elif tok_type in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
+ self.tokens.append(" ")
self.add_whitespace(start)
self.tokens.append(token)
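The end-column correction now counts doublings only on the token's final line,
because an end column is relative to the line it sits on; doubled braces on
earlier lines of a multi-line FSTRING_MIDDLE move text on those lines, not the
final column. An illustration of the arithmetic with a made-up token value:

    escaped = "a{{\nb}}"  # escape_brackets output for the token "a{\nb}"
    last_line = escaped.splitlines()[-1]
    extra_chars = last_line.count("{{") + last_line.count("}}")
    print(extra_chars)  # 1: only the "}}" on the last line widens the end column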
@@ -226,6 +252,7 @@ class Untokenizer:
if tok_type in (NEWLINE, NL):
self.prev_row += 1
self.prev_col = 0
+ self.prev_type = tok_type
return "".join(self.tokens)
def compat(self, token, iterable):
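Tracking prev_type lets the position-based path insert a space when a STRING or
FSTRING_START lands immediately after another string token, so two quote
characters cannot glue into an unintended triple quote when the column
bookkeeping would otherwise place them back to back. One of the new round-trip
cases, checked with the token-stream invariant on an interpreter that includes
this fix:

    import io
    import tokenize

    src = 'if 1:\n    "foo"\n"bar"\n'
    tokens5 = list(tokenize.generate_tokens(io.StringIO(src).readline))
    out = tokenize.untokenize(tokens5)
    again = tokenize.generate_tokens(io.StringIO(out).readline)
    assert [t[:2] for t in again] == [t[:2] for t in tokens5]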
@@ -233,6 +260,7 @@ class Untokenizer:
toks_append = self.tokens.append
startline = token[0] in (NEWLINE, NL)
prevstring = False
+ in_fstring = 0
for tok in _itertools.chain([token], iterable):
toknum, tokval = tok[:2]
@@ -251,6 +279,10 @@ class Untokenizer:
else:
prevstring = False
+ if toknum == FSTRING_START:
+ in_fstring += 1
+ elif toknum == FSTRING_END:
+ in_fstring -= 1
if toknum == INDENT:
indents.append(tokval)
continue
@@ -263,11 +295,18 @@ class Untokenizer:
toks_append(indents[-1])
startline = False
elif toknum == FSTRING_MIDDLE:
- if '{' in tokval or '}' in tokval:
- tokval = re.sub('{', '{{', tokval)
- tokval = re.sub('}', '}}', tokval)
+ tokval = self.escape_brackets(tokval)
+
+ # Insert a space between two consecutive brackets if we are in an f-string
+ if tokval in {"{", "}"} and self.tokens and self.tokens[-1] == tokval and in_fstring:
+ tokval = ' ' + tokval
+
+ # Insert a space between two consecutive f-strings
+ if toknum in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
+ self.tokens.append(" ")
toks_append(tokval)
+ self.prev_type = toknum
def untokenize(iterable):
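compat receives bare (type, string) pairs and has no positions to work with, so
the new in_fstring counter is what keeps two identical consecutive brackets
apart; inside an f-string, emitting them back to back would form an escaped
{{ or }} and change the meaning. A sketch of the 2-tuple path (compat's
spacing is heuristic, so the check is token-stream equality rather than
identical text):

    import io
    import tokenize

    src = "f'{ {1: 2} }'"  # a dict display directly inside a replacement field
    pairs = [t[:2] for t in tokenize.generate_tokens(io.StringIO(src).readline)]
    out = tokenize.untokenize(pairs)
    again = [t[:2] for t in tokenize.generate_tokens(io.StringIO(out).readline)]
    assert again == pairs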
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-08-16-01-18.gh-issue-115154.ji96FV.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-08-16-01-18.gh-issue-115154.ji96FV.rst
new file mode 100644
index 0000000..045596b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-08-16-01-18.gh-issue-115154.ji96FV.rst
@@ -0,0 +1,2 @@
+Fix a bug that was causing the :func:`tokenize.untokenize` function to
+mishandle Unicode named literals (``\N{...}``) in f-strings. Patch by Pablo Galindo.