path: root/Lib/test/test_tokenize.py
author     Pablo Galindo Salgado <Pablogsal@gmail.com>  2023-06-09 20:39:01 (GMT)
committer  GitHub <noreply@github.com>  2023-06-09 20:39:01 (GMT)
commit     b047fa5e56ba725362c64ca3d6fccbdcf51d0cab (patch)
tree       7059182f1b40011c8051fc53a5cdb7eb2f180e15 /Lib/test/test_tokenize.py
parent     00b599ab5a76023fa0083d7cc5d3c569342a5191 (diff)
gh-105549: Tokenize separately NUMBER and NAME tokens and allow 0-prefixed literals (#105555)
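The tests added here pin down the new behavior: the tokenize module now emits a separate NUMBER token followed by a NAME token for input such as "2sin(x)", and accepts 0-prefixed literals such as "01234", instead of failing. A minimal illustrative snippet (not part of the commit; it simply mirrors the expected tokens asserted in the tests below):

    import token
    from io import StringIO
    from tokenize import generate_tokens

    # After this change, "2sin" tokenizes as NUMBER '2' followed by NAME 'sin'.
    for tok in generate_tokens(StringIO("2sin(x)").readline):
        print(token.tok_name[tok.type], repr(tok.string))
    # NUMBER '2'
    # NAME 'sin'
    # OP '('
    # NAME 'x'
    # OP ')'
    # NEWLINE ''
    # ENDMARKER ''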
Diffstat (limited to 'Lib/test/test_tokenize.py')
-rw-r--r--  Lib/test/test_tokenize.py  33
1 files changed, 33 insertions, 0 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 2c124f0..df9c9db 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -284,7 +284,12 @@ def k(x):
                 # this won't work with compound complex inputs
                 continue
             self.assertEqual(number_token(lit), lit)
+        # Valid cases with extra underscores in the tokenize module
+        # See gh-105549 for context
+        extra_valid_cases = {"0_7", "09_99"}
         for lit in INVALID_UNDERSCORE_LITERALS:
+            if lit in extra_valid_cases:
+                continue
             try:
                 number_token(lit)
             except TokenError:
@@ -1873,6 +1878,34 @@ class TestRoundtrip(TestCase):
             self.check_roundtrip(code)
 
 
+class InvalidPythonTests(TestCase):
+    def test_number_followed_by_name(self):
+        # See gh-105549
+        source = "2sin(x)"
+        expected_tokens = [
+            TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'),
+            TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'),
+            TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'),
+            TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'),
+            TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'),
+            TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'),
+            TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+        ]
+
+        tokens = list(generate_tokens(StringIO(source).readline))
+        self.assertEqual(tokens, expected_tokens)
+
+    def test_number_starting_with_zero(self):
+        source = "01234"
+        expected_tokens = [
+            TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'),
+            TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'),
+            TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+        ]
+
+        tokens = list(generate_tokens(StringIO(source).readline))
+        self.assertEqual(tokens, expected_tokens)
+
 class CTokenizeTest(TestCase):
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.