summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSemyon Moroz <donbarbos@proton.me>2025-03-27 16:04:16 (GMT)
committerGitHub <noreply@github.com>2025-03-27 16:04:16 (GMT)
commit9b83670f0f183a69440972765cae5942a0bd9fd7 (patch)
treee193e4047d696b6279d86b31200d1e08aa8cc207
parent8614f86b7163b1c39798b481902dbb511292a537 (diff)
downloadcpython-9b83670f0f183a69440972765cae5942a0bd9fd7.zip
cpython-9b83670f0f183a69440972765cae5942a0bd9fd7.tar.gz
cpython-9b83670f0f183a69440972765cae5942a0bd9fd7.tar.bz2
gh-131178: Add tests for `tokenize` command-line interface (#131274)
-rw-r--r--Lib/test/test_tokenize.py80
-rw-r--r--Lib/tokenize.py6
2 files changed, 83 insertions, 3 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 5fa4e0d..df2617c 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1,5 +1,7 @@
+import contextlib
import os
import re
+import tempfile
import token
import tokenize
import unittest
@@ -3178,5 +3180,83 @@ class CTokenizerBufferTests(unittest.TestCase):
run_test_script(file_name)
+class CommandLineTest(unittest.TestCase):
+ def setUp(self):
+ self.filename = tempfile.mktemp()
+ self.addCleanup(os_helper.unlink, self.filename)
+
+ @staticmethod
+ def text_normalize(string):
+        """Collapse runs of whitespace in *string* and strip surrounding whitespace.
+
+ This method is used by the other utility functions so that any
+ string to write or to match against can be freely indented.
+ """
+ return re.sub(r'\s+', ' ', string).strip()
+
+ def set_source(self, content):
+ with open(self.filename, 'w') as fp:
+ fp.write(content)
+
+ def invoke_tokenize(self, *flags):
+ output = StringIO()
+ with contextlib.redirect_stdout(output):
+ tokenize._main(args=[*flags, self.filename])
+ return self.text_normalize(output.getvalue())
+
+ def check_output(self, source, expect, *flags):
+ with self.subTest(source=source, flags=flags):
+ self.set_source(source)
+ res = self.invoke_tokenize(*flags)
+ expect = self.text_normalize(expect)
+ self.assertListEqual(res.splitlines(), expect.splitlines())
+
+ def test_invocation(self):
+ # test various combinations of parameters
+ base_flags = ('-e', '--exact')
+
+ self.set_source('''
+ def f():
+ print(x)
+ return None
+ ''')
+
+ for flag in base_flags:
+ with self.subTest(args=flag):
+ _ = self.invoke_tokenize(flag)
+
+ with self.assertRaises(SystemExit):
+ # suppress argparse error message
+ with contextlib.redirect_stderr(StringIO()):
+ _ = self.invoke_tokenize('--unknown')
+
+ def test_without_flag(self):
+ # test 'python -m tokenize source.py'
+ source = 'a = 1'
+ expect = '''
+ 0,0-0,0: ENCODING 'utf-8'
+ 1,0-1,1: NAME 'a'
+ 1,2-1,3: OP '='
+ 1,4-1,5: NUMBER '1'
+ 1,5-1,6: NEWLINE ''
+ 2,0-2,0: ENDMARKER ''
+ '''
+ self.check_output(source, expect)
+
+ def test_exact_flag(self):
+ # test 'python -m tokenize -e/--exact source.py'
+ source = 'a = 1'
+ expect = '''
+ 0,0-0,0: ENCODING 'utf-8'
+ 1,0-1,1: NAME 'a'
+ 1,2-1,3: EQUAL '='
+ 1,4-1,5: NUMBER '1'
+ 1,5-1,6: NEWLINE ''
+ 2,0-2,0: ENDMARKER ''
+ '''
+ for flag in ['-e', '--exact']:
+ self.check_output(source, expect, flag)
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 9ce95a62..7afacff 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -499,7 +499,7 @@ def generate_tokens(readline):
"""
return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
-def main():
+def _main(args=None):
import argparse
# Helper error handling routines
@@ -524,7 +524,7 @@ def main():
help='the file to tokenize; defaults to stdin')
parser.add_argument('-e', '--exact', dest='exact', action='store_true',
help='display token names using the exact type')
- args = parser.parse_args()
+ args = parser.parse_args(args)
try:
# Tokenize the input
@@ -589,4 +589,4 @@ def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False)
if __name__ == "__main__":
- main()
+ _main()