summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSemyon Moroz <donbarbos@proton.me>2025-03-27 16:04:16 (GMT)
committerGitHub <noreply@github.com>2025-03-27 16:04:16 (GMT)
commit9b83670f0f183a69440972765cae5942a0bd9fd7 (patch)
treee193e4047d696b6279d86b31200d1e08aa8cc207
parent8614f86b7163b1c39798b481902dbb511292a537 (diff)
downloadcpython-9b83670f0f183a69440972765cae5942a0bd9fd7.zip
cpython-9b83670f0f183a69440972765cae5942a0bd9fd7.tar.gz
cpython-9b83670f0f183a69440972765cae5942a0bd9fd7.tar.bz2
gh-131178: Add tests for `tokenize` command-line interface (#131274)
-rw-r--r--Lib/test/test_tokenize.py80
-rw-r--r--Lib/tokenize.py6
2 files changed, 83 insertions, 3 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 5fa4e0d..df2617c 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1,5 +1,7 @@
+import contextlib
import os
import re
+import tempfile
import token
import tokenize
import unittest
@@ -3178,5 +3180,83 @@ class CTokenizerBufferTests(unittest.TestCase):
run_test_script(file_name)
+class CommandLineTest(unittest.TestCase):
+ def setUp(self):
+ self.filename = tempfile.mktemp()
+ self.addCleanup(os_helper.unlink, self.filename)
+
+ @staticmethod
+ def text_normalize(string):
+        """Collapse runs of whitespace in *string* and strip surrounding whitespace.
+
+ This method is used by the other utility functions so that any
+ string to write or to match against can be freely indented.
+ """
+ return re.sub(r'\s+', ' ', string).strip()
+
+ def set_source(self, content):
+ with open(self.filename, 'w') as fp:
+ fp.write(content)
+
+ def invoke_tokenize(self, *flags):
+ output = StringIO()
+ with contextlib.redirect_stdout(output):
+ tokenize._main(args=[*flags, self.filename])
+ return self.text_normalize(output.getvalue())
+
+ def check_output(self, source, expect, *flags):
+ with self.subTest(source=source, flags=flags):
+ self.set_source(source)
+ res = self.invoke_tokenize(*flags)
+ expect = self.text_normalize(expect)
+ self.assertListEqual(res.splitlines(), expect.splitlines())
+
+ def test_invocation(self):
+ # test various combinations of parameters
+ base_flags = ('-e', '--exact')
+
+ self.set_source('''
+ def f():
+ print(x)
+ return None
+ ''')
+
+ for flag in base_flags:
+ with self.subTest(args=flag):
+ _ = self.invoke_tokenize(flag)
+
+ with self.assertRaises(SystemExit):
+ # suppress argparse error message
+ with contextlib.redirect_stderr(StringIO()):
+ _ = self.invoke_tokenize('--unknown')
+
+ def test_without_flag(self):
+ # test 'python -m tokenize source.py'
+ source = 'a = 1'
+ expect = '''
+ 0,0-0,0: ENCODING 'utf-8'
+ 1,0-1,1: NAME 'a'
+ 1,2-1,3: OP '='
+ 1,4-1,5: NUMBER '1'
+ 1,5-1,6: NEWLINE ''
+ 2,0-2,0: ENDMARKER ''
+ '''
+ self.check_output(source, expect)
+
+ def test_exact_flag(self):
+ # test 'python -m tokenize -e/--exact source.py'
+ source = 'a = 1'
+ expect = '''
+ 0,0-0,0: ENCODING 'utf-8'
+ 1,0-1,1: NAME 'a'
+ 1,2-1,3: EQUAL '='
+ 1,4-1,5: NUMBER '1'
+ 1,5-1,6: NEWLINE ''
+ 2,0-2,0: ENDMARKER ''
+ '''
+ for flag in ['-e', '--exact']:
+ self.check_output(source, expect, flag)
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 9ce95a62..7afacff 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -499,7 +499,7 @@ def generate_tokens(readline):
"""
return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
-def main():
+def _main(args=None):
import argparse
# Helper error handling routines
@@ -524,7 +524,7 @@ def main():
help='the file to tokenize; defaults to stdin')
parser.add_argument('-e', '--exact', dest='exact', action='store_true',
help='display token names using the exact type')
- args = parser.parse_args()
+ args = parser.parse_args(args)
try:
# Tokenize the input
@@ -589,4 +589,4 @@ def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False)
if __name__ == "__main__":
- main()
+ _main()