author | Lysandros Nikolaou <lisandrosnik@gmail.com> | 2023-06-19 11:31:57 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-19 11:31:57 (GMT) |
commit | ab3823a97bdeefb0266b3c8d493f7f6223ce3686 | |
tree | 8d2e7b75d5d83cb85c3ead54aca3454776fde4ae | |
parent | 581619941ecce986a2fc8cbddd95256daa25fb26 | |
gh-71299: Fix __all__ in tokenize (#105907)
Co-authored-by: Unit03
-rw-r--r-- | Lib/test/test_tokenize.py | 268 |
-rw-r--r-- | Lib/tokenize.py | 4 |

2 files changed, 137 insertions, 135 deletions
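In short, the commit adds "open" and "TokenError" to `tokenize.__all__`, deletes the unused `StopTokenizing` exception, and rewrites `test_tokenize.py` to use a plain `import tokenize` so the new `test___all__` check can be expressed against `token.__all__`. A minimal sketch of the invariant the new test enforces (adapted from the diff below, not copied verbatim):

```python
# Rough, adapted version of test_tokenize.TestMisc.test___all__ from the diff
# below; assumes a CPython build that already contains this commit.
import token
import tokenize

expected = set(token.__all__) | {
    "TokenInfo", "TokenError", "generate_tokens",
    "detect_encoding", "untokenize", "open", "tokenize",
}
assert set(tokenize.__all__) == expected          # the two new names are exported
assert not hasattr(tokenize, "StopTokenizing")    # removed as unused
```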
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 5ad2784..97c4884 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1,20 +1,16 @@ -from test import support -from test.support import os_helper -from tokenize import (tokenize, untokenize, NUMBER, NAME, OP, - STRING, ENDMARKER, ENCODING, tok_name, detect_encoding, - open as tokenize_open, Untokenizer, generate_tokens, - NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo, - TokenError) -from io import BytesIO, StringIO +import os +import token +import tokenize import unittest +from io import BytesIO, StringIO from textwrap import dedent from unittest import TestCase, mock +from test import support +from test.support import os_helper from test.test_grammar import (VALID_UNDERSCORE_LITERALS, INVALID_UNDERSCORE_LITERALS) from test.support import os_helper from test.support.script_helper import run_test_script, make_script, run_python_until_end -import os -import token # Converts a source string into a list of textual representation # of the tokens such as: @@ -26,12 +22,12 @@ def stringify_tokens_from_source(token_generator, source_string): missing_trailing_nl = source_string[-1] not in '\r\n' for type, token, start, end, line in token_generator: - if type == ENDMARKER: + if type == tokenize.ENDMARKER: break # Ignore the new line on the last line if the input lacks one - if missing_trailing_nl and type == NEWLINE and end[0] == num_lines: + if missing_trailing_nl and type == tokenize.NEWLINE and end[0] == num_lines: continue - type = tok_name[type] + type = tokenize.tok_name[type] result.append(f" {type:10} {token!r:13} {start} {end}") return result @@ -47,7 +43,7 @@ class TokenizeTest(TestCase): # Format the tokens in s in a table format. # The ENDMARKER and final NEWLINE are omitted. f = BytesIO(s.encode('utf-8')) - result = stringify_tokens_from_source(tokenize(f.readline), s) + result = stringify_tokens_from_source(tokenize.tokenize(f.readline), s) self.assertEqual(result, [" ENCODING 'utf-8' (0, 0) (0, 0)"] + expected.rstrip().splitlines()) @@ -57,27 +53,27 @@ class TokenizeTest(TestCase): yield "sdfosdg" yield "sdfosdg" with self.assertRaises(TypeError): - list(tokenize(gen().__next__)) + list(tokenize.tokenize(gen().__next__)) def gen(): yield b"sdfosdg" yield b"sdfosdg" with self.assertRaises(TypeError): - list(generate_tokens(gen().__next__)) + list(tokenize.generate_tokens(gen().__next__)) def gen(): yield "sdfosdg" 1/0 with self.assertRaises(ZeroDivisionError): - list(generate_tokens(gen().__next__)) + list(tokenize.generate_tokens(gen().__next__)) def test_implicit_newline(self): # Make sure that the tokenizer puts in an implicit NEWLINE # when the input lacks a trailing new line. 
f = BytesIO("x".encode('utf-8')) - tokens = list(tokenize(f.readline)) - self.assertEqual(tokens[-2].type, NEWLINE) - self.assertEqual(tokens[-1].type, ENDMARKER) + tokens = list(tokenize.tokenize(f.readline)) + self.assertEqual(tokens[-2].type, tokenize.NEWLINE) + self.assertEqual(tokens[-1].type, tokenize.ENDMARKER) def test_basic(self): self.check_tokenize("1 + 1", """\ @@ -139,7 +135,7 @@ def k(x): with self.assertRaisesRegex(IndentationError, "unindent does not match any " "outer indentation level") as e: - for tok in tokenize(readline): + for tok in tokenize.tokenize(readline): pass self.assertEqual(e.exception.lineno, 3) self.assertEqual(e.exception.filename, '<string>') @@ -275,8 +271,8 @@ def k(x): def test_underscore_literals(self): def number_token(s): f = BytesIO(s.encode('utf-8')) - for toktype, token, start, end, line in tokenize(f.readline): - if toktype == NUMBER: + for toktype, token, start, end, line in tokenize.tokenize(f.readline): + if toktype == tokenize.NUMBER: return token return 'invalid token' for lit in VALID_UNDERSCORE_LITERALS: @@ -292,7 +288,7 @@ def k(x): continue try: number_token(lit) - except TokenError: + except tokenize.TokenError: continue self.assertNotEqual(number_token(lit), lit) @@ -1150,24 +1146,24 @@ class GenerateTokensTest(TokenizeTest): # Format the tokens in s in a table format. # The ENDMARKER and final NEWLINE are omitted. f = StringIO(s) - result = stringify_tokens_from_source(generate_tokens(f.readline), s) + result = stringify_tokens_from_source(tokenize.generate_tokens(f.readline), s) self.assertEqual(result, expected.rstrip().splitlines()) def decistmt(s): result = [] - g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string + g = tokenize.tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string for toknum, tokval, _, _, _ in g: - if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens + if toknum == tokenize.NUMBER and '.' 
in tokval: # replace NUMBER tokens result.extend([ - (NAME, 'Decimal'), - (OP, '('), - (STRING, repr(tokval)), - (OP, ')') + (tokenize.NAME, 'Decimal'), + (tokenize.OP, '('), + (tokenize.STRING, repr(tokval)), + (tokenize.OP, ')') ]) else: result.append((toknum, tokval)) - return untokenize(result).decode('utf-8').strip() + return tokenize.untokenize(result).decode('utf-8').strip() class TestMisc(TestCase): @@ -1191,6 +1187,13 @@ class TestMisc(TestCase): self.assertEqual(eval(decistmt(s)), Decimal('-3.217160342717258261933904529E-7')) + def test___all__(self): + expected = token.__all__ + [ + "TokenInfo", "TokenError", "generate_tokens", + "detect_encoding", "untokenize", "open", "tokenize", + ] + self.assertCountEqual(tokenize.__all__, expected) + class TestTokenizerAdheresToPep0263(TestCase): """ @@ -1245,9 +1248,10 @@ class Test_Tokenize(TestCase): yield b'' # skip the initial encoding token and the end tokens - tokens = list(_generate_tokens_from_c_tokenizer(readline().__next__, encoding='utf-8', - extra_tokens=True))[:-2] - expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')] + tokens = list(tokenize._generate_tokens_from_c_tokenizer(readline().__next__, + encoding='utf-8', + extra_tokens=True))[:-2] + expected_tokens = [tokenize.TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')] self.assertEqual(tokens, expected_tokens, "bytes not decoded with encoding") @@ -1271,7 +1275,7 @@ class TestDetectEncoding(TestCase): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8') self.assertEqual(consumed_lines, list(lines[:2])) @@ -1281,7 +1285,7 @@ class TestDetectEncoding(TestCase): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'# something\n', b'print(something)\n']) @@ -1292,7 +1296,7 @@ class TestDetectEncoding(TestCase): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'iso-8859-1') self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n']) @@ -1302,7 +1306,7 @@ class TestDetectEncoding(TestCase): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'# coding=utf-8\n']) @@ -1313,7 +1317,7 @@ class TestDetectEncoding(TestCase): b'do_something(else)\n' ) readline = self.get_readline(lines) - self.assertRaises(SyntaxError, detect_encoding, readline) + self.assertRaises(SyntaxError, tokenize.detect_encoding, readline) def test_cookie_second_line_no_bom(self): lines = ( @@ -1322,7 +1326,7 @@ class TestDetectEncoding(TestCase): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'ascii') expected = [b'#! 
something\n', b'# vim: set fileencoding=ascii :\n'] self.assertEqual(consumed_lines, expected) @@ -1334,7 +1338,7 @@ class TestDetectEncoding(TestCase): b'print(something)\n', b'do_something(else)\n' ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'#! something\n', b'f# coding=utf-8\n']) @@ -1347,7 +1351,7 @@ class TestDetectEncoding(TestCase): b'do_something(else)\n' ) readline = self.get_readline(lines) - self.assertRaises(SyntaxError, detect_encoding, readline) + self.assertRaises(SyntaxError, tokenize.detect_encoding, readline) def test_cookie_second_line_noncommented_first_line(self): lines = ( @@ -1355,7 +1359,7 @@ class TestDetectEncoding(TestCase): b'# vim: set fileencoding=iso8859-15 :\n', b"print('\xe2\x82\xac')\n" ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8') expected = [b"print('\xc2\xa3')\n"] self.assertEqual(consumed_lines, expected) @@ -1366,7 +1370,7 @@ class TestDetectEncoding(TestCase): b'# vim: set fileencoding=iso8859-15 :\n', b"print('\xe2\x82\xac')\n" ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'iso8859-15') expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n'] self.assertEqual(consumed_lines, expected) @@ -1377,7 +1381,7 @@ class TestDetectEncoding(TestCase): b'# vim: set fileencoding=iso8859-15 :\n', b"print('\xe2\x82\xac')\n" ) - encoding, consumed_lines = detect_encoding(self.get_readline(lines)) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'iso8859-15') expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n'] self.assertEqual(consumed_lines, expected) @@ -1394,7 +1398,7 @@ class TestDetectEncoding(TestCase): b"print(things)\n", b"do_something += 4\n") rl = self.get_readline(lines) - found, consumed_lines = detect_encoding(rl) + found, consumed_lines = tokenize.detect_encoding(rl) self.assertEqual(found, "iso-8859-1") def test_syntaxerror_latin1(self): @@ -1404,7 +1408,7 @@ class TestDetectEncoding(TestCase): b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S ) readline = self.get_readline(lines) - self.assertRaises(SyntaxError, detect_encoding, readline) + self.assertRaises(SyntaxError, tokenize.detect_encoding, readline) def test_utf8_normalization(self): @@ -1417,36 +1421,36 @@ class TestDetectEncoding(TestCase): b"# coding: " + enc.encode("ascii") + b"\n", b"1 + 3\n") rl = self.get_readline(lines) - found, consumed_lines = detect_encoding(rl) + found, consumed_lines = tokenize.detect_encoding(rl) self.assertEqual(found, "utf-8") def test_short_files(self): readline = self.get_readline((b'print(something)\n',)) - encoding, consumed_lines = detect_encoding(readline) + encoding, consumed_lines = tokenize.detect_encoding(readline) self.assertEqual(encoding, 'utf-8') self.assertEqual(consumed_lines, [b'print(something)\n']) - encoding, consumed_lines = detect_encoding(self.get_readline(())) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(())) self.assertEqual(encoding, 'utf-8') self.assertEqual(consumed_lines, []) readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',)) - encoding, 
consumed_lines = detect_encoding(readline) + encoding, consumed_lines = tokenize.detect_encoding(readline) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, [b'print(something)\n']) readline = self.get_readline((b'\xef\xbb\xbf',)) - encoding, consumed_lines = detect_encoding(readline) + encoding, consumed_lines = tokenize.detect_encoding(readline) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, []) readline = self.get_readline((b'# coding: bad\n',)) - self.assertRaises(SyntaxError, detect_encoding, readline) + self.assertRaises(SyntaxError, tokenize.detect_encoding, readline) def test_false_encoding(self): # Issue 18873: "Encoding" detected in non-comment lines readline = self.get_readline((b'print("#coding=fake")',)) - encoding, consumed_lines = detect_encoding(readline) + encoding, consumed_lines = tokenize.detect_encoding(readline) self.assertEqual(encoding, 'utf-8') self.assertEqual(consumed_lines, [b'print("#coding=fake")']) @@ -1459,14 +1463,14 @@ class TestDetectEncoding(TestCase): with open(filename, 'w', encoding=encoding) as fp: print("# coding: %s" % encoding, file=fp) print("print('euro:\u20ac')", file=fp) - with tokenize_open(filename) as fp: + with tokenize.open(filename) as fp: self.assertEqual(fp.encoding, encoding) self.assertEqual(fp.mode, 'r') # test BOM (no coding cookie) with open(filename, 'w', encoding='utf-8-sig') as fp: print("print('euro:\u20ac')", file=fp) - with tokenize_open(filename) as fp: + with tokenize.open(filename) as fp: self.assertEqual(fp.encoding, 'utf-8-sig') self.assertEqual(fp.mode, 'r') @@ -1493,16 +1497,16 @@ class TestDetectEncoding(TestCase): ins = Bunk(lines, path) # Make sure lacking a name isn't an issue. del ins.name - detect_encoding(ins.readline) + tokenize.detect_encoding(ins.readline) with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)): ins = Bunk(lines, path) - detect_encoding(ins.readline) + tokenize.detect_encoding(ins.readline) def test_open_error(self): # Issue #23840: open() must close the binary file on error m = BytesIO(b'#coding:xxx') with mock.patch('tokenize._builtin_open', return_value=m): - self.assertRaises(SyntaxError, tokenize_open, 'foobar') + self.assertRaises(SyntaxError, tokenize.open, 'foobar') self.assertTrue(m.closed) @@ -1542,7 +1546,7 @@ class TestTokenize(TestCase): tokenize_module.detect_encoding = mock_detect_encoding tokenize_module._generate_tokens_from_c_tokenizer = mock__tokenize try: - results = tokenize(mock_readline) + results = tokenize.tokenize(mock_readline) self.assertEqual(list(results)[1:], [b'first', b'second', b'1', b'2', b'3', b'4']) finally: @@ -1559,23 +1563,23 @@ class TestTokenize(TestCase): buf = '\n'.join(buf) # Test that 500 consequent, one-line defs is OK - toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline)) + toks = list(tokenize.tokenize(BytesIO(buf.encode('utf-8')).readline)) self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER # [-2] is always NEWLINE def assertExactTypeEqual(self, opstr, *optypes): - tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline)) + tokens = list(tokenize.tokenize(BytesIO(opstr.encode('utf-8')).readline)) num_optypes = len(optypes) self.assertEqual(len(tokens), 3 + num_optypes) - self.assertEqual(tok_name[tokens[0].exact_type], - tok_name[ENCODING]) + self.assertEqual(tokenize.tok_name[tokens[0].exact_type], + tokenize.tok_name[tokenize.ENCODING]) for i in range(num_optypes): - self.assertEqual(tok_name[tokens[i + 1].exact_type], - tok_name[optypes[i]]) - 
self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type], - tok_name[token.NEWLINE]) - self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type], - tok_name[token.ENDMARKER]) + self.assertEqual(tokenize.tok_name[tokens[i + 1].exact_type], + tokenize.tok_name[optypes[i]]) + self.assertEqual(tokenize.tok_name[tokens[1 + num_optypes].exact_type], + tokenize.tok_name[token.NEWLINE]) + self.assertEqual(tokenize.tok_name[tokens[2 + num_optypes].exact_type], + tokenize.tok_name[token.ENDMARKER]) def test_exact_type(self): self.assertExactTypeEqual('()', token.LPAR, token.RPAR) @@ -1625,11 +1629,11 @@ class TestTokenize(TestCase): self.assertExactTypeEqual('@=', token.ATEQUAL) self.assertExactTypeEqual('a**2+b**2==c**2', - NAME, token.DOUBLESTAR, NUMBER, + tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER, token.PLUS, - NAME, token.DOUBLESTAR, NUMBER, + tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER, token.EQEQUAL, - NAME, token.DOUBLESTAR, NUMBER) + tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER) self.assertExactTypeEqual('{1, 2, 3}', token.LBRACE, token.NUMBER, token.COMMA, @@ -1650,32 +1654,32 @@ class TestTokenize(TestCase): # See http://bugs.python.org/issue44667 source = 'b = 1\n\n#test' expected_tokens = [ - TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''), - TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'), - TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'), - TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'), - TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'), - TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'), - TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'), - TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'), - TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='') + tokenize.TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''), + tokenize.TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'), + tokenize.TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'), + tokenize.TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'), + tokenize.TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'), + tokenize.TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'), + tokenize.TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'), + tokenize.TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'), + tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='') ] - tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline)) + tokens = list(tokenize.tokenize(BytesIO(source.encode('utf-8')).readline)) self.assertEqual(tokens, expected_tokens) def test_newline_and_space_at_the_end_of_the_source_without_newline(self): # See https://github.com/python/cpython/issues/105435 source = 'a\n ' expected_tokens = [ - TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''), - TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'), - TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'), - TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '), - TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='') + 
tokenize.TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''), + tokenize.TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'), + tokenize.TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'), + tokenize.TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '), + tokenize.TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='') ] - tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline)) + tokens = list(tokenize.tokenize(BytesIO(source.encode('utf-8')).readline)) self.assertEqual(tokens, expected_tokens) def test_invalid_character_in_fstring_middle(self): @@ -1695,7 +1699,7 @@ class UntokenizeTest(TestCase): def test_bad_input_order(self): # raise if previous row - u = Untokenizer() + u = tokenize.Untokenizer() u.prev_row = 2 u.prev_col = 2 with self.assertRaises(ValueError) as cm: @@ -1707,7 +1711,7 @@ class UntokenizeTest(TestCase): def test_backslash_continuation(self): # The problem is that <whitespace>\<newline> leaves no token - u = Untokenizer() + u = tokenize.Untokenizer() u.prev_row = 1 u.prev_col = 1 u.tokens = [] @@ -1719,17 +1723,17 @@ class UntokenizeTest(TestCase): TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n') def test_iter_compat(self): - u = Untokenizer() - token = (NAME, 'Hello') - tokens = [(ENCODING, 'utf-8'), token] + u = tokenize.Untokenizer() + token = (tokenize.NAME, 'Hello') + tokens = [(tokenize.ENCODING, 'utf-8'), token] u.compat(token, iter([])) self.assertEqual(u.tokens, ["Hello "]) - u = Untokenizer() + u = tokenize.Untokenizer() self.assertEqual(u.untokenize(iter([token])), 'Hello ') - u = Untokenizer() + u = tokenize.Untokenizer() self.assertEqual(u.untokenize(iter(tokens)), 'Hello ') self.assertEqual(u.encoding, 'utf-8') - self.assertEqual(untokenize(iter(tokens)), b'Hello ') + self.assertEqual(tokenize.untokenize(iter(tokens)), b'Hello ') class TestRoundtrip(TestCase): @@ -1752,17 +1756,17 @@ class TestRoundtrip(TestCase): else: code = f.read() readline = iter(code.splitlines(keepends=True)).__next__ - tokens5 = list(tokenize(readline)) + tokens5 = list(tokenize.tokenize(readline)) tokens2 = [tok[:2] for tok in tokens5] # Reproduce tokens2 from pairs - bytes_from2 = untokenize(tokens2) + bytes_from2 = tokenize.untokenize(tokens2) readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__ - tokens2_from2 = [tok[:2] for tok in tokenize(readline2)] + tokens2_from2 = [tok[:2] for tok in tokenize.tokenize(readline2)] self.assertEqual(tokens2_from2, tokens2) # Reproduce tokens2 from 5-tuples - bytes_from5 = untokenize(tokens5) + bytes_from5 = tokenize.untokenize(tokens5) readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__ - tokens2_from5 = [tok[:2] for tok in tokenize(readline5)] + tokens2_from5 = [tok[:2] for tok in tokenize.tokenize(readline5)] self.assertEqual(tokens2_from5, tokens2) def check_line_extraction(self, f): @@ -1771,8 +1775,8 @@ class TestRoundtrip(TestCase): else: code = f.read() readline = iter(code.splitlines(keepends=True)).__next__ - for tok in tokenize(readline): - if tok.type in {ENCODING, ENDMARKER}: + for tok in tokenize.tokenize(readline): + if tok.type in {tokenize.ENCODING, tokenize.ENDMARKER}: continue self.assertEqual(tok.string, tok.line[tok.start[1]: tok.end[1]]) @@ -1878,7 +1882,7 @@ class TestRoundtrip(TestCase): def roundtrip(self, code): if isinstance(code, str): code = code.encode('utf-8') - return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8') + return 
tokenize.untokenize(tokenize.tokenize(BytesIO(code).readline)).decode('utf-8') def test_indentation_semantics_retained(self): """ @@ -1896,27 +1900,27 @@ class InvalidPythonTests(TestCase): # See issue #gh-105549 source = "2sin(x)" expected_tokens = [ - TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'), - TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'), - TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'), - TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'), - TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'), - TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'), - TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='') + tokenize.TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'), + tokenize.TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'), + tokenize.TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'), + tokenize.TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'), + tokenize.TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'), + tokenize.TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'), + tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='') ] - tokens = list(generate_tokens(StringIO(source).readline)) + tokens = list(tokenize.generate_tokens(StringIO(source).readline)) self.assertEqual(tokens, expected_tokens) def test_number_starting_with_zero(self): source = "01234" expected_tokens = [ - TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'), - TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'), - TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='') + tokenize.TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'), + tokenize.TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'), + tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='') ] - tokens = list(generate_tokens(StringIO(source).readline)) + tokens = list(tokenize.generate_tokens(StringIO(source).readline)) self.assertEqual(tokens, expected_tokens) class CTokenizeTest(TestCase): @@ -1926,7 +1930,7 @@ class CTokenizeTest(TestCase): f = StringIO(s) with self.subTest(source=s): result = stringify_tokens_from_source( - _generate_tokens_from_c_tokenizer(f.readline), s + tokenize._generate_tokens_from_c_tokenizer(f.readline), s ) self.assertEqual(result, expected.rstrip().splitlines()) @@ -1935,15 +1939,15 @@ class CTokenizeTest(TestCase): yield "1+1".encode(encoding) expected = [ - TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'), - TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1'), - TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'), - TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'), - TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='') + tokenize.TokenInfo(type=tokenize.NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'), + tokenize.TokenInfo(type=tokenize.OP, string='+', start=(1, 1), end=(1, 2), line='1+1'), + tokenize.TokenInfo(type=tokenize.NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'), + 
tokenize.TokenInfo(type=tokenize.NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'),
+            tokenize.TokenInfo(type=tokenize.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
         ]
         for encoding in ["utf-8", "latin-1", "utf-16"]:
             with self.subTest(encoding=encoding):
-                tokens = list(_generate_tokens_from_c_tokenizer(
+                tokens = list(tokenize._generate_tokens_from_c_tokenizer(
                     readline(encoding).__next__,
                     extra_tokens=True,
                     encoding=encoding,
@@ -2796,7 +2800,7 @@ async def f():
     def test_invalid_syntax(self):
         def get_tokens(string):
             the_string = StringIO(string)
-            return list(_generate_tokens_from_c_tokenizer(the_string.readline))
+            return list(tokenize._generate_tokens_from_c_tokenizer(the_string.readline))

         for case in [
             "(1+2]",
@@ -2832,7 +2836,7 @@ async def f():
             "]",
         ]:
             with self.subTest(case=case):
-                self.assertRaises(TokenError, get_tokens, case)
+                self.assertRaises(tokenize.TokenError, get_tokens, case)

     def test_max_indent(self):
         MAXINDENT = 100
@@ -2844,14 +2848,14 @@ async def f():

         valid = generate_source(MAXINDENT - 1)
         the_input = StringIO(valid)
-        tokens = list(_generate_tokens_from_c_tokenizer(the_input.readline))
-        self.assertEqual(tokens[-2].type, DEDENT)
-        self.assertEqual(tokens[-1].type, ENDMARKER)
+        tokens = list(tokenize._generate_tokens_from_c_tokenizer(the_input.readline))
+        self.assertEqual(tokens[-2].type, tokenize.DEDENT)
+        self.assertEqual(tokens[-1].type, tokenize.ENDMARKER)
         compile(valid, "<string>", "exec")

         invalid = generate_source(MAXINDENT)
         the_input = StringIO(invalid)
-        self.assertRaises(IndentationError, lambda: list(_generate_tokens_from_c_tokenizer(the_input.readline)))
+        self.assertRaises(IndentationError, lambda: list(tokenize._generate_tokens_from_c_tokenizer(the_input.readline)))
         self.assertRaises(
             IndentationError, compile, invalid, "<string>", "exec"
         )
@@ -2860,7 +2864,7 @@ async def f():
         def get_tokens(string):
             the_string = StringIO(string)
             return [(kind, string) for (kind, string, *_)
-                    in _generate_tokens_from_c_tokenizer(the_string.readline)]
+                    in tokenize._generate_tokens_from_c_tokenizer(the_string.readline)]

         code = dedent("""
             def fib(n):
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 49e8144..c21876f 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -41,7 +41,7 @@ blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)

 import token
 __all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding",
-                           "untokenize", "TokenInfo"]
+                           "untokenize", "TokenInfo", "open", "TokenError"]
 del token

 class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
@@ -162,8 +162,6 @@ tabsize = 8

 class TokenError(Exception): pass

-class StopTokenizing(Exception): pass
-
 class Untokenizer:

     def __init__(self):
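For context, a small usage sketch (not part of the commit) of the two names this change adds to the public `__all__` surface; the file path is a hypothetical example:

```python
import tokenize

# tokenize.open() decodes a source file with the encoding announced by its BOM
# or coding cookie; tokenize.TokenError signals unterminated constructs such as
# an unclosed bracket at end of file.
try:
    with tokenize.open("example.py") as f:            # hypothetical path
        for tok in tokenize.generate_tokens(f.readline):
            print(tokenize.tok_name[tok.type], tok.string)
except tokenize.TokenError as err:
    print("tokenization failed:", err)
```

Because `open` is now listed in `__all__`, a wildcard `from tokenize import *` also binds `tokenize.open` over the builtin `open`, which is worth keeping in mind when reviewing star imports.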