From ed4ffd74042f5ac34a92514fdef8b61669e309ea Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Mon, 2 Oct 2023 17:11:24 +0200
Subject: [3.12] gh-108303: Move tokenize-related data to Lib/test/tokenizedata
 (GH-109265) (#109677)

* gh-108303: Move tokenize-related data to Lib/test/tokenizedata (GH-109265)

(cherry picked from commit 1110c5bc828218086f6397ec05a9312fb73ea30a)

* gh-108303: Add `Lib/test/tokenizedata` to `TESTSUBDIRS` (#109314)

(cherry picked from commit 42ab2cbd7b5e76e919b70883ae683e789dbd913d)

---------

Co-authored-by: Nikita Sobolev
---
 .gitattributes                                     |   2 +-
 .pre-commit-config.yaml                            |   2 +-
 Lib/test/bad_coding.py                             |   1 -
 Lib/test/bad_coding2.py                            |   2 -
 Lib/test/badsyntax_3131.py                         |   2 -
 Lib/test/coding20731.py                            |   4 -
 Lib/test/test_py_compile.py                        |  16 +-
 Lib/test/test_source_encoding.py                   |   5 +-
 Lib/test/test_tarfile.py                           |  29 ++--
 Lib/test/test_tokenize.py                          |   7 +-
 Lib/test/test_tools/test_reindent.py               |   2 +-
 Lib/test/test_unicode_identifiers.py               |   2 +-
 ...tests-latin1-coding-cookie-and-utf8-bom-sig.txt |  13 --
 ...ests-no-coding-cookie-and-utf8-bom-sig-only.txt |  11 --
 ...ests-utf8-coding-cookie-and-no-utf8-bom-sig.txt |  13 --
 ...e_tests-utf8-coding-cookie-and-utf8-bom-sig.txt |  12 --
 Lib/test/tokenize_tests.txt                        | 189 ---------------------
 Lib/test/tokenizedata/__init__.py                  |   0
 Lib/test/tokenizedata/bad_coding.py                |   1 +
 Lib/test/tokenizedata/bad_coding2.py               |   2 +
 Lib/test/tokenizedata/badsyntax_3131.py            |   2 +
 Lib/test/tokenizedata/coding20731.py               |   4 +
 ...tests-latin1-coding-cookie-and-utf8-bom-sig.txt |  13 ++
 ...ests-no-coding-cookie-and-utf8-bom-sig-only.txt |  11 ++
 ...ests-utf8-coding-cookie-and-no-utf8-bom-sig.txt |  13 ++
 ...e_tests-utf8-coding-cookie-and-utf8-bom-sig.txt |  12 ++
 Lib/test/tokenizedata/tokenize_tests.txt           | 189 +++++++++++++++++++++
 Makefile.pre.in                                    |   1 +
 28 files changed, 288 insertions(+), 272 deletions(-)
 delete mode 100644 Lib/test/bad_coding.py
 delete mode 100644 Lib/test/bad_coding2.py
 delete mode 100644 Lib/test/badsyntax_3131.py
 delete mode 100644 Lib/test/coding20731.py
 delete mode 100644 Lib/test/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt
 delete mode 100644 Lib/test/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt
 delete mode 100644 Lib/test/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt
 delete mode 100644 Lib/test/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt
 delete mode 100644 Lib/test/tokenize_tests.txt
 create mode 100644 Lib/test/tokenizedata/__init__.py
 create mode 100644 Lib/test/tokenizedata/bad_coding.py
 create mode 100644 Lib/test/tokenizedata/bad_coding2.py
 create mode 100644 Lib/test/tokenizedata/badsyntax_3131.py
 create mode 100644 Lib/test/tokenizedata/coding20731.py
 create mode 100644 Lib/test/tokenizedata/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt
 create mode 100644 Lib/test/tokenizedata/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt
 create mode 100644 Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt
 create mode 100644 Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt
 create mode 100644 Lib/test/tokenizedata/tokenize_tests.txt

diff --git a/.gitattributes b/.gitattributes
index 4ed9506..2bfd4bf 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -25,7 +25,7 @@ PC/classicAppCompat.* binary
 [attr]noeol -text
 
 Lib/test/cjkencodings/* noeol
-Lib/test/coding20731.py noeol
+Lib/test/tokenizedata/coding20731.py noeol
 Lib/test/decimaltestdata/*.decTest noeol
 Lib/test/test_email/data/*.txt noeol
 Lib/test/test_importlib/resources/data01/* noeol
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 19f6a03..4c1fd20 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -15,7 +15,7 @@ repos:
       - id: check-yaml
       - id: end-of-file-fixer
         types: [python]
-        exclude: Lib/test/coding20731.py
+        exclude: Lib/test/tokenizedata/coding20731.py
       - id: trailing-whitespace
         types_or: [c, python, rst]
 
diff --git a/Lib/test/bad_coding.py b/Lib/test/bad_coding.py
deleted file mode 100644
index 971b0a8..0000000
--- a/Lib/test/bad_coding.py
+++ /dev/null
@@ -1 +0,0 @@
-# -*- coding: uft-8 -*-
diff --git a/Lib/test/bad_coding2.py b/Lib/test/bad_coding2.py
deleted file mode 100644
index bb2bb7e..0000000
--- a/Lib/test/bad_coding2.py
+++ /dev/null
@@ -1,2 +0,0 @@
-#coding: utf8
-print('我')
diff --git a/Lib/test/badsyntax_3131.py b/Lib/test/badsyntax_3131.py
deleted file mode 100644
index 901d374..0000000
--- a/Lib/test/badsyntax_3131.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# -*- coding: utf-8 -*-
-€ = 2
diff --git a/Lib/test/coding20731.py b/Lib/test/coding20731.py
deleted file mode 100644
index b0e227a..0000000
--- a/Lib/test/coding20731.py
+++ /dev/null
@@ -1,4 +0,0 @@
-#coding:latin1
-
-
-
diff --git a/Lib/test/test_py_compile.py b/Lib/test/test_py_compile.py
index 5e0a44a..c4e6551 100644
--- a/Lib/test/test_py_compile.py
+++ b/Lib/test/test_py_compile.py
@@ -132,7 +132,9 @@ class PyCompileTestsBase:
         os.chmod(self.directory, mode.st_mode)
 
     def test_bad_coding(self):
-        bad_coding = os.path.join(os.path.dirname(__file__), 'bad_coding2.py')
+        bad_coding = os.path.join(os.path.dirname(__file__),
+                                  'tokenizedata',
+                                  'bad_coding2.py')
         with support.captured_stderr():
             self.assertIsNone(py_compile.compile(bad_coding, doraise=False))
         self.assertFalse(os.path.exists(
@@ -195,7 +197,9 @@ class PyCompileTestsBase:
         self.assertEqual(flags, 0b1)
 
     def test_quiet(self):
-        bad_coding = os.path.join(os.path.dirname(__file__), 'bad_coding2.py')
+        bad_coding = os.path.join(os.path.dirname(__file__),
+                                  'tokenizedata',
+                                  'bad_coding2.py')
         with support.captured_stderr() as stderr:
             self.assertIsNone(py_compile.compile(bad_coding, doraise=False, quiet=2))
             self.assertIsNone(py_compile.compile(bad_coding, doraise=True, quiet=2))
@@ -260,14 +264,18 @@ class PyCompileCLITestCase(unittest.TestCase):
         self.assertTrue(os.path.exists(self.cache_path))
 
     def test_bad_syntax(self):
-        bad_syntax = os.path.join(os.path.dirname(__file__), 'badsyntax_3131.py')
+        bad_syntax = os.path.join(os.path.dirname(__file__),
+                                  'tokenizedata',
+                                  'badsyntax_3131.py')
         rc, stdout, stderr = self.pycompilecmd_failure(bad_syntax)
         self.assertEqual(rc, 1)
         self.assertEqual(stdout, b'')
         self.assertIn(b'SyntaxError', stderr)
 
     def test_bad_syntax_with_quiet(self):
-        bad_syntax = os.path.join(os.path.dirname(__file__), 'badsyntax_3131.py')
+        bad_syntax = os.path.join(os.path.dirname(__file__),
+                                  'tokenizedata',
+                                  'badsyntax_3131.py')
         rc, stdout, stderr = self.pycompilecmd_failure('-q', bad_syntax)
         self.assertEqual(rc, 1)
         self.assertEqual(stdout, b'')
diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py
index 72c2b47..2787137 100644
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -68,6 +68,7 @@ class MiscSourceEncodingTest(unittest.TestCase):
     def test_20731(self):
         sub = subprocess.Popen([sys.executable,
                         os.path.join(os.path.dirname(__file__),
+                                     'tokenizedata',
                                      'coding20731.py')],
                         stderr=subprocess.PIPE)
         err = sub.communicate()[1]
@@ -100,10 +101,10 @@ class MiscSourceEncodingTest(unittest.TestCase):
         self.verify_bad_module(module_name)
 
     def verify_bad_module(self, module_name):
-        self.assertRaises(SyntaxError, __import__, 'test.' + module_name)
+        self.assertRaises(SyntaxError, __import__, 'test.tokenizedata.' + module_name)
 
         path = os.path.dirname(__file__)
-        filename = os.path.join(path, module_name + '.py')
+        filename = os.path.join(path, 'tokenizedata', module_name + '.py')
         with open(filename, "rb") as fp:
             bytes = fp.read()
         self.assertRaises(SyntaxError, compile, bytes, filename, 'exec')
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 013c626..5d9714e 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -2564,16 +2564,17 @@ class CommandLineTest(unittest.TestCase):
         return script_helper.assert_python_failure('-m', 'tarfile', *args)
 
     def make_simple_tarfile(self, tar_name):
-        files = [support.findfile('tokenize_tests.txt'),
+        files = [support.findfile('tokenize_tests.txt',
+                                  subdir='tokenizedata'),
                  support.findfile('tokenize_tests-no-coding-cookie-'
-                                  'and-utf8-bom-sig-only.txt')]
+                                  'and-utf8-bom-sig-only.txt',
+                                  subdir='tokenizedata')]
         self.addCleanup(os_helper.unlink, tar_name)
         with tarfile.open(tar_name, 'w') as tf:
             for tardata in files:
                 tf.add(tardata, arcname=os.path.basename(tardata))
 
     def make_evil_tarfile(self, tar_name):
-        files = [support.findfile('tokenize_tests.txt')]
         self.addCleanup(os_helper.unlink, tar_name)
         with tarfile.open(tar_name, 'w') as tf:
             benign = tarfile.TarInfo('benign')
@@ -2654,9 +2655,11 @@ class CommandLineTest(unittest.TestCase):
         self.assertEqual(rc, 1)
 
     def test_create_command(self):
-        files = [support.findfile('tokenize_tests.txt'),
+        files = [support.findfile('tokenize_tests.txt',
+                                  subdir='tokenizedata'),
                  support.findfile('tokenize_tests-no-coding-cookie-'
-                                  'and-utf8-bom-sig-only.txt')]
+                                  'and-utf8-bom-sig-only.txt',
+                                  subdir='tokenizedata')]
         for opt in '-c', '--create':
             try:
                 out = self.tarfilecmd(opt, tmpname, *files)
@@ -2667,9 +2670,11 @@ class CommandLineTest(unittest.TestCase):
                 self.assertEqual(out, b'')
             finally:
                 os_helper.unlink(tmpname)
 
     def test_create_command_verbose(self):
-        files = [support.findfile('tokenize_tests.txt'),
+        files = [support.findfile('tokenize_tests.txt',
+                                  subdir='tokenizedata'),
                  support.findfile('tokenize_tests-no-coding-cookie-'
-                                  'and-utf8-bom-sig-only.txt')]
+                                  'and-utf8-bom-sig-only.txt',
+                                  subdir='tokenizedata')]
         for opt in '-v', '--verbose':
             try:
                 out = self.tarfilecmd(opt, '-c', tmpname, *files,
@@ -2681,7 +2686,7 @@ class CommandLineTest(unittest.TestCase):
                 os_helper.unlink(tmpname)
 
     def test_create_command_dotless_filename(self):
-        files = [support.findfile('tokenize_tests.txt')]
+        files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata')]
         try:
             out = self.tarfilecmd('-c', dotlessname, *files)
             self.assertEqual(out, b'')
@@ -2692,7 +2697,7 @@ class CommandLineTest(unittest.TestCase):
 
     def test_create_command_dot_started_filename(self):
         tar_name = os.path.join(TEMPDIR, ".testtar")
-        files = [support.findfile('tokenize_tests.txt')]
+        files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata')]
         try:
             out = self.tarfilecmd('-c', tar_name, *files)
             self.assertEqual(out, b'')
@@ -2702,9 +2707,11 @@ class CommandLineTest(unittest.TestCase):
             os_helper.unlink(tar_name)
 
     def test_create_command_compressed(self):
-        files = [support.findfile('tokenize_tests.txt'),
+        files = [support.findfile('tokenize_tests.txt',
+                                  subdir='tokenizedata'),
                  support.findfile('tokenize_tests-no-coding-cookie-'
-                                  'and-utf8-bom-sig-only.txt')]
+                                  'and-utf8-bom-sig-only.txt',
+                                  subdir='tokenizedata')]
         for filetype in (GzipTest, Bz2Test, LzmaTest):
             if not filetype.open:
                 continue
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index c320478..40680f0 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1198,7 +1198,7 @@ class TestTokenizerAdheresToPep0263(TestCase):
     """
 
     def _testFile(self, filename):
-        path = os.path.join(os.path.dirname(__file__), filename)
+        path = os.path.join(os.path.dirname(__file__), 'tokenizedata', filename)
         with open(path, 'rb') as f:
             TestRoundtrip.check_roundtrip(self, f)
 
@@ -1791,7 +1791,7 @@ class TestRoundtrip(TestCase):
         self.check_roundtrip("if x == 1 : \n"
                              "  print(x)\n")
 
-        fn = support.findfile("tokenize_tests.txt")
+        fn = support.findfile("tokenize_tests.txt", subdir="tokenizedata")
         with open(fn, 'rb') as f:
             self.check_roundtrip(f)
         self.check_roundtrip("if x == 1:\n"
@@ -1846,8 +1846,7 @@ class TestRoundtrip(TestCase):
         # pass the '-ucpu' option to process the full directory.
 
         import glob, random
-        fn = support.findfile("tokenize_tests.txt")
-        tempdir = os.path.dirname(fn) or os.curdir
+        tempdir = os.path.dirname(__file__) or os.curdir
         testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))
 
         # Tokenize is broken on test_pep3131.py because regular expressions are
diff --git a/Lib/test/test_tools/test_reindent.py b/Lib/test/test_tools/test_reindent.py
index 3b0c793..64e31c2 100644
--- a/Lib/test/test_tools/test_reindent.py
+++ b/Lib/test/test_tools/test_reindent.py
@@ -25,7 +25,7 @@ class ReindentTests(unittest.TestCase):
         self.assertGreater(err, b'')
 
     def test_reindent_file_with_bad_encoding(self):
-        bad_coding_path = findfile('bad_coding.py')
+        bad_coding_path = findfile('bad_coding.py', subdir='tokenizedata')
         rc, out, err = assert_python_ok(self.script, '-r', bad_coding_path)
         self.assertEqual(out, b'')
         self.assertNotEqual(err, b'')
diff --git a/Lib/test/test_unicode_identifiers.py b/Lib/test/test_unicode_identifiers.py
index 5b9ced5..63c6c05 100644
--- a/Lib/test/test_unicode_identifiers.py
+++ b/Lib/test/test_unicode_identifiers.py
@@ -19,7 +19,7 @@ class PEP3131Test(unittest.TestCase):
 
     def test_invalid(self):
         try:
-            from test import badsyntax_3131
+            from test.tokenizedata import badsyntax_3131
         except SyntaxError as err:
             self.assertEqual(str(err),
                              "invalid character '€' (U+20AC) (badsyntax_3131.py, line 2)")
diff --git a/Lib/test/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt b/Lib/test/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt
deleted file mode 100644
index 1b5335b..0000000
--- a/Lib/test/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-# -*- coding: latin1 -*-
-# IMPORTANT: this file has the utf-8 BOM signature '\xef\xbb\xbf'
-# at the start of it. Make sure this is preserved if any changes
-# are made! Also note that the coding cookie above conflicts with
-# the presence of a utf-8 BOM signature -- this is intended.
-
-# Arbitrary encoded utf-8 text (stolen from test_doctest2.py).
-x = 'ЉЊЈЁЂ'
-def y():
-    """
-    And again in a comment. ЉЊЈЁЂ
-    """
-    pass
diff --git a/Lib/test/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt b/Lib/test/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt
deleted file mode 100644
index 23fd216..0000000
--- a/Lib/test/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# IMPORTANT: this file has the utf-8 BOM signature '\xef\xbb\xbf'
-# at the start of it. Make sure this is preserved if any changes
-# are made!
-
-# Arbitrary encoded utf-8 text (stolen from test_doctest2.py).
-x = 'ЉЊЈЁЂ'
-def y():
-    """
-    And again in a comment. ЉЊЈЁЂ
-    """
-    pass
diff --git a/Lib/test/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt b/Lib/test/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt
deleted file mode 100644
index 04561e4..0000000
--- a/Lib/test/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-# -*- coding: utf-8 -*-
-# IMPORTANT: unlike the other test_tokenize-*.txt files, this file
-# does NOT have the utf-8 BOM signature '\xef\xbb\xbf' at the start
-# of it. Make sure this is not added inadvertently by your editor
-# if any changes are made to this file!
-
-# Arbitrary encoded utf-8 text (stolen from test_doctest2.py).
-x = 'ЉЊЈЁЂ'
-def y():
-    """
-    And again in a comment. ЉЊЈЁЂ
-    """
-    pass
diff --git a/Lib/test/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt b/Lib/test/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt
deleted file mode 100644
index 4b20ff6..0000000
--- a/Lib/test/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-# IMPORTANT: this file has the utf-8 BOM signature '\xef\xbb\xbf'
-# at the start of it. Make sure this is preserved if any changes
-# are made!
-
-# Arbitrary encoded utf-8 text (stolen from test_doctest2.py).
-x = 'ЉЊЈЁЂ'
-def y():
-    """
-    And again in a comment. ЉЊЈЁЂ
-    """
-    pass
diff --git a/Lib/test/tokenize_tests.txt b/Lib/test/tokenize_tests.txt
deleted file mode 100644
index c4f5a58..0000000
--- a/Lib/test/tokenize_tests.txt
+++ /dev/null
@@ -1,189 +0,0 @@
-# Tests for the 'tokenize' module.
-# Large bits stolen from test_grammar.py.
-
-# Comments
-"#"
-#'
-#"
-#\
-       #
-    # abc
-'''#
-#'''
-
-x = 1  #
-
-# Balancing continuation
-
-a = (3, 4,
-  5, 6)
-y = [3, 4,
-  5]
-z = {'a':5,
-  'b':6}
-x = (len(repr(y)) + 5*x - a[
-   3 ]
-   - x + len({
-   }
-    )
-  )
-
-# Backslash means line continuation:
-x = 1 \
-+ 1
-
-# Backslash does not means continuation in comments :\
-x = 0
-
-# Ordinary integers
-0xff != 255
-0o377 != 255
-2147483647 != 0o17777777777
--2147483647-1 != 0o20000000000
-0o37777777777 != -1
-0xffffffff != -1; 0o37777777777 != -1; -0o1234567 == 0O001234567; 0b10101 == 0B00010101
-
-# Long integers
-x = 0
-x = 0
-x = 0xffffffffffffffff
-x = 0xffffffffffffffff
-x = 0o77777777777777777
-x = 0B11101010111111111
-x = 123456789012345678901234567890
-x = 123456789012345678901234567890
-
-# Floating-point numbers
-x = 3.14
-x = 314.
-x = 0.314
-# XXX x = 000.314
-x = .314
-x = 3e14
-x = 3E14
-x = 3e-14
-x = 3e+14
-x = 3.e14
-x = .3e14
-x = 3.1e4
-
-# String literals
-x = ''; y = "";
-x = '\''; y = "'";
-x = '"'; y = "\"";
-x = "doesn't \"shrink\" does it"
-y = 'doesn\'t "shrink" does it'
-x = "does \"shrink\" doesn't it"
-y = 'does "shrink" doesn\'t it'
-x = """
-The "quick"
-brown fox
-jumps over
-the 'lazy' dog.
-"""
-y = '\nThe "quick"\nbrown fox\njumps over\nthe \'lazy\' dog.\n'
-y = '''
-The "quick"
-brown fox
-jumps over
-the 'lazy' dog.
-''';
-y = "\n\
-The \"quick\"\n\
-brown fox\n\
-jumps over\n\
-the 'lazy' dog.\n\
-";
-y = '\n\
-The \"quick\"\n\
-brown fox\n\
-jumps over\n\
-the \'lazy\' dog.\n\
-';
-x = r'\\' + R'\\'
-x = r'\'' + ''
-y = r'''
-foo bar \\
-baz''' + R'''
-foo'''
-y = r"""foo
-bar \\ baz
-""" + R'''spam
-'''
-x = b'abc' + B'ABC'
-y = b"abc" + B"ABC"
-x = br'abc' + Br'ABC' + bR'ABC' + BR'ABC'
-y = br"abc" + Br"ABC" + bR"ABC" + BR"ABC"
-x = rb'abc' + rB'ABC' + Rb'ABC' + RB'ABC'
-y = rb"abc" + rB"ABC" + Rb"ABC" + RB"ABC"
-x = br'\\' + BR'\\'
-x = rb'\\' + RB'\\'
-x = br'\'' + ''
-x = rb'\'' + ''
-y = br'''
-foo bar \\
-baz''' + BR'''
-foo'''
-y = Br"""foo
-bar \\ baz
-""" + bR'''spam
-'''
-y = rB"""foo
-bar \\ baz
-""" + Rb'''spam
-'''
-
-# Indentation
-if 1:
-    x = 2
-if 1:
-        x = 2
-if 1:
-    while 0:
-     if 0:
-           x = 2
-     x = 2
-if 0:
-  if 2:
-   while 0:
-        if 1:
-          x = 2
-
-# Operators
-
-def d22(a, b, c=1, d=2): pass
-def d01v(a=1, *restt, **restd): pass
-
-(x, y) != ({'a':1}, {'b':2})
-
-# comparison
-if 1 < 1 > 1 == 1 >= 1 <= 1 != 1 != 1 in 1 not in 1 is 1 is not 1: pass
-
-# binary
-x = 1 & 1
-x = 1 ^ 1
-x = 1 | 1
-
-# shift
-x = 1 << 1 >> 1
-
-# additive
-x = 1 - 1 + 1 - 1 + 1
-
-# multiplicative
-x = 1 / 1 * 1 % 1
-
-# unary
-x = ~1 ^ 1 & 1 | 1 & 1 ^ -1
-x = -1*1/1 + 1*1 - ---1*1
-
-# selector
-import sys, time
-x = sys.modules['time'].time()
-
-@staticmethod
-def foo(): pass
-
-@staticmethod
-def foo(x:1)->1: pass
-
diff --git a/Lib/test/tokenizedata/__init__.py b/Lib/test/tokenizedata/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/Lib/test/tokenizedata/bad_coding.py b/Lib/test/tokenizedata/bad_coding.py
new file mode 100644
index 0000000..971b0a8
--- /dev/null
+++ b/Lib/test/tokenizedata/bad_coding.py
@@ -0,0 +1 @@
+# -*- coding: uft-8 -*-
diff --git a/Lib/test/tokenizedata/bad_coding2.py b/Lib/test/tokenizedata/bad_coding2.py
new file mode 100644
index 0000000..bb2bb7e
--- /dev/null
+++ b/Lib/test/tokenizedata/bad_coding2.py
@@ -0,0 +1,2 @@
+#coding: utf8
+print('我')
diff --git a/Lib/test/tokenizedata/badsyntax_3131.py b/Lib/test/tokenizedata/badsyntax_3131.py
new file mode 100644
index 0000000..901d374
--- /dev/null
+++ b/Lib/test/tokenizedata/badsyntax_3131.py
@@ -0,0 +1,2 @@
+# -*- coding: utf-8 -*-
+€ = 2
diff --git a/Lib/test/tokenizedata/coding20731.py b/Lib/test/tokenizedata/coding20731.py
new file mode 100644
index 0000000..b0e227a
--- /dev/null
+++ b/Lib/test/tokenizedata/coding20731.py
@@ -0,0 +1,4 @@
+#coding:latin1
+
+
+
diff --git a/Lib/test/tokenizedata/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt b/Lib/test/tokenizedata/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt
new file mode 100644
index 0000000..1b5335b
--- /dev/null
+++ b/Lib/test/tokenizedata/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt
@@ -0,0 +1,13 @@
+# -*- coding: latin1 -*-
+# IMPORTANT: this file has the utf-8 BOM signature '\xef\xbb\xbf'
+# at the start of it. Make sure this is preserved if any changes
+# are made! Also note that the coding cookie above conflicts with
+# the presence of a utf-8 BOM signature -- this is intended.
+
+# Arbitrary encoded utf-8 text (stolen from test_doctest2.py).
+x = 'ЉЊЈЁЂ'
+def y():
+    """
+    And again in a comment. ЉЊЈЁЂ
+    """
+    pass
diff --git a/Lib/test/tokenizedata/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt b/Lib/test/tokenizedata/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt
new file mode 100644
index 0000000..23fd216
--- /dev/null
+++ b/Lib/test/tokenizedata/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt
@@ -0,0 +1,11 @@
+# IMPORTANT: this file has the utf-8 BOM signature '\xef\xbb\xbf'
+# at the start of it. Make sure this is preserved if any changes
+# are made!
+
+# Arbitrary encoded utf-8 text (stolen from test_doctest2.py).
+x = 'ЉЊЈЁЂ'
+def y():
+    """
+    And again in a comment. ЉЊЈЁЂ
+    """
+    pass
diff --git a/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt b/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt
new file mode 100644
index 0000000..04561e4
--- /dev/null
+++ b/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+# IMPORTANT: unlike the other test_tokenize-*.txt files, this file
+# does NOT have the utf-8 BOM signature '\xef\xbb\xbf' at the start
+# of it. Make sure this is not added inadvertently by your editor
+# if any changes are made to this file!
+
+# Arbitrary encoded utf-8 text (stolen from test_doctest2.py).
+x = 'ЉЊЈЁЂ'
+def y():
+    """
+    And again in a comment. ЉЊЈЁЂ
+    """
+    pass
diff --git a/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt b/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt
new file mode 100644
index 0000000..4b20ff6
--- /dev/null
+++ b/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+# IMPORTANT: this file has the utf-8 BOM signature '\xef\xbb\xbf'
+# at the start of it. Make sure this is preserved if any changes
+# are made!
+
+# Arbitrary encoded utf-8 text (stolen from test_doctest2.py).
+x = 'ЉЊЈЁЂ'
+def y():
+    """
+    And again in a comment. ЉЊЈЁЂ
+    """
+    pass
diff --git a/Lib/test/tokenizedata/tokenize_tests.txt b/Lib/test/tokenizedata/tokenize_tests.txt
new file mode 100644
index 0000000..c4f5a58
--- /dev/null
+++ b/Lib/test/tokenizedata/tokenize_tests.txt
@@ -0,0 +1,189 @@
+# Tests for the 'tokenize' module.
+# Large bits stolen from test_grammar.py.
+
+# Comments
+"#"
+#'
+#"
+#\
+       #
+    # abc
+'''#
+#'''
+
+x = 1  #
+
+# Balancing continuation
+
+a = (3, 4,
+  5, 6)
+y = [3, 4,
+  5]
+z = {'a':5,
+  'b':6}
+x = (len(repr(y)) + 5*x - a[
+   3 ]
+   - x + len({
+   }
+    )
+  )
+
+# Backslash means line continuation:
+x = 1 \
++ 1
+
+# Backslash does not means continuation in comments :\
+x = 0
+
+# Ordinary integers
+0xff != 255
+0o377 != 255
+2147483647 != 0o17777777777
+-2147483647-1 != 0o20000000000
+0o37777777777 != -1
+0xffffffff != -1; 0o37777777777 != -1; -0o1234567 == 0O001234567; 0b10101 == 0B00010101
+
+# Long integers
+x = 0
+x = 0
+x = 0xffffffffffffffff
+x = 0xffffffffffffffff
+x = 0o77777777777777777
+x = 0B11101010111111111
+x = 123456789012345678901234567890
+x = 123456789012345678901234567890
+
+# Floating-point numbers
+x = 3.14
+x = 314.
+x = 0.314
+# XXX x = 000.314
+x = .314
+x = 3e14
+x = 3E14
+x = 3e-14
+x = 3e+14
+x = 3.e14
+x = .3e14
+x = 3.1e4
+
+# String literals
+x = ''; y = "";
+x = '\''; y = "'";
+x = '"'; y = "\"";
+x = "doesn't \"shrink\" does it"
+y = 'doesn\'t "shrink" does it'
+x = "does \"shrink\" doesn't it"
+y = 'does "shrink" doesn\'t it'
+x = """
+The "quick"
+brown fox
+jumps over
+the 'lazy' dog.
+""" +y = '\nThe "quick"\nbrown fox\njumps over\nthe \'lazy\' dog.\n' +y = ''' +The "quick" +brown fox +jumps over +the 'lazy' dog. +'''; +y = "\n\ +The \"quick\"\n\ +brown fox\n\ +jumps over\n\ +the 'lazy' dog.\n\ +"; +y = '\n\ +The \"quick\"\n\ +brown fox\n\ +jumps over\n\ +the \'lazy\' dog.\n\ +'; +x = r'\\' + R'\\' +x = r'\'' + '' +y = r''' +foo bar \\ +baz''' + R''' +foo''' +y = r"""foo +bar \\ baz +""" + R'''spam +''' +x = b'abc' + B'ABC' +y = b"abc" + B"ABC" +x = br'abc' + Br'ABC' + bR'ABC' + BR'ABC' +y = br"abc" + Br"ABC" + bR"ABC" + BR"ABC" +x = rb'abc' + rB'ABC' + Rb'ABC' + RB'ABC' +y = rb"abc" + rB"ABC" + Rb"ABC" + RB"ABC" +x = br'\\' + BR'\\' +x = rb'\\' + RB'\\' +x = br'\'' + '' +x = rb'\'' + '' +y = br''' +foo bar \\ +baz''' + BR''' +foo''' +y = Br"""foo +bar \\ baz +""" + bR'''spam +''' +y = rB"""foo +bar \\ baz +""" + Rb'''spam +''' + +# Indentation +if 1: + x = 2 +if 1: + x = 2 +if 1: + while 0: + if 0: + x = 2 + x = 2 +if 0: + if 2: + while 0: + if 1: + x = 2 + +# Operators + +def d22(a, b, c=1, d=2): pass +def d01v(a=1, *restt, **restd): pass + +(x, y) != ({'a':1}, {'b':2}) + +# comparison +if 1 < 1 > 1 == 1 >= 1 <= 1 != 1 != 1 in 1 not in 1 is 1 is not 1: pass + +# binary +x = 1 & 1 +x = 1 ^ 1 +x = 1 | 1 + +# shift +x = 1 << 1 >> 1 + +# additive +x = 1 - 1 + 1 - 1 + 1 + +# multiplicative +x = 1 / 1 * 1 % 1 + +# unary +x = ~1 ^ 1 & 1 | 1 & 1 ^ -1 +x = -1*1/1 + 1*1 - ---1*1 + +# selector +import sys, time +x = sys.modules['time'].time() + +@staticmethod +def foo(): pass + +@staticmethod +def foo(x:1)->1: pass + diff --git a/Makefile.pre.in b/Makefile.pre.in index cf054c1..7418ddf 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2226,6 +2226,7 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_zipfile/_path \ test/test_zoneinfo \ test/test_zoneinfo/data \ + test/tokenizedata \ test/tracedmodules \ test/typinganndata \ test/xmltestdata \ -- cgit v0.12