diff options
author | Pablo Galindo <Pablogsal@gmail.com> | 2019-03-25 22:01:12 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-03-25 22:01:12 (GMT) |
commit | 91759d98015e1d6d5e1367cff60592ab548e7806 (patch) | |
tree | 903553ec0677b1fc9c3531799ce890fd7a019069 | |
parent | 027b09c5a13aac9e14a3b43bb385298d549c3833 (diff) | |
download | cpython-91759d98015e1d6d5e1367cff60592ab548e7806.zip cpython-91759d98015e1d6d5e1367cff60592ab548e7806.tar.gz cpython-91759d98015e1d6d5e1367cff60592ab548e7806.tar.bz2 |
bpo-36143: Regenerate Lib/keyword.py from the Grammar and Tokens file using pgen (GH-12456)
Now that the parser generator is written in Python (Parser/pgen), we can use it to regenerate Lib/keyword.py, the file that contains the language keywords, directly from Grammar/Grammar and Grammar/Tokens instead of parsing the autogenerated grammar files (Python/graminit.c). This also allows the CI to check that the autogenerated files are up to date.
-rw-r--r--[-rwxr-xr-x] | Lib/keyword.py | 129 | ||||
-rw-r--r-- | Lib/test/test_keyword.py | 119 | ||||
-rw-r--r-- | Makefile.pre.in | 11 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2019-03-20-00-37-24.bpo-12456.fnKoKo.rst | 2 | ||||
-rw-r--r-- | Parser/pgen/keywordgen.py | 60 |
5 files changed, 124 insertions, 197 deletions
diff --git a/Lib/keyword.py b/Lib/keyword.py index 150c2bc..ddcbb25 100755..100644 --- a/Lib/keyword.py +++ b/Lib/keyword.py @@ -1,98 +1,55 @@ -#! /usr/bin/env python3 - -"""Keywords (from "graminit.c") +"""Keywords (from "Grammar/Grammar") This file is automatically generated; please don't muck it up! To update the symbols in this file, 'cd' to the top directory of -the python source tree after building the interpreter and run: +the python source tree and run: + + python3 -m Parser.pgen.keywordgen Grammar/Grammar \ + Grammar/Tokens \ + Lib/keyword.py - ./python Lib/keyword.py +Alternatively, you can run 'make regen-keyword'. """ __all__ = ["iskeyword", "kwlist"] kwlist = [ -#--start keywords-- - 'False', - 'None', - 'True', - 'and', - 'as', - 'assert', - 'break', - 'class', - 'continue', - 'def', - 'del', - 'elif', - 'else', - 'except', - 'finally', - 'for', - 'from', - 'global', - 'if', - 'import', - 'in', - 'is', - 'lambda', - 'nonlocal', - 'not', - 'or', - 'pass', - 'raise', - 'return', - 'try', - 'while', - 'with', - 'yield', -#--end keywords-- - ] - -kwlist.append('async') -kwlist.append('await') -kwlist.sort() + 'False', + 'None', + 'True', + 'and', + 'as', + 'assert', + 'async', + 'await', + 'break', + 'class', + 'continue', + 'def', + 'del', + 'elif', + 'else', + 'except', + 'finally', + 'for', + 'from', + 'global', + 'if', + 'import', + 'in', + 'is', + 'lambda', + 'nonlocal', + 'not', + 'or', + 'pass', + 'raise', + 'return', + 'try', + 'while', + 'with', + 'yield' +] iskeyword = frozenset(kwlist).__contains__ - -def main(): - import sys, re - - args = sys.argv[1:] - iptfile = args and args[0] or "Python/graminit.c" - if len(args) > 1: optfile = args[1] - else: optfile = "Lib/keyword.py" - - # load the output skeleton from the target, taking care to preserve its - # newline convention. 
- with open(optfile, newline='') as fp: - format = fp.readlines() - nl = format[0][len(format[0].strip()):] if format else '\n' - - # scan the source file for keywords - with open(iptfile) as fp: - strprog = re.compile('"([^"]+)"') - lines = [] - for line in fp: - if '{1, "' in line: - match = strprog.search(line) - if match: - lines.append(" '" + match.group(1) + "'," + nl) - lines.sort() - - # insert the lines of keywords into the skeleton - try: - start = format.index("#--start keywords--" + nl) + 1 - end = format.index("#--end keywords--" + nl) - format[start:end] = lines - except ValueError: - sys.stderr.write("target does not contain format markers\n") - sys.exit(1) - - # write the output file - with open(optfile, 'w', newline='') as fp: - fp.writelines(format) - -if __name__ == "__main__": - main() diff --git a/Lib/test/test_keyword.py b/Lib/test/test_keyword.py index af99f52..3e2a8b3 100644 --- a/Lib/test/test_keyword.py +++ b/Lib/test/test_keyword.py @@ -1,20 +1,5 @@ import keyword import unittest -from test import support -import filecmp -import os -import sys -import subprocess -import shutil -import textwrap - -KEYWORD_FILE = support.findfile('keyword.py') -GRAMMAR_FILE = os.path.join(os.path.split(__file__)[0], - '..', '..', 'Python', 'graminit.c') -TEST_PY_FILE = 'keyword_test.py' -GRAMMAR_TEST_FILE = 'graminit_test.c' -PY_FILE_WITHOUT_KEYWORDS = 'minimal_keyword.py' -NONEXISTENT_FILE = 'not_here.txt' class Test_iskeyword(unittest.TestCase): @@ -35,103 +20,17 @@ class Test_iskeyword(unittest.TestCase): keyword.kwlist = ['its', 'all', 'eggs', 'beans', 'and', 'a', 'slice'] self.assertFalse(keyword.iskeyword('eggs')) + def test_all_keywords_fail_to_be_used_as_names(self): + for key in keyword.kwlist: + with self.assertRaises(SyntaxError): + exec(f"{key} = 42") -class TestKeywordGeneration(unittest.TestCase): - - def _copy_file_without_generated_keywords(self, source_file, dest_file): - with open(source_file, 'rb') as fp: - lines = fp.readlines() - nl = 
lines[0][len(lines[0].strip()):] - with open(dest_file, 'wb') as fp: - fp.writelines(lines[:lines.index(b"#--start keywords--" + nl) + 1]) - fp.writelines(lines[lines.index(b"#--end keywords--" + nl):]) - - def _generate_keywords(self, grammar_file, target_keyword_py_file): - proc = subprocess.Popen([sys.executable, - KEYWORD_FILE, - grammar_file, - target_keyword_py_file], stderr=subprocess.PIPE) - stderr = proc.communicate()[1] - return proc.returncode, stderr - - @unittest.skipIf(not os.path.exists(GRAMMAR_FILE), - 'test only works from source build directory') - def test_real_grammar_and_keyword_file(self): - self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE) - self.addCleanup(support.unlink, TEST_PY_FILE) - self.assertFalse(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE)) - self.assertEqual((0, b''), self._generate_keywords(GRAMMAR_FILE, - TEST_PY_FILE)) - self.assertTrue(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE)) - - def test_grammar(self): - self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE) - self.addCleanup(support.unlink, TEST_PY_FILE) - with open(GRAMMAR_TEST_FILE, 'w') as fp: - # Some of these are probably implementation accidents. 
- fp.writelines(textwrap.dedent("""\ - {2, 1}, - {11, "encoding_decl", 0, 2, states_79, - "\000\000\040\000\000\000\000\000\000\000\000\000" - "\000\000\000\000\000\000\000\000\000"}, - {1, "jello"}, - {326, 0}, - {1, "turnip"}, - \t{1, "This one is tab indented" - {278, 0}, - {1, "crazy but legal" - "also legal" {1, " - {1, "continue"}, - {1, "lemon"}, - {1, "tomato"}, - {1, "wigii"}, - {1, 'no good'} - {283, 0}, - {1, "too many spaces"}""")) - self.addCleanup(support.unlink, GRAMMAR_TEST_FILE) - self._generate_keywords(GRAMMAR_TEST_FILE, TEST_PY_FILE) - expected = [ - " 'This one is tab indented',", - " 'also legal',", - " 'continue',", - " 'crazy but legal',", - " 'jello',", - " 'lemon',", - " 'tomato',", - " 'turnip',", - " 'wigii',", - ] - with open(TEST_PY_FILE) as fp: - lines = fp.read().splitlines() - start = lines.index("#--start keywords--") + 1 - end = lines.index("#--end keywords--") - actual = lines[start:end] - self.assertEqual(actual, expected) - - def test_empty_grammar_results_in_no_keywords(self): - self._copy_file_without_generated_keywords(KEYWORD_FILE, - PY_FILE_WITHOUT_KEYWORDS) - self.addCleanup(support.unlink, PY_FILE_WITHOUT_KEYWORDS) - shutil.copyfile(KEYWORD_FILE, TEST_PY_FILE) - self.addCleanup(support.unlink, TEST_PY_FILE) - self.assertEqual((0, b''), self._generate_keywords(os.devnull, - TEST_PY_FILE)) - self.assertTrue(filecmp.cmp(TEST_PY_FILE, PY_FILE_WITHOUT_KEYWORDS)) - - def test_keywords_py_without_markers_produces_error(self): - rc, stderr = self._generate_keywords(os.devnull, os.devnull) - self.assertNotEqual(rc, 0) - self.assertRegex(stderr, b'does not contain format markers') - - def test_missing_grammar_file_produces_error(self): - rc, stderr = self._generate_keywords(NONEXISTENT_FILE, KEYWORD_FILE) - self.assertNotEqual(rc, 0) - self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode()) + def test_async_and_await_are_keywords(self): + self.assertIn("async", keyword.kwlist) + self.assertIn("await", keyword.kwlist) - def 
test_missing_keywords_py_file_produces_error(self): - rc, stderr = self._generate_keywords(os.devnull, NONEXISTENT_FILE) - self.assertNotEqual(rc, 0) - self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode()) + def test_keywords_are_sorted(self): + self.assertListEqual(sorted(keyword.kwlist), keyword.kwlist) if __name__ == "__main__": diff --git a/Makefile.pre.in b/Makefile.pre.in index 8042e8e..174b12c 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -724,7 +724,7 @@ regen-importlib: Programs/_freeze_importlib # Regenerate all generated files regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar \ - regen-token regen-symbol regen-ast regen-importlib clinic + regen-token regen-keyword regen-symbol regen-ast regen-importlib clinic ############################################################################ # Special rules for object files @@ -843,6 +843,15 @@ regen-token: $(srcdir)/Grammar/Tokens \ $(srcdir)/Lib/token.py +.PHONY: regen-keyword +regen-keyword: + # Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens + # using Parser/pgen + $(PYTHON_FOR_REGEN) -m Parser.pgen.keywordgen $(srcdir)/Grammar/Grammar \ + $(srcdir)/Grammar/Tokens \ + $(srcdir)/Lib/keyword.py.new + $(UPDATE_FILE) $(srcdir)/Lib/keyword.py $(srcdir)/Lib/keyword.py.new + .PHONY: regen-symbol regen-symbol: $(srcdir)/Include/graminit.h # Regenerate Lib/symbol.py from Include/graminit.h diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-03-20-00-37-24.bpo-12456.fnKoKo.rst b/Misc/NEWS.d/next/Core and Builtins/2019-03-20-00-37-24.bpo-12456.fnKoKo.rst new file mode 100644 index 0000000..10d6c49 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-03-20-00-37-24.bpo-12456.fnKoKo.rst @@ -0,0 +1,2 @@ +Regenerate :mod:`keyword` from the Grammar and Tokens file using pgen. Patch +by Pablo Galindo. 
diff --git a/Parser/pgen/keywordgen.py b/Parser/pgen/keywordgen.py new file mode 100644 index 0000000..eeb3ef7 --- /dev/null +++ b/Parser/pgen/keywordgen.py @@ -0,0 +1,60 @@ +"""Generate Lib/keyword.py from the Grammar and Tokens files using pgen""" + +import argparse + +from .pgen import ParserGenerator + +TEMPLATE = r''' +"""Keywords (from "Grammar/Grammar") + +This file is automatically generated; please don't muck it up! + +To update the symbols in this file, 'cd' to the top directory of +the python source tree and run: + + python3 -m Parser.pgen.keywordgen Grammar/Grammar \ + Grammar/Tokens \ + Lib/keyword.py + +Alternatively, you can run 'make regen-keyword'. +""" + +__all__ = ["iskeyword", "kwlist"] + +kwlist = [ + {keywords} +] + +iskeyword = frozenset(kwlist).__contains__ +'''.lstrip() + +EXTRA_KEYWORDS = ["async", "await"] + + +def main(): + parser = argparse.ArgumentParser(description="Generate the Lib/keywords.py " + "file from the grammar.") + parser.add_argument( + "grammar", type=str, help="The file with the grammar definition in EBNF format" + ) + parser.add_argument( + "tokens", type=str, help="The file with the token definitions" + ) + parser.add_argument( + "keyword_file", + type=argparse.FileType('w'), + help="The path to write the keyword definitions", + ) + args = parser.parse_args() + p = ParserGenerator(args.grammar, args.tokens) + grammar = p.make_grammar() + + with args.keyword_file as thefile: + all_keywords = sorted(list(grammar.keywords) + EXTRA_KEYWORDS) + + keywords = ",\n ".join(map(repr, all_keywords)) + thefile.write(TEMPLATE.format(keywords=keywords)) + + +if __name__ == "__main__": + main() |