summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPablo Galindo <Pablogsal@gmail.com>2019-03-25 22:01:12 (GMT)
committerGitHub <noreply@github.com>2019-03-25 22:01:12 (GMT)
commit91759d98015e1d6d5e1367cff60592ab548e7806 (patch)
tree903553ec0677b1fc9c3531799ce890fd7a019069
parent027b09c5a13aac9e14a3b43bb385298d549c3833 (diff)
downloadcpython-91759d98015e1d6d5e1367cff60592ab548e7806.zip
cpython-91759d98015e1d6d5e1367cff60592ab548e7806.tar.gz
cpython-91759d98015e1d6d5e1367cff60592ab548e7806.tar.bz2
bpo-36143: Regenerate Lib/keyword.py from the Grammar and Tokens file using pgen (GH-12456)
Now that the parser generator is written in Python (Parser/pgen), we can make use of it to regenerate the Lib/keyword.py file, which contains the language keywords, instead of parsing the autogenerated grammar files. This also allows the CI to check that the autogenerated files are up to date.
-rw-r--r--[-rwxr-xr-x]Lib/keyword.py129
-rw-r--r--Lib/test/test_keyword.py119
-rw-r--r--Makefile.pre.in11
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2019-03-20-00-37-24.bpo-12456.fnKoKo.rst2
-rw-r--r--Parser/pgen/keywordgen.py60
5 files changed, 124 insertions, 197 deletions
diff --git a/Lib/keyword.py b/Lib/keyword.py
index 150c2bc..ddcbb25 100755..100644
--- a/Lib/keyword.py
+++ b/Lib/keyword.py
@@ -1,98 +1,55 @@
-#! /usr/bin/env python3
-
-"""Keywords (from "graminit.c")
+"""Keywords (from "Grammar/Grammar")
This file is automatically generated; please don't muck it up!
To update the symbols in this file, 'cd' to the top directory of
-the python source tree after building the interpreter and run:
+the python source tree and run:
+
+ python3 -m Parser.pgen.keywordgen Grammar/Grammar \
+ Grammar/Tokens \
+ Lib/keyword.py
- ./python Lib/keyword.py
+Alternatively, you can run 'make regen-keyword'.
"""
__all__ = ["iskeyword", "kwlist"]
kwlist = [
-#--start keywords--
- 'False',
- 'None',
- 'True',
- 'and',
- 'as',
- 'assert',
- 'break',
- 'class',
- 'continue',
- 'def',
- 'del',
- 'elif',
- 'else',
- 'except',
- 'finally',
- 'for',
- 'from',
- 'global',
- 'if',
- 'import',
- 'in',
- 'is',
- 'lambda',
- 'nonlocal',
- 'not',
- 'or',
- 'pass',
- 'raise',
- 'return',
- 'try',
- 'while',
- 'with',
- 'yield',
-#--end keywords--
- ]
-
-kwlist.append('async')
-kwlist.append('await')
-kwlist.sort()
+ 'False',
+ 'None',
+ 'True',
+ 'and',
+ 'as',
+ 'assert',
+ 'async',
+ 'await',
+ 'break',
+ 'class',
+ 'continue',
+ 'def',
+ 'del',
+ 'elif',
+ 'else',
+ 'except',
+ 'finally',
+ 'for',
+ 'from',
+ 'global',
+ 'if',
+ 'import',
+ 'in',
+ 'is',
+ 'lambda',
+ 'nonlocal',
+ 'not',
+ 'or',
+ 'pass',
+ 'raise',
+ 'return',
+ 'try',
+ 'while',
+ 'with',
+ 'yield'
+]
iskeyword = frozenset(kwlist).__contains__
-
-def main():
- import sys, re
-
- args = sys.argv[1:]
- iptfile = args and args[0] or "Python/graminit.c"
- if len(args) > 1: optfile = args[1]
- else: optfile = "Lib/keyword.py"
-
- # load the output skeleton from the target, taking care to preserve its
- # newline convention.
- with open(optfile, newline='') as fp:
- format = fp.readlines()
- nl = format[0][len(format[0].strip()):] if format else '\n'
-
- # scan the source file for keywords
- with open(iptfile) as fp:
- strprog = re.compile('"([^"]+)"')
- lines = []
- for line in fp:
- if '{1, "' in line:
- match = strprog.search(line)
- if match:
- lines.append(" '" + match.group(1) + "'," + nl)
- lines.sort()
-
- # insert the lines of keywords into the skeleton
- try:
- start = format.index("#--start keywords--" + nl) + 1
- end = format.index("#--end keywords--" + nl)
- format[start:end] = lines
- except ValueError:
- sys.stderr.write("target does not contain format markers\n")
- sys.exit(1)
-
- # write the output file
- with open(optfile, 'w', newline='') as fp:
- fp.writelines(format)
-
-if __name__ == "__main__":
- main()
diff --git a/Lib/test/test_keyword.py b/Lib/test/test_keyword.py
index af99f52..3e2a8b3 100644
--- a/Lib/test/test_keyword.py
+++ b/Lib/test/test_keyword.py
@@ -1,20 +1,5 @@
import keyword
import unittest
-from test import support
-import filecmp
-import os
-import sys
-import subprocess
-import shutil
-import textwrap
-
-KEYWORD_FILE = support.findfile('keyword.py')
-GRAMMAR_FILE = os.path.join(os.path.split(__file__)[0],
- '..', '..', 'Python', 'graminit.c')
-TEST_PY_FILE = 'keyword_test.py'
-GRAMMAR_TEST_FILE = 'graminit_test.c'
-PY_FILE_WITHOUT_KEYWORDS = 'minimal_keyword.py'
-NONEXISTENT_FILE = 'not_here.txt'
class Test_iskeyword(unittest.TestCase):
@@ -35,103 +20,17 @@ class Test_iskeyword(unittest.TestCase):
keyword.kwlist = ['its', 'all', 'eggs', 'beans', 'and', 'a', 'slice']
self.assertFalse(keyword.iskeyword('eggs'))
+ def test_all_keywords_fail_to_be_used_as_names(self):
+ for key in keyword.kwlist:
+ with self.assertRaises(SyntaxError):
+ exec(f"{key} = 42")
-class TestKeywordGeneration(unittest.TestCase):
-
- def _copy_file_without_generated_keywords(self, source_file, dest_file):
- with open(source_file, 'rb') as fp:
- lines = fp.readlines()
- nl = lines[0][len(lines[0].strip()):]
- with open(dest_file, 'wb') as fp:
- fp.writelines(lines[:lines.index(b"#--start keywords--" + nl) + 1])
- fp.writelines(lines[lines.index(b"#--end keywords--" + nl):])
-
- def _generate_keywords(self, grammar_file, target_keyword_py_file):
- proc = subprocess.Popen([sys.executable,
- KEYWORD_FILE,
- grammar_file,
- target_keyword_py_file], stderr=subprocess.PIPE)
- stderr = proc.communicate()[1]
- return proc.returncode, stderr
-
- @unittest.skipIf(not os.path.exists(GRAMMAR_FILE),
- 'test only works from source build directory')
- def test_real_grammar_and_keyword_file(self):
- self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE)
- self.addCleanup(support.unlink, TEST_PY_FILE)
- self.assertFalse(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE))
- self.assertEqual((0, b''), self._generate_keywords(GRAMMAR_FILE,
- TEST_PY_FILE))
- self.assertTrue(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE))
-
- def test_grammar(self):
- self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE)
- self.addCleanup(support.unlink, TEST_PY_FILE)
- with open(GRAMMAR_TEST_FILE, 'w') as fp:
- # Some of these are probably implementation accidents.
- fp.writelines(textwrap.dedent("""\
- {2, 1},
- {11, "encoding_decl", 0, 2, states_79,
- "\000\000\040\000\000\000\000\000\000\000\000\000"
- "\000\000\000\000\000\000\000\000\000"},
- {1, "jello"},
- {326, 0},
- {1, "turnip"},
- \t{1, "This one is tab indented"
- {278, 0},
- {1, "crazy but legal"
- "also legal" {1, "
- {1, "continue"},
- {1, "lemon"},
- {1, "tomato"},
- {1, "wigii"},
- {1, 'no good'}
- {283, 0},
- {1, "too many spaces"}"""))
- self.addCleanup(support.unlink, GRAMMAR_TEST_FILE)
- self._generate_keywords(GRAMMAR_TEST_FILE, TEST_PY_FILE)
- expected = [
- " 'This one is tab indented',",
- " 'also legal',",
- " 'continue',",
- " 'crazy but legal',",
- " 'jello',",
- " 'lemon',",
- " 'tomato',",
- " 'turnip',",
- " 'wigii',",
- ]
- with open(TEST_PY_FILE) as fp:
- lines = fp.read().splitlines()
- start = lines.index("#--start keywords--") + 1
- end = lines.index("#--end keywords--")
- actual = lines[start:end]
- self.assertEqual(actual, expected)
-
- def test_empty_grammar_results_in_no_keywords(self):
- self._copy_file_without_generated_keywords(KEYWORD_FILE,
- PY_FILE_WITHOUT_KEYWORDS)
- self.addCleanup(support.unlink, PY_FILE_WITHOUT_KEYWORDS)
- shutil.copyfile(KEYWORD_FILE, TEST_PY_FILE)
- self.addCleanup(support.unlink, TEST_PY_FILE)
- self.assertEqual((0, b''), self._generate_keywords(os.devnull,
- TEST_PY_FILE))
- self.assertTrue(filecmp.cmp(TEST_PY_FILE, PY_FILE_WITHOUT_KEYWORDS))
-
- def test_keywords_py_without_markers_produces_error(self):
- rc, stderr = self._generate_keywords(os.devnull, os.devnull)
- self.assertNotEqual(rc, 0)
- self.assertRegex(stderr, b'does not contain format markers')
-
- def test_missing_grammar_file_produces_error(self):
- rc, stderr = self._generate_keywords(NONEXISTENT_FILE, KEYWORD_FILE)
- self.assertNotEqual(rc, 0)
- self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode())
+ def test_async_and_await_are_keywords(self):
+ self.assertIn("async", keyword.kwlist)
+ self.assertIn("await", keyword.kwlist)
- def test_missing_keywords_py_file_produces_error(self):
- rc, stderr = self._generate_keywords(os.devnull, NONEXISTENT_FILE)
- self.assertNotEqual(rc, 0)
- self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode())
+ def test_keywords_are_sorted(self):
+ self.assertListEqual(sorted(keyword.kwlist), keyword.kwlist)
if __name__ == "__main__":
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 8042e8e..174b12c 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -724,7 +724,7 @@ regen-importlib: Programs/_freeze_importlib
# Regenerate all generated files
regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar \
- regen-token regen-symbol regen-ast regen-importlib clinic
+ regen-token regen-keyword regen-symbol regen-ast regen-importlib clinic
############################################################################
# Special rules for object files
@@ -843,6 +843,15 @@ regen-token:
$(srcdir)/Grammar/Tokens \
$(srcdir)/Lib/token.py
+.PHONY: regen-keyword
+regen-keyword:
+ # Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens
+ # using Parser/pgen
+ $(PYTHON_FOR_REGEN) -m Parser.pgen.keywordgen $(srcdir)/Grammar/Grammar \
+ $(srcdir)/Grammar/Tokens \
+ $(srcdir)/Lib/keyword.py.new
+ $(UPDATE_FILE) $(srcdir)/Lib/keyword.py $(srcdir)/Lib/keyword.py.new
+
.PHONY: regen-symbol
regen-symbol: $(srcdir)/Include/graminit.h
# Regenerate Lib/symbol.py from Include/graminit.h
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-03-20-00-37-24.bpo-12456.fnKoKo.rst b/Misc/NEWS.d/next/Core and Builtins/2019-03-20-00-37-24.bpo-12456.fnKoKo.rst
new file mode 100644
index 0000000..10d6c49
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-03-20-00-37-24.bpo-12456.fnKoKo.rst
@@ -0,0 +1,2 @@
+Regenerate :mod:`keyword` from the Grammar and Tokens file using pgen. Patch
+by Pablo Galindo.
diff --git a/Parser/pgen/keywordgen.py b/Parser/pgen/keywordgen.py
new file mode 100644
index 0000000..eeb3ef7
--- /dev/null
+++ b/Parser/pgen/keywordgen.py
@@ -0,0 +1,60 @@
+"""Generate Lib/keyword.py from the Grammar and Tokens files using pgen"""
+
+import argparse
+
+from .pgen import ParserGenerator
+
+TEMPLATE = r'''
+"""Keywords (from "Grammar/Grammar")
+
+This file is automatically generated; please don't muck it up!
+
+To update the symbols in this file, 'cd' to the top directory of
+the python source tree and run:
+
+ python3 -m Parser.pgen.keywordgen Grammar/Grammar \
+ Grammar/Tokens \
+ Lib/keyword.py
+
+Alternatively, you can run 'make regen-keyword'.
+"""
+
+__all__ = ["iskeyword", "kwlist"]
+
+kwlist = [
+ {keywords}
+]
+
+iskeyword = frozenset(kwlist).__contains__
+'''.lstrip()
+
+EXTRA_KEYWORDS = ["async", "await"]
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Generate the Lib/keywords.py "
+ "file from the grammar.")
+ parser.add_argument(
+ "grammar", type=str, help="The file with the grammar definition in EBNF format"
+ )
+ parser.add_argument(
+ "tokens", type=str, help="The file with the token definitions"
+ )
+ parser.add_argument(
+ "keyword_file",
+ type=argparse.FileType('w'),
+ help="The path to write the keyword definitions",
+ )
+ args = parser.parse_args()
+ p = ParserGenerator(args.grammar, args.tokens)
+ grammar = p.make_grammar()
+
+ with args.keyword_file as thefile:
+ all_keywords = sorted(list(grammar.keywords) + EXTRA_KEYWORDS)
+
+ keywords = ",\n ".join(map(repr, all_keywords))
+ thefile.write(TEMPLATE.format(keywords=keywords))
+
+
+if __name__ == "__main__":
+ main()