diff options
author | Pablo Galindo <Pablogsal@gmail.com> | 2020-07-27 18:20:36 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-07-27 18:20:36 (GMT) |
commit | 72cabb2aa636272e608285f5a6ba83b62be9be4e (patch) | |
tree | c43c2815b9aa9389413934136c7be9ac514c6081 /Doc | |
parent | 67987acd5dc9776f55f4e139e2b3d9e7a6434d9f (diff) | |
download | cpython-72cabb2aa636272e608285f5a6ba83b62be9be4e.zip cpython-72cabb2aa636272e608285f5a6ba83b62be9be4e.tar.gz cpython-72cabb2aa636272e608285f5a6ba83b62be9be4e.tar.bz2 |
bpo-40939: Use the new grammar for the grammar specification documentation (GH-19969)
(We censor the heck out of actions and some other stuff using a custom "highlighter".)
Co-authored-by: Guido van Rossum <guido@python.org>
Diffstat (limited to 'Doc')
-rw-r--r-- | Doc/conf.py | 2 | ||||
-rw-r--r-- | Doc/reference/grammar.rst | 18 | ||||
-rw-r--r-- | Doc/tools/extensions/peg_highlight.py | 75 |
3 files changed, 91 insertions, 4 deletions
diff --git a/Doc/conf.py b/Doc/conf.py index 12d74ea..bfb2a98 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -15,7 +15,7 @@ sys.path.append(os.path.abspath('includes')) extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest', 'pyspecific', 'c_annotations', 'escape4chm', - 'asdl_highlight'] + 'asdl_highlight', 'peg_highlight'] doctest_global_setup = ''' diff --git a/Doc/reference/grammar.rst b/Doc/reference/grammar.rst index 83d0f85..acf8376 100644 --- a/Doc/reference/grammar.rst +++ b/Doc/reference/grammar.rst @@ -1,7 +1,19 @@ Full Grammar specification ========================== -This is the full Python grammar, as it is read by the parser generator and used -to parse Python source files: +This is the full Python grammar, derived directly from the grammar +used to generate the CPython parser (see :source:`Grammar/python.gram`). +The version here omits details related to code generation and +error recovery. -.. literalinclude:: ../../Grammar/Grammar +The notation is a mixture of `EBNF +<https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form>`_ +and `PEG <https://en.wikipedia.org/wiki/Parsing_expression_grammar>`_. +In particular, ``&`` followed by a symbol, token or parenthesized +group indicates a positive lookahead (i.e., is required to match but +not consumed), while ``!`` indicates a negative lookahead (i.e., is +required _not_ to match). We use the ``|`` separator to mean PEG's +"ordered choice" (written as ``/`` in traditional PEG grammars). + +.. literalinclude:: ../../Grammar/python.gram + :language: peg diff --git a/Doc/tools/extensions/peg_highlight.py b/Doc/tools/extensions/peg_highlight.py new file mode 100644 index 0000000..f02515d --- /dev/null +++ b/Doc/tools/extensions/peg_highlight.py @@ -0,0 +1,75 @@ +from pygments.lexer import RegexLexer, bygroups, include +from pygments.token import Comment, Generic, Keyword, Name, Operator, Punctuation, Text + +from sphinx.highlighting import lexers + + +class PEGLexer(RegexLexer): + """Pygments Lexer for PEG grammar (.gram) files + + This lexer strips the following elements from the grammar: + + - Meta-tags + - Variable assignments + - Actions + - Lookaheads + - Rule types + - Rule options + - Rules named `invalid_*` or `incorrect_*` + """ + + name = "PEG" + aliases = ["peg"] + filenames = ["*.gram"] + _name = r"([^\W\d]\w*)" + _text_ws = r"(\s*)" + + tokens = { + "ws": [(r"\n", Text), (r"\s+", Text), (r"#.*$", Comment.Singleline),], + "lookaheads": [ + (r"(?<=\|\s)(&\w+\s?)", bygroups(None)), + (r"(?<=\|\s)(&'.+'\s?)", bygroups(None)), + (r'(?<=\|\s)(&".+"\s?)', bygroups(None)), + (r"(?<=\|\s)(&\(.+\)\s?)", bygroups(None)), + ], + "metas": [ + (r"(@\w+ '''(.|\n)+?''')", bygroups(None)), + (r"^(@.*)$", bygroups(None)), + ], + "actions": [(r"{(.|\n)+?}", bygroups(None)),], + "strings": [ + (r"'\w+?'", Keyword), + (r'"\w+?"', Keyword), + (r"'\W+?'", Text), + (r'"\W+?"', Text), + ], + "variables": [(_name + _text_ws + "(=)", bygroups(None, None, None),),], + "invalids": [ + (r"^(\s+\|\s+invalid_\w+\s*\n)", bygroups(None)), + (r"^(\s+\|\s+incorrect_\w+\s*\n)", bygroups(None)), + (r"^(#.*invalid syntax.*(?:.|\n)*)", bygroups(None),), + ], + "root": [ + include("invalids"), + include("ws"), + include("lookaheads"), + include("metas"), + include("actions"), + include("strings"), + include("variables"), + (r"\b(?!(NULL|EXTRA))([A-Z_]+)\b\s*(?!\()", Text,), + ( + r"^\s*" + _name + "\s*" + "(\[.*\])?" + "\s*" + "(\(.+\))?" + "\s*(:)", + bygroups(Name.Function, None, None, Punctuation), + ), + (_name, Name.Function), + (r"[\||\.|\+|\*|\?]", Operator), + (r"{|}|\(|\)|\[|\]", Punctuation), + (r".", Text), + ], + } + + +def setup(app): + lexers["peg"] = PEGLexer() + return {"version": "1.0", "parallel_read_safe": True} |