From 7431c3799efbd06ed03ee70b64420f45e83b3667 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Thu, 18 Jul 2024 19:38:29 +0100 Subject: GH-121970: Combine custom Pygments lexers into a package (#121976) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/conf.py | 3 +- Doc/tools/extensions/asdl_highlight.py | 53 ------------------- Doc/tools/extensions/lexers/__init__.py | 15 ++++++ Doc/tools/extensions/lexers/asdl_lexer.py | 42 +++++++++++++++ Doc/tools/extensions/lexers/peg_lexer.py | 79 ++++++++++++++++++++++++++++ Doc/tools/extensions/peg_highlight.py | 86 ------------------------------- 6 files changed, 137 insertions(+), 141 deletions(-) delete mode 100644 Doc/tools/extensions/asdl_highlight.py create mode 100644 Doc/tools/extensions/lexers/__init__.py create mode 100644 Doc/tools/extensions/lexers/asdl_lexer.py create mode 100644 Doc/tools/extensions/lexers/peg_lexer.py delete mode 100644 Doc/tools/extensions/peg_highlight.py diff --git a/Doc/conf.py b/Doc/conf.py index 6ab8e8c..7f925dc 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -18,11 +18,10 @@ from pyspecific import SOURCE_URI # --------------------- extensions = [ - 'asdl_highlight', 'c_annotations', 'escape4chm', 'glossary_search', - 'peg_highlight', + 'lexers', 'pyspecific', 'sphinx.ext.coverage', 'sphinx.ext.doctest', diff --git a/Doc/tools/extensions/asdl_highlight.py b/Doc/tools/extensions/asdl_highlight.py deleted file mode 100644 index 42863a4..0000000 --- a/Doc/tools/extensions/asdl_highlight.py +++ /dev/null @@ -1,53 +0,0 @@ -import sys -from pathlib import Path - -CPYTHON_ROOT = Path(__file__).resolve().parent.parent.parent.parent -sys.path.append(str(CPYTHON_ROOT / "Parser")) - -from pygments.lexer import RegexLexer, bygroups, include, words -from pygments.token import (Comment, Keyword, Name, Operator, - Punctuation, Text) - -from asdl import builtin_types -from sphinx.highlighting import lexers - -class ASDLLexer(RegexLexer): - name = "ASDL" - aliases = ["asdl"] - filenames = ["*.asdl"] - _name = r"([^\W\d]\w*)" - _text_ws = r"(\s*)" - - tokens = { - "ws": [ - (r"\n", Text), - (r"\s+", Text), - (r"--.*?$", Comment.Singleline), - ], - "root": [ - include("ws"), - ( - r"(module)" + _text_ws + _name, - bygroups(Keyword, Text, Name.Tag), - ), - ( - r"(\w+)(\*\s|\?\s|\s)(\w+)", - bygroups(Name.Builtin.Pseudo, Operator, Name), - ), - (words(builtin_types), Name.Builtin), - (r"attributes", Name.Builtin), - ( - _name + _text_ws + "(=)", - bygroups(Name, Text, Operator), - ), - (_name, Name.Class), - (r"\|", Operator), - (r"{|}|\(|\)", Punctuation), - (r".", Text), - ], - } - - -def setup(app): - lexers["asdl"] = ASDLLexer() - return {'version': '1.0', 'parallel_read_safe': True} diff --git a/Doc/tools/extensions/lexers/__init__.py b/Doc/tools/extensions/lexers/__init__.py new file mode 100644 index 0000000..e12ac5b --- /dev/null +++ b/Doc/tools/extensions/lexers/__init__.py @@ -0,0 +1,15 @@ +from .asdl_lexer import ASDLLexer +from .peg_lexer import PEGLexer + + +def setup(app): + # Used for highlighting Parser/Python.asdl in library/ast.rst + app.add_lexer("asdl", ASDLLexer) + # Used for highlighting Grammar/python.gram in reference/grammar.rst + app.add_lexer("peg", PEGLexer) + + return { + "version": "1.0", + "parallel_read_safe": True, + "parallel_write_safe": True, + } diff --git a/Doc/tools/extensions/lexers/asdl_lexer.py b/Doc/tools/extensions/lexers/asdl_lexer.py new file mode 100644 index 0000000..2cea058 --- /dev/null +++ b/Doc/tools/extensions/lexers/asdl_lexer.py @@ -0,0 +1,42 @@ +from pygments.lexer import RegexLexer, bygroups, include +from pygments.token import Comment, Keyword, Name, Operator, Punctuation, Text + + +class ASDLLexer(RegexLexer): + name = "ASDL" + aliases = ["asdl"] + filenames = ["*.asdl"] + _name = r"([^\W\d]\w*)" + _text_ws = r"(\s*)" + + tokens = { + "ws": [ + (r"\n", Text), + (r"\s+", Text), + (r"--.*?$", Comment.Singleline), + ], + "root": [ + include("ws"), + ( + r"(module)" + _text_ws + _name, + bygroups(Keyword, Text, Name.Tag), + ), + ( + r"(\w+)(\*\s|\?\s|\s)(\w+)", + bygroups(Name.Builtin.Pseudo, Operator, Name), + ), + # Keep in line with ``builtin_types`` from Parser/asdl.py. + # ASDL's 4 builtin types are + # constant, identifier, int, string + ('constant|identifier|int|string', Name.Builtin), + (r"attributes", Name.Builtin), + ( + _name + _text_ws + "(=)", + bygroups(Name, Text, Operator), + ), + (_name, Name.Class), + (r"\|", Operator), + (r"{|}|\(|\)", Punctuation), + (r".", Text), + ], + } diff --git a/Doc/tools/extensions/lexers/peg_lexer.py b/Doc/tools/extensions/lexers/peg_lexer.py new file mode 100644 index 0000000..827af20 --- /dev/null +++ b/Doc/tools/extensions/lexers/peg_lexer.py @@ -0,0 +1,79 @@ +from pygments.lexer import RegexLexer, bygroups, include +from pygments.token import Comment, Keyword, Name, Operator, Punctuation, Text + + +class PEGLexer(RegexLexer): + """Pygments Lexer for PEG grammar (.gram) files + + This lexer strips the following elements from the grammar: + + - Meta-tags + - Variable assignments + - Actions + - Lookaheads + - Rule types + - Rule options + - Rules named `invalid_*` or `incorrect_*` + """ + + name = "PEG" + aliases = ["peg"] + filenames = ["*.gram"] + _name = r"([^\W\d]\w*)" + _text_ws = r"(\s*)" + + tokens = { + "ws": [(r"\n", Text), (r"\s+", Text), (r"#.*$", Comment.Singleline),], + "lookaheads": [ + # Forced tokens + (r"(&&)(?=\w+\s?)", bygroups(None)), + (r"(&&)(?='.+'\s?)", bygroups(None)), + (r'(&&)(?=".+"\s?)', bygroups(None)), + (r"(&&)(?=\(.+\)\s?)", bygroups(None)), + + (r"(?<=\|\s)(&\w+\s?)", bygroups(None)), + (r"(?<=\|\s)(&'.+'\s?)", bygroups(None)), + (r'(?<=\|\s)(&".+"\s?)', bygroups(None)), + (r"(?<=\|\s)(&\(.+\)\s?)", bygroups(None)), + ], + "metas": [ + (r"(@\w+ '''(.|\n)+?''')", bygroups(None)), + (r"^(@.*)$", bygroups(None)), + ], + "actions": [ + (r"{(.|\n)+?}", bygroups(None)), + ], + "strings": [ + (r"'\w+?'", Keyword), + (r'"\w+?"', Keyword), + (r"'\W+?'", Text), + (r'"\W+?"', Text), + ], + "variables": [ + (_name + _text_ws + "(=)", bygroups(None, None, None),), + (_name + _text_ws + r"(\[[\w\d_\*]+?\])" + _text_ws + "(=)", bygroups(None, None, None, None, None),), + ], + "invalids": [ + (r"^(\s+\|\s+.*invalid_\w+.*\n)", bygroups(None)), + (r"^(\s+\|\s+.*incorrect_\w+.*\n)", bygroups(None)), + (r"^(#.*invalid syntax.*(?:.|\n)*)", bygroups(None),), + ], + "root": [ + include("invalids"), + include("ws"), + include("lookaheads"), + include("metas"), + include("actions"), + include("strings"), + include("variables"), + (r"\b(?!(NULL|EXTRA))([A-Z_]+)\b\s*(?!\()", Text,), + ( + r"^\s*" + _name + r"\s*" + r"(\[.*\])?" + r"\s*" + r"(\(.+\))?" + r"\s*(:)", + bygroups(Name.Function, None, None, Punctuation), + ), + (_name, Name.Function), + (r"[\||\.|\+|\*|\?]", Operator), + (r"{|}|\(|\)|\[|\]", Punctuation), + (r".", Text), + ], + } diff --git a/Doc/tools/extensions/peg_highlight.py b/Doc/tools/extensions/peg_highlight.py deleted file mode 100644 index 4bdc2ee..0000000 --- a/Doc/tools/extensions/peg_highlight.py +++ /dev/null @@ -1,86 +0,0 @@ -from pygments.lexer import RegexLexer, bygroups, include -from pygments.token import Comment, Keyword, Name, Operator, Punctuation, Text - -from sphinx.highlighting import lexers - - -class PEGLexer(RegexLexer): - """Pygments Lexer for PEG grammar (.gram) files - - This lexer strips the following elements from the grammar: - - - Meta-tags - - Variable assignments - - Actions - - Lookaheads - - Rule types - - Rule options - - Rules named `invalid_*` or `incorrect_*` - """ - - name = "PEG" - aliases = ["peg"] - filenames = ["*.gram"] - _name = r"([^\W\d]\w*)" - _text_ws = r"(\s*)" - - tokens = { - "ws": [(r"\n", Text), (r"\s+", Text), (r"#.*$", Comment.Singleline),], - "lookaheads": [ - # Forced tokens - (r"(&&)(?=\w+\s?)", bygroups(None)), - (r"(&&)(?='.+'\s?)", bygroups(None)), - (r'(&&)(?=".+"\s?)', bygroups(None)), - (r"(&&)(?=\(.+\)\s?)", bygroups(None)), - - (r"(?<=\|\s)(&\w+\s?)", bygroups(None)), - (r"(?<=\|\s)(&'.+'\s?)", bygroups(None)), - (r'(?<=\|\s)(&".+"\s?)', bygroups(None)), - (r"(?<=\|\s)(&\(.+\)\s?)", bygroups(None)), - ], - "metas": [ - (r"(@\w+ '''(.|\n)+?''')", bygroups(None)), - (r"^(@.*)$", bygroups(None)), - ], - "actions": [ - (r"{(.|\n)+?}", bygroups(None)), - ], - "strings": [ - (r"'\w+?'", Keyword), - (r'"\w+?"', Keyword), - (r"'\W+?'", Text), - (r'"\W+?"', Text), - ], - "variables": [ - (_name + _text_ws + "(=)", bygroups(None, None, None),), - (_name + _text_ws + r"(\[[\w\d_\*]+?\])" + _text_ws + "(=)", bygroups(None, None, None, None, None),), - ], - "invalids": [ - (r"^(\s+\|\s+.*invalid_\w+.*\n)", bygroups(None)), - (r"^(\s+\|\s+.*incorrect_\w+.*\n)", bygroups(None)), - (r"^(#.*invalid syntax.*(?:.|\n)*)", bygroups(None),), - ], - "root": [ - include("invalids"), - include("ws"), - include("lookaheads"), - include("metas"), - include("actions"), - include("strings"), - include("variables"), - (r"\b(?!(NULL|EXTRA))([A-Z_]+)\b\s*(?!\()", Text,), - ( - r"^\s*" + _name + r"\s*" + r"(\[.*\])?" + r"\s*" + r"(\(.+\))?" + r"\s*(:)", - bygroups(Name.Function, None, None, Punctuation), - ), - (_name, Name.Function), - (r"[\||\.|\+|\*|\?]", Operator), - (r"{|}|\(|\)|\[|\]", Punctuation), - (r".", Text), - ], - } - - -def setup(app): - lexers["peg"] = PEGLexer() - return {"version": "1.0", "parallel_read_safe": True} -- cgit v0.12