author     Serhiy Storchaka <storchaka@gmail.com>   2018-12-22 09:18:40 (GMT)
committer  GitHub <noreply@github.com>              2018-12-22 09:18:40 (GMT)
commit     8ac658114dec4964479baecfbc439fceb40eaa79 (patch)
tree       e66c4c3beda293a6fdf01763306697d15d0af157 /Tools
parent     c1b4b0f6160e1919394586f44b12538505fed300 (diff)
bpo-30455: Generate all token related code and docs from Grammar/Tokens. (GH-10370)
"Include/token.h", "Lib/token.py" (containing now some data moved from "Lib/tokenize.py") and new files "Parser/token.c" (containing the code moved from "Parser/tokenizer.c") and "Doc/library/token-list.inc" (included in "Doc/library/token.rst") are now generated from "Grammar/Tokens" by "Tools/scripts/generate_token.py". The script overwrites files only if needed and can be used on the read-only sources tree. "Lib/symbol.py" is now generated by "Tools/scripts/generate_symbol_py.py" instead of been executable itself. Added new make targets "regen-token" and "regen-symbol" which are now dependencies of "regen-all". The documentation contains now strings for operators and punctuation tokens.
Diffstat (limited to 'Tools')
-rwxr-xr-x  Tools/scripts/generate_symbol_py.py   53
-rw-r--r--  Tools/scripts/generate_token.py      268
2 files changed, 321 insertions, 0 deletions
diff --git a/Tools/scripts/generate_symbol_py.py b/Tools/scripts/generate_symbol_py.py
new file mode 100755
index 0000000..9219b09
--- /dev/null
+++ b/Tools/scripts/generate_symbol_py.py
@@ -0,0 +1,53 @@
+#! /usr/bin/env python3
+# This script generates the symbol.py source file.
+
+import sys
+import re
+
+def main(inFileName="Include/graminit.h", outFileName="Lib/symbol.py"):
+    try:
+        fp = open(inFileName)
+    except OSError as err:
+        sys.stderr.write("I/O error: %s\n" % str(err))
+        sys.exit(1)
+    with fp:
+        lines = fp.read().split("\n")
+    prog = re.compile(
+        "#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)",
+        re.IGNORECASE)
+    tokens = {}
+    for line in lines:
+        match = prog.match(line)
+        if match:
+            name, val = match.group(1, 2)
+            val = int(val)
+            tokens[val] = name  # reverse so we can sort them...
+    keys = sorted(tokens.keys())
+    # load the output skeleton from the target:
+    try:
+        fp = open(outFileName)
+    except OSError as err:
+        sys.stderr.write("I/O error: %s\n" % str(err))
+        sys.exit(2)
+    with fp:
+        format = fp.read().split("\n")
+    try:
+        start = format.index("#--start constants--") + 1
+        end = format.index("#--end constants--")
+    except ValueError:
+        sys.stderr.write("target does not contain format markers")
+        sys.exit(3)
+    lines = []
+    for val in keys:
+        lines.append("%s = %d" % (tokens[val], val))
+    format[start:end] = lines
+    try:
+        fp = open(outFileName, 'w')
+    except OSError as err:
+        sys.stderr.write("I/O error: %s\n" % str(err))
+        sys.exit(4)
+    with fp:
+        fp.write("\n".join(format))
+
+if __name__ == '__main__':
+    main(*sys.argv[1:])
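
Note: the script above does not generate Lib/symbol.py from scratch; it splices the regenerated constants between two marker lines, so the target must already contain a skeleton like the following (a minimal sketch; the first two real entries start at 256 because symbols are non-terminals, matching NT_OFFSET):

    #--start constants--
    single_input = 256
    file_input = 257
    #--end constants--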
diff --git a/Tools/scripts/generate_token.py b/Tools/scripts/generate_token.py
new file mode 100644
index 0000000..f2745e8
--- /dev/null
+++ b/Tools/scripts/generate_token.py
@@ -0,0 +1,268 @@
+#! /usr/bin/env python3
+# This script generates token related files from Grammar/Tokens:
+#
+#   Doc/library/token-list.inc
+#   Include/token.h
+#   Parser/token.c
+#   Lib/token.py
+
+
+NT_OFFSET = 256
+
+def load_tokens(path):
+    tok_names = []
+    string_to_tok = {}
+    ERRORTOKEN = None
+    with open(path) as fp:
+        for line in fp:
+            line = line.strip()
+            # strip comments
+            i = line.find('#')
+            if i >= 0:
+                line = line[:i].strip()
+            if not line:
+                continue
+            fields = line.split()
+            name = fields[0]
+            value = len(tok_names)
+            if name == 'ERRORTOKEN':
+                ERRORTOKEN = value
+            string = fields[1] if len(fields) > 1 else None
+            if string:
+                string = eval(string)
+                string_to_tok[string] = value
+            tok_names.append(name)
+    return tok_names, ERRORTOKEN, string_to_tok
+
+
+def update_file(file, content):
+    try:
+        with open(file, 'r') as fobj:
+            if fobj.read() == content:
+                return False
+    except (OSError, ValueError):
+        pass
+    with open(file, 'w') as fobj:
+        fobj.write(content)
+    return True
+
+
+token_h_template = """\
+/* Auto-generated by Tools/scripts/generate_token.py */
+
+/* Token types */
+#ifndef Py_LIMITED_API
+#ifndef Py_TOKEN_H
+#define Py_TOKEN_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */
+
+%s\
+#define N_TOKENS %d
+#define NT_OFFSET %d
+
+/* Special definitions for cooperation with parser */
+
+#define ISTERMINAL(x) ((x) < NT_OFFSET)
+#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
+#define ISEOF(x) ((x) == ENDMARKER)
+
+
+PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
+PyAPI_FUNC(int) PyToken_OneChar(int);
+PyAPI_FUNC(int) PyToken_TwoChars(int, int);
+PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_TOKEN_H */
+#endif /* Py_LIMITED_API */
+"""
+
+def make_h(infile, outfile='Include/token.h'):
+    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+
+    defines = []
+    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
+        defines.append("#define %-15s %d\n" % (name, value))
+
+    if update_file(outfile, token_h_template % (
+            ''.join(defines),
+            len(tok_names),
+            NT_OFFSET
+        )):
+        print("%s regenerated from %s" % (outfile, infile))
+
+
+token_c_template = """\
+/* Auto-generated by Tools/scripts/generate_token.py */
+
+#include "Python.h"
+#include "token.h"
+
+/* Token names */
+
+const char * const _PyParser_TokenNames[] = {
+%s\
+};
+
+/* Return the token corresponding to a single character */
+
+int
+PyToken_OneChar(int c1)
+{
+%s\
+    return OP;
+}
+
+int
+PyToken_TwoChars(int c1, int c2)
+{
+%s\
+    return OP;
+}
+
+int
+PyToken_ThreeChars(int c1, int c2, int c3)
+{
+%s\
+    return OP;
+}
+"""
+
+def generate_chars_to_token(mapping, n=1):
+    result = []
+    write = result.append
+    indent = '    ' * n
+    write(indent)
+    write('switch (c%d) {\n' % (n,))
+    for c in sorted(mapping):
+        write(indent)
+        value = mapping[c]
+        if isinstance(value, dict):
+            write("case '%s':\n" % (c,))
+            write(generate_chars_to_token(value, n + 1))
+            write(indent)
+            write('    break;\n')
+        else:
+            write("case '%s': return %s;\n" % (c, value))
+    write(indent)
+    write('}\n')
+    return ''.join(result)
+
+def make_c(infile, outfile='Parser/token.c'):
+    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+    string_to_tok['<>'] = string_to_tok['!=']
+    chars_to_token = {}
+    for string, value in string_to_tok.items():
+        assert 1 <= len(string) <= 3
+        name = tok_names[value]
+        m = chars_to_token.setdefault(len(string), {})
+        for c in string[:-1]:
+            m = m.setdefault(c, {})
+        m[string[-1]] = name
+
+    names = []
+    for value, name in enumerate(tok_names):
+        if value >= ERRORTOKEN:
+            name = '<%s>' % name
+        names.append('    "%s",\n' % name)
+    names.append('    "<N_TOKENS>",\n')
+
+    if update_file(outfile, token_c_template % (
+            ''.join(names),
+            generate_chars_to_token(chars_to_token[1]),
+            generate_chars_to_token(chars_to_token[2]),
+            generate_chars_to_token(chars_to_token[3])
+        )):
+        print("%s regenerated from %s" % (outfile, infile))
+
+
+token_inc_template = """\
+.. Auto-generated by Tools/scripts/generate_token.py
+%s
+.. data:: N_TOKENS
+
+.. data:: NT_OFFSET
+"""
+
+def make_rst(infile, outfile='Doc/library/token-list.inc'):
+    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+    tok_to_string = {value: s for s, value in string_to_tok.items()}
+
+    names = []
+    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
+        names.append('.. data:: %s' % (name,))
+        if value in tok_to_string:
+            names.append('')
+            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
+        names.append('')
+
+    if update_file(outfile, token_inc_template % '\n'.join(names)):
+        print("%s regenerated from %s" % (outfile, infile))
+
+
+token_py_template = '''\
+"""Token constants."""
+# Auto-generated by Tools/scripts/generate_token.py
+
+__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
+
+%s
+N_TOKENS = %d
+# Special definitions for cooperation with parser
+NT_OFFSET = %d
+
+tok_name = {value: name
+            for name, value in globals().items()
+            if isinstance(value, int) and not name.startswith('_')}
+__all__.extend(tok_name.values())
+
+EXACT_TOKEN_TYPES = {
+%s
+}
+
+def ISTERMINAL(x):
+    return x < NT_OFFSET
+
+def ISNONTERMINAL(x):
+    return x >= NT_OFFSET
+
+def ISEOF(x):
+    return x == ENDMARKER
+'''
+
+def make_py(infile, outfile='Lib/token.py'):
+    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+
+    constants = []
+    for value, name in enumerate(tok_names):
+        constants.append('%s = %d' % (name, value))
+    constants.insert(ERRORTOKEN,
+        "# These aren't used by the C tokenizer but are needed for tokenize.py")
+
+    token_types = []
+    for s, value in sorted(string_to_tok.items()):
+        token_types.append('    %r: %s,' % (s, tok_names[value]))
+
+    if update_file(outfile, token_py_template % (
+            '\n'.join(constants),
+            len(tok_names),
+            NT_OFFSET,
+            '\n'.join(token_types),
+        )):
+        print("%s regenerated from %s" % (outfile, infile))
+
+
+def main(op, infile='Grammar/Tokens', *args):
+    make = globals()['make_' + op]
+    make(infile, *args)
+
+
+if __name__ == '__main__':
+    import sys
+    main(*sys.argv[1:])
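
A quick smoke test of the regenerated Lib/token.py (a sketch: the names come from token_py_template above, while the exact integer values depend on the order of entries in Grammar/Tokens):

    import token

    assert token.tok_name[token.NAME] == 'NAME'
    assert token.ISTERMINAL(token.NAME)            # terminals sit below NT_OFFSET
    assert token.ISNONTERMINAL(token.NT_OFFSET)    # non-terminals start at 256
    assert token.EXACT_TOKEN_TYPES['!='] == token.NOTEQUAL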