Diffstat (limited to 'Tools/scripts/generate_token.py')
-rw-r--r--  Tools/scripts/generate_token.py  268
1 file changed, 268 insertions, 0 deletions
diff --git a/Tools/scripts/generate_token.py b/Tools/scripts/generate_token.py
new file mode 100644
index 0000000..f2745e8
--- /dev/null
+++ b/Tools/scripts/generate_token.py
@@ -0,0 +1,268 @@
+#! /usr/bin/env python3
+# This script generates token related files from Grammar/Tokens:
+#
+# Doc/library/token-list.inc
+# Include/token.h
+# Parser/token.c
+# Lib/token.py
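+#
+# Input format of Grammar/Tokens, as read by load_tokens() below: each
+# non-blank line names a token and may be followed by the token's string as a
+# quoted literal; '#' starts a comment.  For example:
+#
+#     NAME
+#     NUMBER
+#     NOTEQUAL                '!='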
+
+
+NT_OFFSET = 256
+
+def load_tokens(path):
+    tok_names = []
+    string_to_tok = {}
+    ERRORTOKEN = None
+    with open(path) as fp:
+        for line in fp:
+            line = line.strip()
+            # strip comments
+            i = line.find('#')
+            if i >= 0:
+                line = line[:i].strip()
+            if not line:
+                continue
+            fields = line.split()
+            name = fields[0]
+            value = len(tok_names)
+            if name == 'ERRORTOKEN':
+                ERRORTOKEN = value
+            string = fields[1] if len(fields) > 1 else None
+            if string:
+                string = eval(string)
+                string_to_tok[string] = value
+            tok_names.append(name)
+    return tok_names, ERRORTOKEN, string_to_tok
+
+
+def update_file(file, content):
+    try:
+        with open(file, 'r') as fobj:
+            if fobj.read() == content:
+                return False
+    except (OSError, ValueError):
+        pass
+    with open(file, 'w') as fobj:
+        fobj.write(content)
+    return True
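+# update_file() rewrites the output only when the generated content differs
+# from what is already on disk, so unchanged files keep their timestamps
+# (and, presumably, avoid retriggering dependent build steps).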
+
+
+token_h_template = """\
+/* Auto-generated by Tools/scripts/generate_token.py */
+
+/* Token types */
+#ifndef Py_LIMITED_API
+#ifndef Py_TOKEN_H
+#define Py_TOKEN_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */
+
+%s\
+#define N_TOKENS %d
+#define NT_OFFSET %d
+
+/* Special definitions for cooperation with parser */
+
+#define ISTERMINAL(x) ((x) < NT_OFFSET)
+#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
+#define ISEOF(x) ((x) == ENDMARKER)
+
+
+PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
+PyAPI_FUNC(int) PyToken_OneChar(int);
+PyAPI_FUNC(int) PyToken_TwoChars(int, int);
+PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_TOKEN_H */
+#endif /* Py_LIMITED_API */
+"""
+
+def make_h(infile, outfile='Include/token.h'):
+    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+
+    defines = []
+    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
+        defines.append("#define %-15s %d\n" % (name, value))
+
+    if update_file(outfile, token_h_template % (
+            ''.join(defines),
+            len(tok_names),
+            NT_OFFSET
+        )):
+        print("%s regenerated from %s" % (outfile, infile))
+
+
+token_c_template = """\
+/* Auto-generated by Tools/scripts/generate_token.py */
+
+#include "Python.h"
+#include "token.h"
+
+/* Token names */
+
+const char * const _PyParser_TokenNames[] = {
+%s\
+};
+
+/* Return the token corresponding to a single character */
+
+int
+PyToken_OneChar(int c1)
+{
+%s\
+    return OP;
+}
+
+int
+PyToken_TwoChars(int c1, int c2)
+{
+%s\
+    return OP;
+}
+
+int
+PyToken_ThreeChars(int c1, int c2, int c3)
+{
+%s\
+    return OP;
+}
+"""
+
+def generate_chars_to_token(mapping, n=1):
+    result = []
+    write = result.append
+    indent = '    ' * n
+    write(indent)
+    write('switch (c%d) {\n' % (n,))
+    for c in sorted(mapping):
+        write(indent)
+        value = mapping[c]
+        if isinstance(value, dict):
+            write("case '%s':\n" % (c,))
+            write(generate_chars_to_token(value, n + 1))
+            write(indent)
+            write('    break;\n')
+        else:
+            write("case '%s': return %s;\n" % (c, value))
+    write(indent)
+    write('}\n')
+    return ''.join(result)
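+# For example, the two-character map {'!': {'=': 'NOTEQUAL'}} expands to:
+#
+#     switch (c1) {
+#     case '!':
+#         switch (c2) {
+#         case '=': return NOTEQUAL;
+#         }
+#         break;
+#     }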
+
+def make_c(infile, outfile='Parser/token.c'):
+    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+    string_to_tok['<>'] = string_to_tok['!=']
+    chars_to_token = {}
+    for string, value in string_to_tok.items():
+        assert 1 <= len(string) <= 3
+        name = tok_names[value]
+        m = chars_to_token.setdefault(len(string), {})
+        for c in string[:-1]:
+            m = m.setdefault(c, {})
+        m[string[-1]] = name
+
+    names = []
+    for value, name in enumerate(tok_names):
+        if value >= ERRORTOKEN:
+            name = '<%s>' % name
+        names.append('    "%s",\n' % name)
+    names.append('    "<N_TOKENS>",\n')
+
+    if update_file(outfile, token_c_template % (
+            ''.join(names),
+            generate_chars_to_token(chars_to_token[1]),
+            generate_chars_to_token(chars_to_token[2]),
+            generate_chars_to_token(chars_to_token[3])
+        )):
+        print("%s regenerated from %s" % (outfile, infile))
+
+
+token_inc_template = """\
+.. Auto-generated by Tools/scripts/generate_token.py
+%s
+.. data:: N_TOKENS
+
+.. data:: NT_OFFSET
+"""
+
+def make_rst(infile, outfile='Doc/library/token-list.inc'):
+    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+    tok_to_string = {value: s for s, value in string_to_tok.items()}
+
+    names = []
+    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
+        names.append('.. data:: %s' % (name,))
+        if value in tok_to_string:
+            names.append('')
+            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
+        names.append('')
+
+    if update_file(outfile, token_inc_template % '\n'.join(names)):
+        print("%s regenerated from %s" % (outfile, infile))
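+# For example, a token such as NOTEQUAL with string '!=' is emitted as:
+#
+#     .. data:: NOTEQUAL
+#
+#        Token value for ``"!="``.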
+
+
+token_py_template = '''\
+"""Token constants."""
+# Auto-generated by Tools/scripts/generate_token.py
+
+__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
+
+%s
+N_TOKENS = %d
+# Special definitions for cooperation with parser
+NT_OFFSET = %d
+
+tok_name = {value: name
+            for name, value in globals().items()
+            if isinstance(value, int) and not name.startswith('_')}
+__all__.extend(tok_name.values())
+
+EXACT_TOKEN_TYPES = {
+%s
+}
+
+def ISTERMINAL(x):
+    return x < NT_OFFSET
+
+def ISNONTERMINAL(x):
+    return x >= NT_OFFSET
+
+def ISEOF(x):
+    return x == ENDMARKER
+'''
+
+def make_py(infile, outfile='Lib/token.py'):
+    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+
+    constants = []
+    for value, name in enumerate(tok_names):
+        constants.append('%s = %d' % (name, value))
+    constants.insert(ERRORTOKEN,
+                     "# These aren't used by the C tokenizer but are needed for tokenize.py")
+
+    token_types = []
+    for s, value in sorted(string_to_tok.items()):
+        token_types.append('    %r: %s,' % (s, tok_names[value]))
+
+    if update_file(outfile, token_py_template % (
+            '\n'.join(constants),
+            len(tok_names),
+            NT_OFFSET,
+            '\n'.join(token_types),
+        )):
+        print("%s regenerated from %s" % (outfile, infile))
+
+
+def main(op, infile='Grammar/Tokens', *args):
+    make = globals()['make_' + op]
+    make(infile, *args)
+
+
+if __name__ == '__main__':
+    import sys
+    main(*sys.argv[1:])
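
Example invocations, inferred from main() above (it dispatches its first
argument to the matching make_* function; infile and outfile are optional and
default to Grammar/Tokens and the paths listed in the header comment):

    python3 Tools/scripts/generate_token.py h   Grammar/Tokens Include/token.h
    python3 Tools/scripts/generate_token.py c   Grammar/Tokens Parser/token.c
    python3 Tools/scripts/generate_token.py rst Grammar/Tokens Doc/library/token-list.inc
    python3 Tools/scripts/generate_token.py py  Grammar/Tokens Lib/token.py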