Diffstat (limited to 'Tools/scripts/generate_token.py')
-rwxr-xr-x | Tools/scripts/generate_token.py | 268 |
1 file changed, 0 insertions, 268 deletions
diff --git a/Tools/scripts/generate_token.py b/Tools/scripts/generate_token.py
deleted file mode 100755
index f2745e8..0000000
--- a/Tools/scripts/generate_token.py
+++ /dev/null
@@ -1,268 +0,0 @@
-#! /usr/bin/env python3
-# This script generates token related files from Grammar/Tokens:
-#
-#     Doc/library/token-list.inc
-#     Include/token.h
-#     Parser/token.c
-#     Lib/token.py
-
-
-NT_OFFSET = 256
-
-def load_tokens(path):
-    tok_names = []
-    string_to_tok = {}
-    ERRORTOKEN = None
-    with open(path) as fp:
-        for line in fp:
-            line = line.strip()
-            # strip comments
-            i = line.find('#')
-            if i >= 0:
-                line = line[:i].strip()
-            if not line:
-                continue
-            fields = line.split()
-            name = fields[0]
-            value = len(tok_names)
-            if name == 'ERRORTOKEN':
-                ERRORTOKEN = value
-            string = fields[1] if len(fields) > 1 else None
-            if string:
-                string = eval(string)
-                string_to_tok[string] = value
-            tok_names.append(name)
-    return tok_names, ERRORTOKEN, string_to_tok
-
-
-def update_file(file, content):
-    try:
-        with open(file, 'r') as fobj:
-            if fobj.read() == content:
-                return False
-    except (OSError, ValueError):
-        pass
-    with open(file, 'w') as fobj:
-        fobj.write(content)
-    return True
-
-
-token_h_template = """\
-/* Auto-generated by Tools/scripts/generate_token.py */
-
-/* Token types */
-#ifndef Py_LIMITED_API
-#ifndef Py_TOKEN_H
-#define Py_TOKEN_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */
-
-%s\
-#define N_TOKENS        %d
-#define NT_OFFSET       %d
-
-/* Special definitions for cooperation with parser */
-
-#define ISTERMINAL(x)           ((x) < NT_OFFSET)
-#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
-#define ISEOF(x)                ((x) == ENDMARKER)
-
-
-PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
-PyAPI_FUNC(int) PyToken_OneChar(int);
-PyAPI_FUNC(int) PyToken_TwoChars(int, int);
-PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !Py_TOKEN_H */
-#endif /* Py_LIMITED_API */
-"""

-def make_h(infile, outfile='Include/token.h'):
-    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
-
-    defines = []
-    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
-        defines.append("#define %-15s %d\n" % (name, value))
-
-    if update_file(outfile, token_h_template % (
-            ''.join(defines),
-            len(tok_names),
-            NT_OFFSET
-        )):
-        print("%s regenerated from %s" % (outfile, infile))
-
-
-token_c_template = """\
-/* Auto-generated by Tools/scripts/generate_token.py */
-
-#include "Python.h"
-#include "token.h"
-
-/* Token names */
-
-const char * const _PyParser_TokenNames[] = {
-%s\
-};
-
-/* Return the token corresponding to a single character */
-
-int
-PyToken_OneChar(int c1)
-{
-%s\
-    return OP;
-}
-
-int
-PyToken_TwoChars(int c1, int c2)
-{
-%s\
-    return OP;
-}
-
-int
-PyToken_ThreeChars(int c1, int c2, int c3)
-{
-%s\
-    return OP;
-}
-"""
-
-def generate_chars_to_token(mapping, n=1):
-    result = []
-    write = result.append
-    indent = '    ' * n
-    write(indent)
-    write('switch (c%d) {\n' % (n,))
-    for c in sorted(mapping):
-        write(indent)
-        value = mapping[c]
-        if isinstance(value, dict):
-            write("case '%s':\n" % (c,))
-            write(generate_chars_to_token(value, n + 1))
-            write(indent)
-            write('    break;\n')
-        else:
-            write("case '%s': return %s;\n" % (c, value))
-    write(indent)
-    write('}\n')
-    return ''.join(result)
-
-def make_c(infile, outfile='Parser/token.c'):
-    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
-    string_to_tok['<>'] = string_to_tok['!=']
-    chars_to_token = {}
-    for string, value in string_to_tok.items():
-        assert 1 <= len(string) <= 3
-        name = tok_names[value]
-        m = chars_to_token.setdefault(len(string), {})
-        for c in string[:-1]:
-            m = m.setdefault(c, {})
-        m[string[-1]] = name
-
-    names = []
-    for value, name in enumerate(tok_names):
-        if value >= ERRORTOKEN:
-            name = '<%s>' % name
-        names.append('    "%s",\n' % name)
-    names.append('    "<N_TOKENS>",\n')
-
-    if update_file(outfile, token_c_template % (
-            ''.join(names),
-            generate_chars_to_token(chars_to_token[1]),
-            generate_chars_to_token(chars_to_token[2]),
-            generate_chars_to_token(chars_to_token[3])
-        )):
-        print("%s regenerated from %s" % (outfile, infile))
-
-
-token_inc_template = """\
-.. Auto-generated by Tools/scripts/generate_token.py
-%s
-.. data:: N_TOKENS
-
-.. data:: NT_OFFSET
-"""
-
-def make_rst(infile, outfile='Doc/library/token-list.inc'):
-    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
-    tok_to_string = {value: s for s, value in string_to_tok.items()}
-
-    names = []
-    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
-        names.append('.. data:: %s' % (name,))
-        if value in tok_to_string:
-            names.append('')
-            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
-        names.append('')
-
-    if update_file(outfile, token_inc_template % '\n'.join(names)):
-        print("%s regenerated from %s" % (outfile, infile))
-
-
-token_py_template = '''\
-"""Token constants."""
-# Auto-generated by Tools/scripts/generate_token.py
-
-__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
-
-%s
-N_TOKENS = %d
-# Special definitions for cooperation with parser
-NT_OFFSET = %d
-
-tok_name = {value: name
-            for name, value in globals().items()
-            if isinstance(value, int) and not name.startswith('_')}
-__all__.extend(tok_name.values())
-
-EXACT_TOKEN_TYPES = {
-%s
-}
-
-def ISTERMINAL(x):
-    return x < NT_OFFSET
-
-def ISNONTERMINAL(x):
-    return x >= NT_OFFSET
-
-def ISEOF(x):
-    return x == ENDMARKER
-'''
-
-def make_py(infile, outfile='Lib/token.py'):
-    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
-
-    constants = []
-    for value, name in enumerate(tok_names):
-        constants.append('%s = %d' % (name, value))
-    constants.insert(ERRORTOKEN,
-                     "# These aren't used by the C tokenizer but are needed for tokenize.py")
-
-    token_types = []
-    for s, value in sorted(string_to_tok.items()):
-        token_types.append('    %r: %s,' % (s, tok_names[value]))
-
-    if update_file(outfile, token_py_template % (
-            '\n'.join(constants),
-            len(tok_names),
-            NT_OFFSET,
-            '\n'.join(token_types),
-        )):
-        print("%s regenerated from %s" % (outfile, infile))
-
-
-def main(op, infile='Grammar/Tokens', *args):
-    make = globals()['make_' + op]
-    make(infile, *args)
-
-
-if __name__ == '__main__':
-    import sys
-    main(*sys.argv[1:])
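
Usage note: the sketch below is not part of the diff. It shows, based on the main() entry point and the make_* defaults visible above, how the deleted generator could be driven to regenerate Lib/token.py; the sys.path manipulation and the assumption that it is run from the root of a CPython checkout are illustrative only.

    # Hedged sketch: drive the deleted generator programmatically.
    # Command-line equivalent, per the __main__ block in the file above:
    #   python3 Tools/scripts/generate_token.py py Grammar/Tokens Lib/token.py
    import sys
    sys.path.insert(0, 'Tools/scripts')   # assumption: current directory is the CPython checkout root
    import generate_token

    # 'py' dispatches to make_py(); the input and output paths match the defaults in the script.
    generate_token.main('py', 'Grammar/Tokens', 'Lib/token.py')

Passing 'h', 'c' or 'rst' as the first argument instead selects make_h, make_c or make_rst, which regenerate Include/token.h, Parser/token.c and Doc/library/token-list.inc respectively.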