author | Serhiy Storchaka <storchaka@gmail.com> | 2018-12-22 09:18:40 (GMT)
committer | GitHub <noreply@github.com> | 2018-12-22 09:18:40 (GMT)
commit | 8ac658114dec4964479baecfbc439fceb40eaa79 (patch)
tree | e66c4c3beda293a6fdf01763306697d15d0af157 /Lib/token.py
parent | c1b4b0f6160e1919394586f44b12538505fed300 (diff)
download | cpython-8ac658114dec4964479baecfbc439fceb40eaa79.zip cpython-8ac658114dec4964479baecfbc439fceb40eaa79.tar.gz cpython-8ac658114dec4964479baecfbc439fceb40eaa79.tar.bz2
bpo-30455: Generate all token-related code and docs from Grammar/Tokens. (GH-10370)
"Include/token.h", "Lib/token.py" (containing now some data moved from
"Lib/tokenize.py") and new files "Parser/token.c" (containing the code
moved from "Parser/tokenizer.c") and "Doc/library/token-list.inc" (included
in "Doc/library/token.rst") are now generated from "Grammar/Tokens" by
"Tools/scripts/generate_token.py". The script overwrites files only if
needed and can be used on the read-only sources tree.
"Lib/symbol.py" is now generated by "Tools/scripts/generate_symbol_py.py"
instead of been executable itself.
Added new make targets "regen-token" and "regen-symbol" which are now
dependencies of "regen-all".
The documentation contains now strings for operators and punctuation tokens.
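
For a quick sense of what the regenerated "Lib/token.py" exposes, here is a minimal interactive sketch (assuming a CPython build that includes this commit, i.e. 3.8+); the names used below are exactly those defined in the diff further down:

```python
# Minimal sketch: poking at the regenerated token module (Python 3.8+).
import token

# tok_name maps numeric token codes back to their names.
print(token.tok_name[token.OP])                        # 'OP'

# EXACT_TOKEN_TYPES (moved here from Lib/tokenize.py) maps operator and
# punctuation strings to their exact token types.
print(token.EXACT_TOKEN_TYPES['->'] == token.RARROW)   # True

# ISTERMINAL/ISEOF distinguish terminal tokens from grammar nonterminals,
# which start at NT_OFFSET (256).
print(token.ISTERMINAL(token.NAME), token.ISEOF(token.ENDMARKER))  # True True
```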
Diffstat (limited to 'Lib/token.py')
-rw-r--r-- | Lib/token.py | 134
1 file changed, 52 insertions, 82 deletions
diff --git a/Lib/token.py b/Lib/token.py
index ba13205..5af7e6b 100644
--- a/Lib/token.py
+++ b/Lib/token.py
@@ -1,15 +1,8 @@
-"""Token constants (from "token.h")."""
+"""Token constants."""
+# Auto-generated by Tools/scripts/generate_token.py
 
 __all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
 
-# This file is automatically generated; please don't muck it up!
-#
-# To update the symbols in this file, 'cd' to the top directory of
-# the python source tree after building the interpreter and run:
-#
-#    ./python Lib/token.py
-
-#--start constants--
 ENDMARKER = 0
 NAME = 1
 NUMBER = 2
@@ -63,23 +56,70 @@ AT = 49
 ATEQUAL = 50
 RARROW = 51
 ELLIPSIS = 52
-# Don't forget to update the table _PyParser_TokenNames in tokenizer.c!
 OP = 53
-ERRORTOKEN = 54
 # These aren't used by the C tokenizer but are needed for tokenize.py
+ERRORTOKEN = 54
 COMMENT = 55
 NL = 56
 ENCODING = 57
 N_TOKENS = 58
 # Special definitions for cooperation with parser
 NT_OFFSET = 256
-#--end constants--
 
 tok_name = {value: name
             for name, value in globals().items()
             if isinstance(value, int) and not name.startswith('_')}
 __all__.extend(tok_name.values())
 
+EXACT_TOKEN_TYPES = {
+    '!=': NOTEQUAL,
+    '%': PERCENT,
+    '%=': PERCENTEQUAL,
+    '&': AMPER,
+    '&=': AMPEREQUAL,
+    '(': LPAR,
+    ')': RPAR,
+    '*': STAR,
+    '**': DOUBLESTAR,
+    '**=': DOUBLESTAREQUAL,
+    '*=': STAREQUAL,
+    '+': PLUS,
+    '+=': PLUSEQUAL,
+    ',': COMMA,
+    '-': MINUS,
+    '-=': MINEQUAL,
+    '->': RARROW,
+    '.': DOT,
+    '...': ELLIPSIS,
+    '/': SLASH,
+    '//': DOUBLESLASH,
+    '//=': DOUBLESLASHEQUAL,
+    '/=': SLASHEQUAL,
+    ':': COLON,
+    ';': SEMI,
+    '<': LESS,
+    '<<': LEFTSHIFT,
+    '<<=': LEFTSHIFTEQUAL,
+    '<=': LESSEQUAL,
+    '=': EQUAL,
+    '==': EQEQUAL,
+    '>': GREATER,
+    '>=': GREATEREQUAL,
+    '>>': RIGHTSHIFT,
+    '>>=': RIGHTSHIFTEQUAL,
+    '@': AT,
+    '@=': ATEQUAL,
+    '[': LSQB,
+    ']': RSQB,
+    '^': CIRCUMFLEX,
+    '^=': CIRCUMFLEXEQUAL,
+    '{': LBRACE,
+    '|': VBAR,
+    '|=': VBAREQUAL,
+    '}': RBRACE,
+    '~': TILDE,
+}
+
 def ISTERMINAL(x):
     return x < NT_OFFSET
 
@@ -88,73 +128,3 @@ def ISNONTERMINAL(x):
 
 def ISEOF(x):
     return x == ENDMARKER
-
-
-def _main():
-    import re
-    import sys
-    args = sys.argv[1:]
-    inFileName = args and args[0] or "Include/token.h"
-    outFileName = "Lib/token.py"
-    if len(args) > 1:
-        outFileName = args[1]
-    try:
-        fp = open(inFileName)
-    except OSError as err:
-        sys.stdout.write("I/O error: %s\n" % str(err))
-        sys.exit(1)
-    with fp:
-        lines = fp.read().split("\n")
-    prog = re.compile(
-        r"#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)",
-        re.IGNORECASE)
-    comment_regex = re.compile(
-        r"^\s*/\*\s*(.+?)\s*\*/\s*$",
-        re.IGNORECASE)
-
-    tokens = {}
-    prev_val = None
-    for line in lines:
-        match = prog.match(line)
-        if match:
-            name, val = match.group(1, 2)
-            val = int(val)
-            tokens[val] = {'token': name}  # reverse so we can sort them...
-            prev_val = val
-        else:
-            comment_match = comment_regex.match(line)
-            if comment_match and prev_val is not None:
-                comment = comment_match.group(1)
-                tokens[prev_val]['comment'] = comment
-    keys = sorted(tokens.keys())
-    # load the output skeleton from the target:
-    try:
-        fp = open(outFileName)
-    except OSError as err:
-        sys.stderr.write("I/O error: %s\n" % str(err))
-        sys.exit(2)
-    with fp:
-        format = fp.read().split("\n")
-    try:
-        start = format.index("#--start constants--") + 1
-        end = format.index("#--end constants--")
-    except ValueError:
-        sys.stderr.write("target does not contain format markers")
-        sys.exit(3)
-    lines = []
-    for key in keys:
-        lines.append("%s = %d" % (tokens[key]["token"], key))
-        if "comment" in tokens[key]:
-            lines.append("# %s" % tokens[key]["comment"])
-    format[start:end] = lines
-    try:
-        fp = open(outFileName, 'w')
-    except OSError as err:
-        sys.stderr.write("I/O error: %s\n" % str(err))
-        sys.exit(4)
-    with fp:
-        fp.write("\n".join(format))
-
-
-if __name__ == "__main__":
-    _main()
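
As a usage note (not part of the commit): the EXACT_TOKEN_TYPES table added above is the data "Lib/tokenize.py" consults when resolving TokenInfo.exact_type for OP tokens, so a short sketch like the following shows it in action (assuming Python 3.8+):

```python
# Sketch: how the operator table surfaces through tokenize (Python 3.8+).
import io
import token
import tokenize

source = "total //= count + 1\n"
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    if tok.type == token.OP:
        # exact_type looks the operator string up via EXACT_TOKEN_TYPES.
        print(f"{tok.string!r:8} -> {token.tok_name[tok.exact_type]}")
# Expected output:
#   '//='    -> DOUBLESLASHEQUAL
#   '+'      -> PLUS
```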