author    | Yury Selivanov <yury@magic.io> | 2016-09-09 03:50:03 (GMT)
committer | Yury Selivanov <yury@magic.io> | 2016-09-09 03:50:03 (GMT)
commit    | f8cb8a16a344ab208fd46876c4b63604987347b8 (patch)
tree      | c44caa48291401d1e1e388004d2762513ac88c93 /Tools/parser
parent    | 09ad17810c38d1aaae02de69084dd2a8ad9f5cdb (diff)
Issue #27985: Implement PEP 526 -- Syntax for Variable Annotations.
Patch by Ivan Levkivskyi.
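
For context, PEP 526 replaces 3.5-style type comments with first-class annotation syntax. The forms below are standard Python 3.6 examples illustrating the feature itself; they are not part of this patch:

    from typing import ClassVar, List

    primes: List[int] = []   # annotated assignment
    captain: str             # annotation without an initial value

    class Starship:
        stats: ClassVar[dict] = {}   # class variable annotation
        damage: int                  # instance variable annotation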
Diffstat (limited to 'Tools/parser')
-rw-r--r-- | Tools/parser/com2ann.py | 308
-rw-r--r-- | Tools/parser/unparse.py |  13
2 files changed, 321 insertions, 0 deletions
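
The new Tools/parser/com2ann.py helper added below rewrites 3.5-style type comments into such annotations. Per its docstring (input shown first, output second; the identifiers are placeholders):

    variable = value  # type: annotation  # real comment
    variable: annotation = value  # real comment

With the -n/--drop-none and -e/--drop-ellipsis options, annotated `= None` and `= ...` values are dropped entirely, leaving a bare `variable: annotation`.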
diff --git a/Tools/parser/com2ann.py b/Tools/parser/com2ann.py
new file mode 100644
index 0000000..1f46e73
--- /dev/null
+++ b/Tools/parser/com2ann.py
@@ -0,0 +1,308 @@
+"""Helper module to translate 3.5 type comments to 3.6 variable annotations."""
+import re
+import os
+import ast
+import argparse
+import tokenize
+from collections import defaultdict
+from textwrap import dedent
+from io import BytesIO
+
+__all__ = ['com2ann', 'TYPE_COM']
+
+TYPE_COM = re.compile(r'\s*#\s*type\s*:.*$', flags=re.DOTALL)
+TRAIL_OR_COM = re.compile(r'\s*$|\s*#.*$', flags=re.DOTALL)
+
+
+class _Data:
+    """Internal class describing global data on file."""
+    def __init__(self, lines, tokens):
+        self.lines = lines
+        self.tokens = tokens
+        ttab = defaultdict(list)  # maps line number to token numbers
+        for i, tok in enumerate(tokens):
+            ttab[tok.start[0]].append(i)
+        self.ttab = ttab
+        self.success = []  # list of lines where type comments were processed
+        self.fail = []     # list of lines where type comments were rejected
+
+
+def skip_blank(d, lno):
+    while d.lines[lno].strip() == '':
+        lno += 1
+    return lno
+
+
+def find_start(d, lcom):
+    """Find first char of the assignment target."""
+    i = d.ttab[lcom + 1][-2]  # index of type comment token in tokens list
+    while ((d.tokens[i].exact_type != tokenize.NEWLINE) and
+           (d.tokens[i].exact_type != tokenize.ENCODING)):
+        i -= 1
+    lno = d.tokens[i].start[0]
+    return skip_blank(d, lno)
+
+
+def check_target(stmt):
+    if len(stmt.body):
+        assign = stmt.body[0]
+    else:
+        return False
+    if isinstance(assign, ast.Assign) and len(assign.targets) == 1:
+        targ = assign.targets[0]
+    else:
+        return False
+    if (isinstance(targ, ast.Name) or isinstance(targ, ast.Attribute)
+            or isinstance(targ, ast.Subscript)):
+        return True
+    return False
+
+
+def find_eq(d, lstart):
+    """Find equal sign starting from lstart, taking care of cases like d[f(x=1)] = 5."""
+    col = pars = 0
+    lno = lstart
+    while d.lines[lno][col] != '=' or pars != 0:
+        ch = d.lines[lno][col]
+        if ch in '([{':
+            pars += 1
+        elif ch in ')]}':
+            pars -= 1
+        if ch == '#' or col == len(d.lines[lno])-1:
+            lno = skip_blank(d, lno+1)
+            col = 0
+        else:
+            col += 1
+    return lno, col
+
+
+def find_val(d, poseq):
+    """Find position of first char of assignment value starting from poseq."""
+    lno, col = poseq
+    while (d.lines[lno][col].isspace() or d.lines[lno][col] in '=\\'):
+        if col == len(d.lines[lno])-1:
+            lno += 1
+            col = 0
+        else:
+            col += 1
+    return lno, col
+
+
+def find_targ(d, poseq):
+    """Find position of last char of target (annotation goes here)."""
+    lno, col = poseq
+    while (d.lines[lno][col].isspace() or d.lines[lno][col] in '=\\'):
+        if col == 0:
+            lno -= 1
+            col = len(d.lines[lno])-1
+        else:
+            col -= 1
+    return lno, col+1
+
+
+def trim(new_lines, string, ltarg, poseq, lcom, ccom):
+    """Remove None or Ellipsis from assignment value.
+
+    Also remove parens if one has (None), (...) etc.
+    string -- 'None' or '...'
+    ltarg -- line where last char of target is located
+    poseq -- position of equal sign
+    lcom, ccom -- position of type comment
+    """
+    nopars = lambda s: s.replace('(', '').replace(')', '')
+    leq, ceq = poseq
+    end = ccom if leq == lcom else len(new_lines[leq])
+    subline = new_lines[leq][:ceq]
+    if leq == ltarg:
+        subline = subline.rstrip()
+    new_lines[leq] = subline + (new_lines[leq][end:] if leq == lcom
+                                else new_lines[leq][ceq+1:end])
+
+    for lno in range(leq+1, lcom):
+        new_lines[lno] = nopars(new_lines[lno])
+
+    if lcom != leq:
+        subline = nopars(new_lines[lcom][:ccom]).replace(string, '')
+        if (not subline.isspace()):
+            subline = subline.rstrip()
+        new_lines[lcom] = subline + new_lines[lcom][ccom:]
+
+
+def _com2ann(d, drop_None, drop_Ellipsis):
+    new_lines = d.lines[:]
+    for lcom, line in enumerate(d.lines):
+        match = re.search(TYPE_COM, line)
+        if match:
+            # strip " # type : annotation \n" -> "annotation \n"
+            tp = match.group().lstrip()[1:].lstrip()[4:].lstrip()[1:].lstrip()
+            submatch = re.search(TRAIL_OR_COM, tp)
+            subcom = ''
+            if submatch and submatch.group():
+                subcom = submatch.group()
+                tp = tp[:submatch.start()]
+            if tp == 'ignore':
+                continue
+            ccom = match.start()
+            if not any(d.tokens[i].exact_type == tokenize.COMMENT
+                       for i in d.ttab[lcom + 1]):
+                d.fail.append(lcom)
+                continue  # type comment inside string
+            lstart = find_start(d, lcom)
+            stmt_str = dedent(''.join(d.lines[lstart:lcom+1]))
+            try:
+                stmt = ast.parse(stmt_str)
+            except SyntaxError:
+                d.fail.append(lcom)
+                continue  # for or with statements
+            if not check_target(stmt):
+                d.fail.append(lcom)
+                continue
+
+            d.success.append(lcom)
+            val = stmt.body[0].value
+
+            # writing output now
+            poseq = find_eq(d, lstart)
+            lval, cval = find_val(d, poseq)
+            ltarg, ctarg = find_targ(d, poseq)
+
+            op_par = ''
+            cl_par = ''
+            if isinstance(val, ast.Tuple):
+                if d.lines[lval][cval] != '(':
+                    op_par = '('
+                    cl_par = ')'
+            # write the comment first
+            new_lines[lcom] = d.lines[lcom][:ccom].rstrip() + cl_par + subcom
+            ccom = len(d.lines[lcom][:ccom].rstrip())
+
+            string = False
+            if isinstance(val, ast.Tuple):
+                # t = 1, 2 -> t = (1, 2); only latter is allowed with annotation
+                free_place = int(new_lines[lval][cval-2:cval] == '  ')
+                new_lines[lval] = (new_lines[lval][:cval-free_place] +
+                                   op_par + new_lines[lval][cval:])
+            elif isinstance(val, ast.Ellipsis) and drop_Ellipsis:
+                string = '...'
+            elif (isinstance(val, ast.NameConstant) and
+                  val.value is None and drop_None):
+                string = 'None'
+            if string:
+                trim(new_lines, string, ltarg, poseq, lcom, ccom)
+
+            # finally write an annotation
+            new_lines[ltarg] = (new_lines[ltarg][:ctarg] +
+                                ': ' + tp + new_lines[ltarg][ctarg:])
+    return ''.join(new_lines)
+
+
+def com2ann(code, *, drop_None=False, drop_Ellipsis=False, silent=False):
+    """Translate type comments to type annotations in code.
+
+    Take code as a string and return this string where::
+
+        variable = value  # type: annotation  # real comment
+
+    is translated to::
+
+        variable: annotation = value  # real comment
+
+    For unsupported syntax cases, the type comments are
+    left intact.  If drop_None is True or if drop_Ellipsis
+    is True, translate correspondingly::
+
+        variable = None  # type: annotation
+        variable = ...   # type: annotation
+
+    into::
+
+        variable: annotation
+
+    The tool tries to preserve code formatting as much as
+    possible, but an exact translation is not guaranteed.
+    A summary of translated comments is printed by default.
+ """ + try: + ast.parse(code) # we want to work only with file without syntax errors + except SyntaxError: + return None + lines = code.splitlines(keepends=True) + rl = BytesIO(code.encode('utf-8')).readline + tokens = list(tokenize.tokenize(rl)) + + data = _Data(lines, tokens) + new_code = _com2ann(data, drop_None, drop_Ellipsis) + + if not silent: + if data.success: + print('Comments translated on lines:', + ', '.join(str(lno+1) for lno in data.success)) + if data.fail: + print('Comments rejected on lines:', + ', '.join(str(lno+1) for lno in data.fail)) + if not data.success and not data.fail: + print('No type comments found') + + return new_code + + +def translate_file(infile, outfile, dnone, dell, silent): + try: + descr = tokenize.open(infile) + except SyntaxError: + print("Cannot open", infile) + return + with descr as f: + code = f.read() + enc = f.encoding + if not silent: + print('File:', infile) + new_code = com2ann(code, drop_None=dnone, + drop_Ellipsis=dell, + silent=silent) + if new_code is None: + print("SyntaxError in", infile) + return + with open(outfile, 'wb') as f: + f.write((new_code).encode(enc)) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("-o", "--outfile", + help="output file, will be overwritten if exists,\n" + "defaults to input file") + parser.add_argument("infile", + help="input file or directory for translation, must\n" + "contain no syntax errors, for directory\n" + "the outfile is ignored and translation is\n" + "made in place") + parser.add_argument("-s", "--silent", + help="Do not print summary for line numbers of\n" + "translated and rejected comments", + action="store_true") + parser.add_argument("-n", "--drop-none", + help="drop any None as assignment value during\n" + "translation if it is annotated by a type coment", + action="store_true") + parser.add_argument("-e", "--drop-ellipsis", + help="drop any Ellipsis (...) as assignment value during\n" + "translation if it is annotated by a type coment", + action="store_true") + args = parser.parse_args() + if args.outfile is None: + args.outfile = args.infile + + if os.path.isfile(args.infile): + translate_file(args.infile, args.outfile, + args.drop_none, args.drop_ellipsis, args.silent) + else: + for root, dirs, files in os.walk(args.infile): + for afile in files: + _, ext = os.path.splitext(afile) + if ext == '.py' or ext == '.pyi': + fname = os.path.join(root, afile) + translate_file(fname, fname, + args.drop_none, args.drop_ellipsis, + args.silent) diff --git a/Tools/parser/unparse.py b/Tools/parser/unparse.py index 7203057..6c296bd 100644 --- a/Tools/parser/unparse.py +++ b/Tools/parser/unparse.py @@ -104,6 +104,19 @@ class Unparser: self.write(" "+self.binop[t.op.__class__.__name__]+"= ") self.dispatch(t.value) + def _AnnAssign(self, t): + self.fill() + if not t.simple and isinstance(t.target, ast.Name): + self.write('(') + self.dispatch(t.target) + if not t.simple and isinstance(t.target, ast.Name): + self.write(')') + self.write(": ") + self.dispatch(t.annotation) + if t.value: + self.write(" = ") + self.dispatch(t.value) + def _Return(self, t): self.fill("return") if t.value: |