Issue #27985: Implement PEP 526 -- Syntax for Variable Annotations.
Patch by Ivan Levkivskyi.
diff --git a/Tools/parser/ b/Tools/parser/
new file mode 100644
index 0000000..1f46e73
--- /dev/null
+++ b/Tools/parser/
@@ -0,0 +1,308 @@
+"""Helper module to translate 3.5 type comments to 3.6 variable annotations."""
+import re
+import os
+import ast
+import argparse
+import tokenize
+from collections import defaultdict
+from textwrap import dedent
+from io import BytesIO
+__all__ = ['com2ann', 'TYPE_COM']
+# Raw strings keep the regex escapes (\s) out of Python's own string-escape
+# processing; the compiled patterns are unchanged.
+# TYPE_COM: optional whitespace, '#', 'type', ':' and everything after it
+# (DOTALL, so the trailing newline is included).
+TYPE_COM = re.compile(r'\s*#\s*type\s*:.*$', flags=re.DOTALL)
+# TRAIL_OR_COM: trailing whitespace only, or whitespace followed by a
+# '#' comment running to the end of the string.
+TRAIL_OR_COM = re.compile(r'\s*$|\s*#.*$', flags=re.DOTALL)
+class _Data:
+ """Internal class describing global data on file.
+
+ Keeps the source lines and the token list together with ``ttab``, an
+ index from the line number on which a token starts to the positions of
+ those tokens in ``tokens``.
+ """
+ def __init__(self, lines, tokens):
+ # lines -- list of source lines
+ # tokens -- list of token objects; each must expose ``start`` so that
+ # start[0] is the line number the token begins on
+ self.lines = lines
+ self.tokens = tokens
+ ttab = defaultdict(list) # maps line number to token numbers
+ for i, tok in enumerate(tokens):
+ ttab[tok.start[0]].append(i)
+ self.ttab = ttab
+ self.success = [] # list of lines where type comments were processed
+ # NOTE(review): the assignment target on the next line is missing from
+ # this copy of the patch -- restore it from upstream before applying.
+ = [] # list of lines where type comments were rejected
+def skip_blank(d, lno):
+    """Return the index of the first non-blank line at or after lno.
+
+    d -- object whose ``lines`` attribute is the list of source lines
+    lno -- index into d.lines where scanning starts
+
+    A line counts as blank when it is empty after stripping whitespace.
+    """
+    index = lno
+    while not d.lines[index].strip():
+        index += 1
+    return index
+def find_start(d, lcom):
+    """Return the index of the first line of the statement ending on lcom.
+
+    d -- file data (lines, tokens and the line-to-token table ttab)
+    lcom -- index in d.lines of the line holding the type comment
+
+    Walk back through the tokens from the type comment until the previous
+    NEWLINE token (or the leading ENCODING token) is reached, then skip any
+    blank lines that follow it.
+    """
+    boundary = (tokenize.NEWLINE, tokenize.ENCODING)
+    # ttab is keyed by token line numbers, which are one more than indexes
+    # into d.lines; the comment is taken to be the second-to-last token
+    # starting on its line.
+    pos = d.ttab[lcom + 1][-2]
+    while d.tokens[pos].exact_type not in boundary:
+        pos -= 1
+    return skip_blank(d, d.tokens[pos].start[0])
+def check_target(stmt):
+    """Tell whether stmt begins with an assignment that can be annotated.
+
+    stmt -- parsed code; only its ``body`` list is inspected
+
+    Return True when the first statement is an ast.Assign with exactly one
+    target and that target is a name, an attribute or a subscript;
+    otherwise return False.
+    """
+    if not stmt.body:
+        return False
+    first = stmt.body[0]
+    if not isinstance(first, ast.Assign) or len(first.targets) != 1:
+        return False
+    return isinstance(first.targets[0],
+                      (ast.Name, ast.Attribute, ast.Subscript))
+def find_eq(d, lstart):
+    """Locate the assignment '=' of the statement starting on line lstart.
+
+    d -- file data; only d.lines is read
+    lstart -- index in d.lines of the first line of the statement
+
+    Return (line index, column) of the first '=' found outside any
+    brackets, so that the '=' in ``d[f(x=1)] = 5`` inside the call is
+    skipped.  Scanning moves to the next non-blank line on reaching the end
+    of a line or a '#'.
+    """
+    depth = 0  # number of currently open ( [ {
+    row, pos = lstart, 0
+    while True:
+        text = d.lines[row]
+        ch = text[pos]
+        if ch == '=' and depth == 0:
+            return row, pos
+        if ch in '([{':
+            depth += 1
+        elif ch in ')]}':
+            depth -= 1
+        if ch == '#' or pos == len(text) - 1:
+            # nothing more to scan on this line
+            row = skip_blank(d, row + 1)
+            pos = 0
+        else:
+            pos += 1
+def find_val(d, poseq):
+    """Return (line, column) of the first character of the assigned value.
+
+    d -- file data; only d.lines is read
+    poseq -- (line, column) of the equal sign
+
+    Scan forward from poseq, stepping over whitespace, '=' and backslash
+    characters and wrapping to the start of the next line at a line end.
+    """
+    row, pos = poseq
+    while True:
+        text = d.lines[row]
+        ch = text[pos]
+        if not (ch.isspace() or ch in '=\\'):
+            return row, pos
+        if pos == len(text) - 1:
+            row, pos = row + 1, 0
+        else:
+            pos += 1
+def find_targ(d, poseq):
+    """Return the position just past the last character of the target.
+
+    d -- file data; only d.lines is read
+    poseq -- (line, column) of the equal sign
+
+    Scan backward from poseq, stepping over whitespace, '=' and backslash
+    characters and wrapping to the end of the previous line at column 0.
+    The returned column is one past the character found, which is where the
+    annotation gets inserted.
+    """
+    row, pos = poseq
+    while True:
+        ch = d.lines[row][pos]
+        if not (ch.isspace() or ch in '=\\'):
+            return row, pos + 1
+        if pos == 0:
+            row -= 1
+            pos = len(d.lines[row]) - 1
+        else:
+            pos -= 1
+def trim(new_lines, string, ltarg, poseq, lcom, ccom):
+    """Drop a None or Ellipsis assignment value from new_lines, in place.
+
+    Parentheses around the value, as in (None) or (...), are removed too.
+
+    new_lines -- list of output lines, modified in place
+    string -- 'None' or '...'
+    ltarg -- line where last char of target is located
+    poseq -- (line, column) of the equal sign
+    lcom, ccom -- line and column of the type comment
+    """
+    def strip_parens(text):
+        return text.replace('(', '').replace(')', '')
+
+    leq, ceq = poseq
+    eq_line = new_lines[leq]
+    head = eq_line[:ceq]
+    if leq == ltarg:
+        # the target ends on this line: drop the spaces before '=' as well
+        head = head.rstrip()
+    if leq == lcom:
+        # '=' and the comment share a line: keep only what follows ccom
+        new_lines[leq] = head + eq_line[ccom:]
+    else:
+        # the value continues on later lines: cut out just the '=' here
+        new_lines[leq] = head + eq_line[ceq + 1:]
+        for lno in range(leq + 1, lcom):
+            new_lines[lno] = strip_parens(new_lines[lno])
+        com_line = new_lines[lcom]
+        cleaned = strip_parens(com_line[:ccom]).replace(string, '')
+        if not cleaned.isspace():
+            cleaned = cleaned.rstrip()
+        new_lines[lcom] = cleaned + com_line[ccom:]
+def _com2ann(d, drop_None, drop_Ellipsis):
+ """Rewrite type comments in d.lines as annotations; return the new text.
+
+ d -- file data (lines, tokens, ttab); the index of each translated
+ comment line is appended to d.success
+ drop_None, drop_Ellipsis -- when true, a None / ... assignment value is
+ removed through trim()
+
+ The result is the modified copy of d.lines joined into one string.
+ """
+ # NOTE(review): several expressions below are truncated in this copy of
+ # the patch (the right-hand sides of ``match =``, ``tp =``, ``submatch =``
+ # and ``subcom =``, and the ``if submatch and`` condition) -- restore them
+ # from upstream before applying.
+ new_lines = d.lines[:]
+ for lcom, line in enumerate(d.lines):
+ match =, line)
+ if match:
+ # strip " # type : annotation \n" -> "annotation \n"
+ tp =[1:].lstrip()[4:].lstrip()[1:].lstrip()
+ submatch =, tp)
+ subcom = ''
+ if submatch and
+ subcom =
+ tp = tp[:submatch.start()]
+ if tp == 'ignore':
+ continue
+ ccom = match.start()
+ # ttab is keyed by token line numbers, hence lcom + 1
+ if not any(d.tokens[i].exact_type == tokenize.COMMENT
+ for i in d.ttab[lcom + 1]):
+ continue # type comment inside string
+ lstart = find_start(d, lcom)
+ # parse just this statement to find out what kind of statement it is
+ stmt_str = dedent(''.join(d.lines[lstart:lcom+1]))
+ try:
+ stmt = ast.parse(stmt_str)
+ except SyntaxError:
+ continue # for or with statements
+ if not check_target(stmt):
+ continue
+ d.success.append(lcom)
+ val = stmt.body[0].value
+ # writing output now
+ poseq = find_eq(d, lstart)
+ lval, cval = find_val(d, poseq)
+ ltarg, ctarg = find_targ(d, poseq)
+ op_par = ''
+ cl_par = ''
+ if isinstance(val, ast.Tuple):
+ if d.lines[lval][cval] != '(':
+ op_par = '('
+ cl_par = ')'
+ # write the comment first
+ new_lines[lcom] = d.lines[lcom][:ccom].rstrip() + cl_par + subcom
+ # ccom now points just past the last non-space char before the comment
+ ccom = len(d.lines[lcom][:ccom].rstrip())
+ string = False
+ if isinstance(val, ast.Tuple):
+ # t = 1, 2 -> t = (1, 2); only latter is allowed with annotation
+ # NOTE(review): a two-character slice is compared with what shows here
+ # as a one-character string; upstream presumably has two spaces -- confirm.
+ free_place = int(new_lines[lval][cval-2:cval] == ' ')
+ new_lines[lval] = (new_lines[lval][:cval-free_place] +
+ op_par + new_lines[lval][cval:])
+ # NOTE(review): ast.Ellipsis and ast.NameConstant are deprecated aliases
+ # in later Python versions -- confirm the targeted version.
+ elif isinstance(val, ast.Ellipsis) and drop_Ellipsis:
+ string = '...'
+ elif (isinstance(val, ast.NameConstant) and
+ val.value is None and drop_None):
+ string = 'None'
+ if string:
+ trim(new_lines, string, ltarg, poseq, lcom, ccom)
+ # finally write an annotation
+ new_lines[ltarg] = (new_lines[ltarg][:ctarg] +
+ ': ' + tp + new_lines[ltarg][ctarg:])
+ return ''.join(new_lines)
+def com2ann(code, *, drop_None=False, drop_Ellipsis=False, silent=False):
+ """Translate type comments to type annotations in code.
+ Take code as string and return this string where::
+ variable = value # type: annotation # real comment
+ is translated to::
+ variable: annotation = value # real comment
+ For unsupported syntax cases, the type comments are
+ left intact. If drop_None is True or if drop_Ellipsis
+ is True translate correspondingly::
+ variable = None # type: annotation
+ variable = ... # type: annotation
+ into::
+ variable: annotation
+ The tool tries to preserve code formatting as much as
+ possible, but an exact translation is not guaranteed.
+ A summary of translated comments is printed by default;
+ pass silent=True to suppress it.
+ Return None if code cannot be parsed (SyntaxError).
+ """
+ try:
+ ast.parse(code) # we want to work only with file without syntax errors
+ except SyntaxError:
+ return None
+ # keepends=True so that joining the lines reproduces the original text
+ lines = code.splitlines(keepends=True)
+ # tokenize.tokenize() wants a readline callable returning bytes
+ rl = BytesIO(code.encode('utf-8')).readline
+ tokens = list(tokenize.tokenize(rl))
+ data = _Data(lines, tokens)
+ new_code = _com2ann(data, drop_None, drop_Ellipsis)
+ if not silent:
+ # line indexes are 0-based internally; report them 1-based
+ if data.success:
+ print('Comments translated on lines:',
+ ', '.join(str(lno+1) for lno in data.success))
+ # NOTE(review): the condition and the iterable of the "rejected" report
+ # below, and the second operand of the final test, are truncated in this
+ # copy of the patch -- restore them from upstream before applying.
+ if
+ print('Comments rejected on lines:',
+ ', '.join(str(lno+1) for lno in
+ if not data.success and not
+ print('No type comments found')
+ return new_code
+def translate_file(infile, outfile, dnone, dell, silent):
+ # Translate the type comments of infile with com2ann() and write the
+ # result to outfile.
+ # dnone, dell -- passed to com2ann() as drop_None and drop_Ellipsis
+ # silent -- passed to com2ann(); also suppresses the 'File:' line
+ # Prints a message and returns without writing when the input cannot be
+ # opened or com2ann() returns None.
+ # NOTE(review): the right-hand sides of ``descr =`` and ``code =`` are
+ # truncated in this copy of the patch -- restore them from upstream
+ # before applying.
+ try:
+ descr =
+ except SyntaxError:
+ print("Cannot open", infile)
+ return
+ with descr as f:
+ code =
+ enc = f.encoding
+ if not silent:
+ print('File:', infile)
+ new_code = com2ann(code, drop_None=dnone,
+ drop_Ellipsis=dell,
+ silent=silent)
+ if new_code is None:
+ print("SyntaxError in", infile)
+ return
+ # write back as bytes, using the encoding the input was read with
+ with open(outfile, 'wb') as f:
+ f.write((new_code).encode(enc))
+if __name__ == '__main__':
+    # Command-line entry point: translate one file (optionally to another
+    # path), or every .py/.pyi file below a directory in place.
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("-o", "--outfile",
+                        help="output file, will be overwritten if exists,\n"
+                             "defaults to input file")
+    parser.add_argument("infile",
+                        help="input file or directory for translation, must\n"
+                             "contain no syntax errors, for directory\n"
+                             "the outfile is ignored and translation is\n"
+                             "made in place")
+    parser.add_argument("-s", "--silent",
+                        help="Do not print summary for line numbers of\n"
+                             "translated and rejected comments",
+                        action="store_true")
+    parser.add_argument("-n", "--drop-none",
+                        help="drop any None as assignment value during\n"
+                             "translation if it is annotated by a type comment",
+                        action="store_true")
+    parser.add_argument("-e", "--drop-ellipsis",
+                        help="drop any Ellipsis (...) as assignment value during\n"
+                             "translation if it is annotated by a type comment",
+                        action="store_true")
+    args = parser.parse_args()
+    if args.outfile is None:
+        args.outfile = args.infile
+    if os.path.isfile(args.infile):
+        translate_file(args.infile, args.outfile,
+                       args.drop_none, args.drop_ellipsis, args.silent)
+    elif os.path.isdir(args.infile):
+        for root, dirs, files in os.walk(args.infile):
+            for afile in files:
+                _, ext = os.path.splitext(afile)
+                if ext in ('.py', '.pyi'):
+                    fname = os.path.join(root, afile)
+                    translate_file(fname, fname,
+                                   args.drop_none, args.drop_ellipsis,
+                                   args.silent)
+    else:
+        # Previously a wrong path fell through to os.walk(), which yields
+        # nothing, so the tool exited silently without doing any work.
+        parser.error("no such file or directory: " + args.infile)
diff --git a/Tools/parser/ b/Tools/parser/
index 7203057..6c296bd 100644
--- a/Tools/parser/
+++ b/Tools/parser/
@@ -104,6 +104,19 @@ class Unparser:
self.write(" "+self.binop[t.op.__class__.__name__]+"= ")
+ def _AnnAssign(self, t):
+ # Write an annotated assignment node t: the target, ": ", the annotation
+ # and, when t.value is set, " = " followed by the value.
+ # NOTE(review): the first argument of both isinstance() calls and the
+ # argument of the first self.dispatch() call are missing from this copy
+ # of the patch -- restore them from upstream before applying.
+ self.fill()
+ # an opening parenthesis is written when t.simple is false and the
+ # isinstance() check passes; the matching ")" follows the same test
+ if not t.simple and isinstance(, ast.Name):
+ self.write('(')
+ self.dispatch(
+ if not t.simple and isinstance(, ast.Name):
+ self.write(')')
+ self.write(": ")
+ self.dispatch(t.annotation)
+ if t.value:
+ self.write(" = ")
+ self.dispatch(t.value)
def _Return(self, t):
if t.value: