author    | Yury Selivanov <yury@magic.io> | 2016-09-09 03:50:03 (GMT)
committer | Yury Selivanov <yury@magic.io> | 2016-09-09 03:50:03 (GMT)
commit    | f8cb8a16a344ab208fd46876c4b63604987347b8 (patch)
tree      | c44caa48291401d1e1e388004d2762513ac88c93 /Tools/parser
parent    | 09ad17810c38d1aaae02de69084dd2a8ad9f5cdb (diff)
Issue #27985: Implement PEP 526 -- Syntax for Variable Annotations.
Patch by Ivan Levkivskyi.
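
For context, PEP 526 replaces 3.5-style type comments with first-class annotation syntax. The forms below are standard Python 3.6 examples illustrating the feature itself; they are not part of this patch:

    from typing import ClassVar, List

    primes: List[int] = []   # annotated assignment
    captain: str             # annotation without an initial value

    class Starship:
        stats: ClassVar[dict] = {}   # class variable annotation
        damage: int                  # instance variable annotation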
Diffstat (limited to 'Tools/parser')
-rw-r--r-- | Tools/parser/com2ann.py | 308
-rw-r--r-- | Tools/parser/unparse.py |  13
2 files changed, 321 insertions, 0 deletions
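
The new Tools/parser/com2ann.py helper added below rewrites 3.5-style type comments into such annotations. Per its docstring (input shown first, output second; the identifiers are placeholders):

    variable = value  # type: annotation  # real comment
    variable: annotation = value  # real comment

With the -n/--drop-none and -e/--drop-ellipsis options, annotated `= None` and `= ...` values are dropped entirely, leaving a bare `variable: annotation`.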
diff --git a/Tools/parser/com2ann.py b/Tools/parser/com2ann.py
new file mode 100644
index 0000000..1f46e73
--- /dev/null
+++ b/Tools/parser/com2ann.py
@@ -0,0 +1,308 @@
+"""Helper module to translate 3.5 type comments to 3.6 variable annotations."""
+import re
+import os
+import ast
+import argparse
+import tokenize
+from collections import defaultdict
+from textwrap import dedent
+from io import BytesIO
+
+__all__ = ['com2ann', 'TYPE_COM']
+
+TYPE_COM = re.compile(r'\s*#\s*type\s*:.*$', flags=re.DOTALL)
+TRAIL_OR_COM = re.compile(r'\s*$|\s*#.*$', flags=re.DOTALL)
+
+
+class _Data:
+    """Internal class describing global data on file."""
+    def __init__(self, lines, tokens):
+        self.lines = lines
+        self.tokens = tokens
+        ttab = defaultdict(list)  # maps line number to token numbers
+        for i, tok in enumerate(tokens):
+            ttab[tok.start[0]].append(i)
+        self.ttab = ttab
+        self.success = []  # list of lines where type comments were processed
+        self.fail = []     # list of lines where type comments were rejected
+
+
+def skip_blank(d, lno):
+    while d.lines[lno].strip() == '':
+        lno += 1
+    return lno
+
+
+def find_start(d, lcom):
+    """Find first char of the assignment target."""
+    i = d.ttab[lcom + 1][-2]  # index of type comment token in tokens list
+    while ((d.tokens[i].exact_type != tokenize.NEWLINE) and
+           (d.tokens[i].exact_type != tokenize.ENCODING)):
+        i -= 1
+    lno = d.tokens[i].start[0]
+    return skip_blank(d, lno)
+
+
+def check_target(stmt):
+    if len(stmt.body):
+        assign = stmt.body[0]
+    else:
+        return False
+    if isinstance(assign, ast.Assign) and len(assign.targets) == 1:
+        targ = assign.targets[0]
+    else:
+        return False
+    if (isinstance(targ, ast.Name) or isinstance(targ, ast.Attribute)
+            or isinstance(targ, ast.Subscript)):
+        return True
+    return False
+
+
+def find_eq(d, lstart):
+    """Find equal sign starting from lstart, taking care of cases like d[f(x=1)] = 5."""
+    col = pars = 0
+    lno = lstart
+    while d.lines[lno][col] != '=' or pars != 0:
+        ch = d.lines[lno][col]
+        if ch in '([{':
+            pars += 1
+        elif ch in ')]}':
+            pars -= 1
+        if ch == '#' or col == len(d.lines[lno])-1:
+            lno = skip_blank(d, lno+1)
+            col = 0
+        else:
+            col += 1
+    return lno, col
+
+
+def find_val(d, poseq):
+    """Find position of first char of assignment value starting from poseq."""
+    lno, col = poseq
+    while (d.lines[lno][col].isspace() or d.lines[lno][col] in '=\\'):
+        if col == len(d.lines[lno])-1:
+            lno += 1
+            col = 0
+        else:
+            col += 1
+    return lno, col
+
+
+def find_targ(d, poseq):
+    """Find position of last char of target (annotation goes here)."""
+    lno, col = poseq
+    while (d.lines[lno][col].isspace() or d.lines[lno][col] in '=\\'):
+        if col == 0:
+            lno -= 1
+            col = len(d.lines[lno])-1
+        else:
+            col -= 1
+    return lno, col+1
+
+
+def trim(new_lines, string, ltarg, poseq, lcom, ccom):
+    """Remove None or Ellipsis from assignment value.
+
+    Also remove parens if one has (None), (...) etc.
+    string -- 'None' or '...'
+    ltarg -- line where last char of target is located
+    poseq -- position of equal sign
+    lcom, ccom -- position of type comment
+    """
+    nopars = lambda s: s.replace('(', '').replace(')', '')
+    leq, ceq = poseq
+    end = ccom if leq == lcom else len(new_lines[leq])
+    subline = new_lines[leq][:ceq]
+    if leq == ltarg:
+        subline = subline.rstrip()
+    new_lines[leq] = subline + (new_lines[leq][end:] if leq == lcom
+                                else new_lines[leq][ceq+1:end])
+
+    for lno in range(leq+1, lcom):
+        new_lines[lno] = nopars(new_lines[lno])
+
+    if lcom != leq:
+        subline = nopars(new_lines[lcom][:ccom]).replace(string, '')
+        if (not subline.isspace()):
+            subline = subline.rstrip()
+        new_lines[lcom] = subline + new_lines[lcom][ccom:]
+
+
+def _com2ann(d, drop_None, drop_Ellipsis):
+    new_lines = d.lines[:]
+    for lcom, line in enumerate(d.lines):
+        match = re.search(TYPE_COM, line)
+        if match:
+            # strip " # type : annotation \n" -> "annotation \n"
+            tp = match.group().lstrip()[1:].lstrip()[4:].lstrip()[1:].lstrip()
+            submatch = re.search(TRAIL_OR_COM, tp)
+            subcom = ''
+            if submatch and submatch.group():
+                subcom = submatch.group()
+                tp = tp[:submatch.start()]
+            if tp == 'ignore':
+                continue
+            ccom = match.start()
+            if not any(d.tokens[i].exact_type == tokenize.COMMENT
+                       for i in d.ttab[lcom + 1]):
+                d.fail.append(lcom)
+                continue  # type comment inside string
+            lstart = find_start(d, lcom)
+            stmt_str = dedent(''.join(d.lines[lstart:lcom+1]))
+            try:
+                stmt = ast.parse(stmt_str)
+            except SyntaxError:
+                d.fail.append(lcom)
+                continue  # for or with statements
+            if not check_target(stmt):
+                d.fail.append(lcom)
+                continue
+
+            d.success.append(lcom)
+            val = stmt.body[0].value
+
+            # writing output now
+            poseq = find_eq(d, lstart)
+            lval, cval = find_val(d, poseq)
+            ltarg, ctarg = find_targ(d, poseq)
+
+            op_par = ''
+            cl_par = ''
+            if isinstance(val, ast.Tuple):
+                if d.lines[lval][cval] != '(':
+                    op_par = '('
+                    cl_par = ')'
+            # write the comment first
+            new_lines[lcom] = d.lines[lcom][:ccom].rstrip() + cl_par + subcom
+            ccom = len(d.lines[lcom][:ccom].rstrip())
+
+            string = False
+            if isinstance(val, ast.Tuple):
+                # t = 1, 2 -> t = (1, 2); only latter is allowed with annotation
+                free_place = int(new_lines[lval][cval-2:cval] == '  ')
+                new_lines[lval] = (new_lines[lval][:cval-free_place] +
+                                   op_par + new_lines[lval][cval:])
+            elif isinstance(val, ast.Ellipsis) and drop_Ellipsis:
+                string = '...'
+            elif (isinstance(val, ast.NameConstant) and
+                  val.value is None and drop_None):
+                string = 'None'
+            if string:
+                trim(new_lines, string, ltarg, poseq, lcom, ccom)
+
+            # finally write an annotation
+            new_lines[ltarg] = (new_lines[ltarg][:ctarg] +
+                                ': ' + tp + new_lines[ltarg][ctarg:])
+    return ''.join(new_lines)
+
+
+def com2ann(code, *, drop_None=False, drop_Ellipsis=False, silent=False):
+    """Translate type comments to type annotations in code.
+
+    Take code as a string and return this string where::
+
+        variable = value  # type: annotation  # real comment
+
+    is translated to::
+
+        variable: annotation = value  # real comment
+
+    For unsupported syntax cases, the type comments are
+    left intact.  If drop_None is True or if drop_Ellipsis
+    is True, translate correspondingly::
+
+        variable = None  # type: annotation
+        variable = ...   # type: annotation
+
+    into::
+
+        variable: annotation
+
+    The tool tries to preserve code formatting as much as
+    possible, but an exact translation is not guaranteed.
+    A summary of translated comments is printed by default.
+ """ + try: + ast.parse(code) # we want to work only with file without syntax errors + except SyntaxError: + return None + lines = code.splitlines(keepends=True) + rl = BytesIO(code.encode('utf-8')).readline + tokens = list(tokenize.tokenize(rl)) + + data = _Data(lines, tokens) + new_code = _com2ann(data, drop_None, drop_Ellipsis) + + if not silent: + if data.success: + print('Comments translated on lines:', + ', '.join(str(lno+1) for lno in data.success)) + if data.fail: + print('Comments rejected on lines:', + ', '.join(str(lno+1) for lno in data.fail)) + if not data.success and not data.fail: + print('No type comments found') + + return new_code + + +def translate_file(infile, outfile, dnone, dell, silent): + try: + descr = tokenize.open(infile) + except SyntaxError: + print("Cannot open", infile) + return + with descr as f: + code = f.read() + enc = f.encoding + if not silent: + print('File:', infile) + new_code = com2ann(code, drop_None=dnone, + drop_Ellipsis=dell, + silent=silent) + if new_code is None: + print("SyntaxError in", infile) + return + with open(outfile, 'wb') as f: + f.write((new_code).encode(enc)) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("-o", "--outfile", + help="output file, will be overwritten if exists,\n" + "defaults to input file") + parser.add_argument("infile", + help="input file or directory for translation, must\n" + "contain no syntax errors, for directory\n" + "the outfile is ignored and translation is\n" + "made in place") + parser.add_argument("-s", "--silent", + help="Do not print summary for line numbers of\n" + "translated and rejected comments", + action="store_true") + parser.add_argument("-n", "--drop-none", + help="drop any None as assignment value during\n" + "translation if it is annotated by a type coment", + action="store_true") + parser.add_argument("-e", "--drop-ellipsis", + help="drop any Ellipsis (...) as assignment value during\n" + "translation if it is annotated by a type coment", + action="store_true") + args = parser.parse_args() + if args.outfile is None: + args.outfile = args.infile + + if os.path.isfile(args.infile): + translate_file(args.infile, args.outfile, + args.drop_none, args.drop_ellipsis, args.silent) + else: + for root, dirs, files in os.walk(args.infile): + for afile in files: + _, ext = os.path.splitext(afile) + if ext == '.py' or ext == '.pyi': + fname = os.path.join(root, afile) + translate_file(fname, fname, + args.drop_none, args.drop_ellipsis, + args.silent) diff --git a/Tools/parser/unparse.py b/Tools/parser/unparse.py index 7203057..6c296bd 100644 --- a/Tools/parser/unparse.py +++ b/Tools/parser/unparse.py @@ -104,6 +104,19 @@ class Unparser: self.write(" "+self.binop[t.op.__class__.__name__]+"= ") self.dispatch(t.value) + def _AnnAssign(self, t): + self.fill() + if not t.simple and isinstance(t.target, ast.Name): + self.write('(') + self.dispatch(t.target) + if not t.simple and isinstance(t.target, ast.Name): + self.write(')') + self.write(": ") + self.dispatch(t.annotation) + if t.value: + self.write(" = ") + self.dispatch(t.value) + def _Return(self, t): self.fill("return") if t.value: |