1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
|
"""Helper module to tranlate 3.5 type comments to 3.6 variable annotations."""
import re
import os
import ast
import argparse
import tokenize
from collections import defaultdict
from textwrap import dedent
from io import BytesIO
__all__ = ['com2ann', 'TYPE_COM']
TYPE_COM = re.compile(r'\s*#\s*type\s*:.*$', flags=re.DOTALL)
TRAIL_OR_COM = re.compile(r'\s*$|\s*#.*$', flags=re.DOTALL)
class _Data:
"""Internal class describing global data on file."""
def __init__(self, lines, tokens):
self.lines = lines
self.tokens = tokens
ttab = defaultdict(list) # maps line number to token numbers
for i, tok in enumerate(tokens):
ttab[tok.start[0]].append(i)
self.ttab = ttab
self.success = [] # list of lines where type comments where processed
self.fail = [] # list of lines where type comments where rejected
def skip_blank(d, lno):
    """Return the first line number >= lno whose line is not blank."""
    while not d.lines[lno].strip():
        lno += 1
    return lno
def find_start(d, lcom):
    """Find first char of the assignment target.

    lcom is the 0-based index of the line holding the type comment.
    Returns the 0-based line number where the statement starts.
    """
    # The comment token is next-to-last on its line (the last token is
    # NEWLINE/NL), hence the -2 index; d.ttab keys are 1-based lines.
    i = d.ttab[lcom + 1][-2]  # index of type comment token in tokens list
    # Walk tokens backwards until the end of the previous logical line
    # (NEWLINE) or the very start of the file (ENCODING, token 0).
    while ((d.tokens[i].exact_type != tokenize.NEWLINE) and
           (d.tokens[i].exact_type != tokenize.ENCODING)):
        i -= 1
    # Token rows are 1-based while d.lines is 0-based, so the previous
    # line's 1-based row is exactly the 0-based index of the next line.
    lno = d.tokens[i].start[0]
    return skip_blank(d, lno)
def check_target(stmt):
    """Return True if stmt is a single-target assignment whose target
    can carry a variable annotation (a name, attribute, or subscript)."""
    if not stmt.body:
        return False
    assign = stmt.body[0]
    if not isinstance(assign, ast.Assign):
        return False
    if len(assign.targets) != 1:
        return False
    return isinstance(assign.targets[0],
                      (ast.Name, ast.Attribute, ast.Subscript))
def find_eq(d, lstart):
    """Find equal sign starting from lstart taking care about d[f(x=1)] = 5.

    Scans character by character, tracking bracket nesting depth so '='
    signs inside parentheses/brackets/braces are skipped.
    Returns the (0-based line, column) position of the assignment's '='.
    """
    col = pars = 0  # pars -- current bracket nesting depth
    lno = lstart
    while d.lines[lno][col] != '=' or pars != 0:
        ch = d.lines[lno][col]
        if ch in '([{':
            pars += 1
        elif ch in ')]}':
            pars -= 1
        # A comment or the end of line means the statement continues on
        # a later (non-blank) line: jump there and restart at column 0.
        if ch == '#' or col == len(d.lines[lno])-1:
            lno = skip_blank(d, lno+1)
            col = 0
        else:
            col += 1
    return lno, col
def find_val(d, poseq):
    """Return position of the first char of the assignment value,
    scanning forward from poseq (the position of the equal sign)."""
    lno, col = poseq
    # Step over the '=' itself, whitespace, and line continuations.
    while d.lines[lno][col] in '=\\' or d.lines[lno][col].isspace():
        if col == len(d.lines[lno]) - 1:
            lno, col = lno + 1, 0
        else:
            col += 1
    return lno, col
def find_targ(d, poseq):
    """Return the position just past the last char of the assignment
    target (the annotation goes there), scanning backward from poseq
    (the position of the equal sign)."""
    lno, col = poseq
    # Step back over the '=', whitespace, and line continuations.
    while d.lines[lno][col] in '=\\' or d.lines[lno][col].isspace():
        if col:
            col -= 1
        else:
            lno -= 1
            col = len(d.lines[lno]) - 1
    return lno, col + 1
def trim(new_lines, string, ltarg, poseq, lcom, ccom):
    """Remove None or Ellipsis from assignment value.

    Also remove parens if one has (None), (...) etc.
    Mutates new_lines in place.

    string -- 'None' or '...'
    ltarg -- line where last char of target is located
    poseq -- position of equal sign
    lcom, ccom -- position of type comment
    """
    # Strip every parenthesis out of a line fragment.
    nopars = lambda s: s.replace('(', '').replace(')', '')
    leq, ceq = poseq
    # On the '=' line the removable span ends at the type comment (if the
    # comment is on this same line) or at the end of the line otherwise.
    end = ccom if leq == lcom else len(new_lines[leq])
    subline = new_lines[leq][:ceq]
    if leq == ltarg:
        subline = subline.rstrip()
    # Same line as the comment: cut out '=' and the whole value.
    # Otherwise: remove only the '=' here; the value text lives on a
    # later line and is handled below.
    new_lines[leq] = subline + (new_lines[leq][end:] if leq == lcom
                                else new_lines[leq][ceq+1:end])
    # Intermediate continuation lines may only carry parens to strip.
    for lno in range(leq+1,lcom):
        new_lines[lno] = nopars(new_lines[lno])
    if lcom != leq:
        # Final piece shares a line with the type comment: drop the
        # None/'...' text and any parentheses preceding the comment.
        subline = nopars(new_lines[lcom][:ccom]).replace(string, '')
        if (not subline.isspace()):
            subline = subline.rstrip()
        new_lines[lcom] = subline + new_lines[lcom][ccom:]
def _com2ann(d, drop_None, drop_Ellipsis):
    """Core translation pass; returns the rewritten file as one string.

    Appends the 0-based line numbers of translated and rejected type
    comments to d.success and d.fail respectively.
    """
    new_lines = d.lines[:]
    for lcom, line in enumerate(d.lines):
        match = re.search(TYPE_COM, line)
        if match:
            # strip " # type : annotation \n" -> "annotation \n"
            tp = match.group().lstrip()[1:].lstrip()[4:].lstrip()[1:].lstrip()
            # Split a trailing real comment (or trailing space) off the
            # annotation text.
            submatch = re.search(TRAIL_OR_COM, tp)
            subcom = ''
            if submatch and submatch.group():
                subcom = submatch.group()
                tp = tp[:submatch.start()]
            if tp == 'ignore':
                # "# type: ignore" is not an annotation; leave it alone.
                continue
            ccom = match.start()
            # If no COMMENT token starts on this line, the "# type:" text
            # is inside a string literal, not a real comment.
            if not any(d.tokens[i].exact_type == tokenize.COMMENT
                       for i in d.ttab[lcom + 1]):
                d.fail.append(lcom)
                continue  # type comment inside string
            lstart = find_start(d, lcom)
            stmt_str = dedent(''.join(d.lines[lstart:lcom+1]))
            try:
                stmt = ast.parse(stmt_str)
            except SyntaxError:
                d.fail.append(lcom)
                continue  # for or with statements
            if not check_target(stmt):
                d.fail.append(lcom)
                continue
            d.success.append(lcom)
            val = stmt.body[0].value
            # writing output now
            poseq = find_eq(d, lstart)
            lval, cval = find_val(d, poseq)
            ltarg, ctarg = find_targ(d, poseq)
            op_par = ''
            cl_par = ''
            if isinstance(val, ast.Tuple):
                # Bare tuples need parens once annotated; add them unless
                # the value already starts with '('.
                if d.lines[lval][cval] != '(':
                    op_par = '('
                    cl_par = ')'
            # write the comment first
            new_lines[lcom] = d.lines[lcom][:ccom].rstrip() + cl_par + subcom
            ccom = len(d.lines[lcom][:ccom].rstrip())
            string = False
            if isinstance(val, ast.Tuple):
                # t = 1, 2 -> t = (1, 2); only latter is allowed with annotation
                # Reuse one of two pre-value spaces for '(' if present.
                free_place = int(new_lines[lval][cval-2:cval] == '  ')
                new_lines[lval] = (new_lines[lval][:cval-free_place] +
                                   op_par + new_lines[lval][cval:])
            elif isinstance(val, ast.Ellipsis) and drop_Ellipsis:
                string = '...'
            elif (isinstance(val, ast.NameConstant) and
                  val.value is None and drop_None):
                string = 'None'
            if string:
                # Drop the dummy value ('None'/'...') entirely.
                trim(new_lines, string, ltarg, poseq, lcom, ccom)
            # finally write an annotation
            new_lines[ltarg] = (new_lines[ltarg][:ctarg] +
                                ': ' + tp + new_lines[ltarg][ctarg:])
    return ''.join(new_lines)
def com2ann(code, *, drop_None=False, drop_Ellipsis=False, silent=False):
    """Translate type comments to type annotations in code.

    Take code as string and return this string where::

      variable = value  # type: annotation  # real comment

    is translated to::

      variable: annotation = value  # real comment

    For unsupported syntax cases, the type comments are
    left intact.  If drop_None is True or if drop_Ellipsis
    is True translate correspondingly::

      variable = None  # type: annotation
      variable = ...  # type: annotation

    into::

      variable: annotation

    The tool tries to preserve code formatting as much as
    possible, but an exact translation is not guaranteed.
    A summary of translated comments is printed by default.
    Returns None if the input code has syntax errors.
    """
    try:
        ast.parse(code)  # we want to work only with file without syntax errors
    except SyntaxError:
        return None
    lines = code.splitlines(keepends=True)
    # Tokenize from bytes so tokenize emits the ENCODING token first.
    rl = BytesIO(code.encode('utf-8')).readline
    tokens = list(tokenize.tokenize(rl))
    data = _Data(lines, tokens)
    new_code = _com2ann(data, drop_None, drop_Ellipsis)
    if not silent:
        # Report 1-based line numbers to the user.
        if data.success:
            print('Comments translated on lines:',
                  ', '.join(str(lno+1) for lno in data.success))
        if data.fail:
            print('Comments rejected on lines:',
                  ', '.join(str(lno+1) for lno in data.fail))
        if not data.success and not data.fail:
            print('No type comments found')
    return new_code
def translate_file(infile, outfile, dnone, dell, silent):
    """Translate type comments in infile and write the result to outfile.

    dnone/dell -- forwarded as drop_None/drop_Ellipsis to com2ann.
    silent -- suppress the per-file summary.
    Prints a diagnostic and returns early if the file cannot be opened
    or contains syntax errors.
    """
    try:
        # tokenize.open() detects the source encoding (PEP 263).  It
        # raises SyntaxError for a bogus coding declaration, but a
        # missing/unreadable file raises OSError -- the original code
        # caught only SyntaxError, so open failures crashed instead of
        # printing "Cannot open".
        descr = tokenize.open(infile)
    except (SyntaxError, OSError):
        print("Cannot open", infile)
        return
    with descr as f:
        code = f.read()
        enc = f.encoding
    if not silent:
        print('File:', infile)
    new_code = com2ann(code, drop_None=dnone,
                       drop_Ellipsis=dell,
                       silent=silent)
    if new_code is None:
        print("SyntaxError in", infile)
        return
    # Write back using the input file's detected encoding.
    with open(outfile, 'wb') as f:
        f.write(new_code.encode(enc))
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-o", "--outfile",
                        help="output file, will be overwritten if exists,\n"
                             "defaults to input file")
    parser.add_argument("infile",
                        help="input file or directory for translation, must\n"
                             "contain no syntax errors, for directory\n"
                             "the outfile is ignored and translation is\n"
                             "made in place")
    parser.add_argument("-s", "--silent",
                        help="Do not print summary for line numbers of\n"
                             "translated and rejected comments",
                        action="store_true")
    parser.add_argument("-n", "--drop-none",
                        # fixed typo in user-visible help text: "coment"
                        help="drop any None as assignment value during\n"
                             "translation if it is annotated by a type comment",
                        action="store_true")
    parser.add_argument("-e", "--drop-ellipsis",
                        # fixed typo in user-visible help text: "coment"
                        help="drop any Ellipsis (...) as assignment value during\n"
                             "translation if it is annotated by a type comment",
                        action="store_true")
    args = parser.parse_args()
    if args.outfile is None:
        # No explicit output file: translate in place.
        args.outfile = args.infile
    if os.path.isfile(args.infile):
        translate_file(args.infile, args.outfile,
                       args.drop_none, args.drop_ellipsis, args.silent)
    else:
        # Directory: translate every .py/.pyi file in place, recursively.
        for root, dirs, files in os.walk(args.infile):
            for afile in files:
                _, ext = os.path.splitext(afile)
                if ext in ('.py', '.pyi'):
                    fname = os.path.join(root, afile)
                    translate_file(fname, fname,
                                   args.drop_none, args.drop_ellipsis,
                                   args.silent)
|