diff options
Diffstat (limited to 'Tools/parser')
-rw-r--r-- | Tools/parser/test_unparse.py | 240 | ||||
-rw-r--r-- | Tools/parser/unparse.py | 600 |
2 files changed, 840 insertions, 0 deletions
diff --git a/Tools/parser/test_unparse.py b/Tools/parser/test_unparse.py new file mode 100644 index 0000000..d457523 --- /dev/null +++ b/Tools/parser/test_unparse.py @@ -0,0 +1,240 @@ +import unittest +import test.support +import io +import os +import tokenize +import ast +import unparse + +def read_pyfile(filename): + """Read and return the contents of a Python source file (as a + string), taking into account the file encoding.""" + with open(filename, "rb") as pyfile: + encoding = tokenize.detect_encoding(pyfile.readline)[0] + with open(filename, "r", encoding=encoding) as pyfile: + source = pyfile.read() + return source + +for_else = """\ +def f(): + for x in range(10): + break + else: + y = 2 + z = 3 +""" + +while_else = """\ +def g(): + while True: + break + else: + y = 2 + z = 3 +""" + +relative_import = """\ +from . import fred +from .. import barney +from .australia import shrimp as prawns +""" + +nonlocal_ex = """\ +def f(): + x = 1 + def g(): + nonlocal x + x = 2 + y = 7 + def h(): + nonlocal x, y +""" + +# also acts as test for 'except ... as ...' +raise_from = """\ +try: + 1 / 0 +except ZeroDivisionError as e: + raise ArithmeticError from e +""" + +class_decorator = """\ +@f1(arg) +@f2 +class Foo: pass +""" + +elif1 = """\ +if cond1: + suite1 +elif cond2: + suite2 +else: + suite3 +""" + +elif2 = """\ +if cond1: + suite1 +elif cond2: + suite2 +""" + +try_except_finally = """\ +try: + suite1 +except ex1: + suite2 +except ex2: + suite3 +else: + suite4 +finally: + suite5 +""" + +class ASTTestCase(unittest.TestCase): + def assertASTEqual(self, ast1, ast2): + self.assertEqual(ast.dump(ast1), ast.dump(ast2)) + + def check_roundtrip(self, code1, filename="internal"): + ast1 = compile(code1, filename, "exec", ast.PyCF_ONLY_AST) + unparse_buffer = io.StringIO() + unparse.Unparser(ast1, unparse_buffer) + code2 = unparse_buffer.getvalue() + ast2 = compile(code2, filename, "exec", ast.PyCF_ONLY_AST) + self.assertASTEqual(ast1, ast2) + +class UnparseTestCase(ASTTestCase): + # Tests for specific bugs found in earlier versions of unparse + + def test_del_statement(self): + self.check_roundtrip("del x, y, z") + + def test_shifts(self): + self.check_roundtrip("45 << 2") + self.check_roundtrip("13 >> 7") + + def test_for_else(self): + self.check_roundtrip(for_else) + + def test_while_else(self): + self.check_roundtrip(while_else) + + def test_unary_parens(self): + self.check_roundtrip("(-1)**7") + self.check_roundtrip("(-1.)**8") + self.check_roundtrip("(-1j)**6") + self.check_roundtrip("not True or False") + self.check_roundtrip("True or not False") + + def test_integer_parens(self): + self.check_roundtrip("3 .__abs__()") + + def test_huge_float(self): + self.check_roundtrip("1e1000") + self.check_roundtrip("-1e1000") + self.check_roundtrip("1e1000j") + self.check_roundtrip("-1e1000j") + + def test_min_int(self): + self.check_roundtrip(str(-2**31)) + self.check_roundtrip(str(-2**63)) + + def test_imaginary_literals(self): + self.check_roundtrip("7j") + self.check_roundtrip("-7j") + self.check_roundtrip("0j") + self.check_roundtrip("-0j") + + def test_lambda_parentheses(self): + self.check_roundtrip("(lambda: int)()") + + def test_chained_comparisons(self): + self.check_roundtrip("1 < 4 <= 5") + self.check_roundtrip("a is b is c is not d") + + def test_function_arguments(self): + self.check_roundtrip("def f(): pass") + self.check_roundtrip("def f(a): pass") + self.check_roundtrip("def f(b = 2): pass") + self.check_roundtrip("def f(a, b): pass") + self.check_roundtrip("def f(a, b = 2): pass") + self.check_roundtrip("def f(a = 5, b = 2): pass") + self.check_roundtrip("def f(*, a = 1, b = 2): pass") + self.check_roundtrip("def f(*, a = 1, b): pass") + self.check_roundtrip("def f(*, a, b = 2): pass") + self.check_roundtrip("def f(a, b = None, *, c, **kwds): pass") + self.check_roundtrip("def f(a=2, *args, c=5, d, **kwds): pass") + self.check_roundtrip("def f(*args, **kwargs): pass") + + def test_relative_import(self): + self.check_roundtrip(relative_import) + + def test_nonlocal(self): + self.check_roundtrip(nonlocal_ex) + + def test_raise_from(self): + self.check_roundtrip(raise_from) + + def test_bytes(self): + self.check_roundtrip("b'123'") + + def test_annotations(self): + self.check_roundtrip("def f(a : int): pass") + self.check_roundtrip("def f(a: int = 5): pass") + self.check_roundtrip("def f(*args: [int]): pass") + self.check_roundtrip("def f(**kwargs: dict): pass") + self.check_roundtrip("def f() -> None: pass") + + def test_set_literal(self): + self.check_roundtrip("{'a', 'b', 'c'}") + + def test_set_comprehension(self): + self.check_roundtrip("{x for x in range(5)}") + + def test_dict_comprehension(self): + self.check_roundtrip("{x: x*x for x in range(10)}") + + def test_class_decorators(self): + self.check_roundtrip(class_decorator) + + def test_class_definition(self): + self.check_roundtrip("class A(metaclass=type, *[], **{}): pass") + + def test_elifs(self): + self.check_roundtrip(elif1) + self.check_roundtrip(elif2) + + def test_try_except_finally(self): + self.check_roundtrip(try_except_finally) + +class DirectoryTestCase(ASTTestCase): + """Test roundtrip behaviour on all files in Lib and Lib/test.""" + + # test directories, relative to the root of the distribution + test_directories = 'Lib', os.path.join('Lib', 'test') + + def test_files(self): + # get names of files to test + dist_dir = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir) + + names = [] + for d in self.test_directories: + test_dir = os.path.join(dist_dir, d) + for n in os.listdir(test_dir): + if n.endswith('.py') and not n.startswith('bad'): + names.append(os.path.join(test_dir, n)) + + for filename in names: + if test.support.verbose: + print('Testing %s' % filename) + source = read_pyfile(filename) + self.check_roundtrip(source) + + +def test_main(): + test.support.run_unittest(UnparseTestCase, DirectoryTestCase) + +if __name__ == '__main__': + test_main() diff --git a/Tools/parser/unparse.py b/Tools/parser/unparse.py new file mode 100644 index 0000000..e96ef54 --- /dev/null +++ b/Tools/parser/unparse.py @@ -0,0 +1,600 @@ +"Usage: unparse.py <path to source file>" +import sys +import math +import ast +import tokenize +import io +import os + +# Large float and imaginary literals get turned into infinities in the AST. +# We unparse those infinities to INFSTR. +INFSTR = "1e" + repr(sys.float_info.max_10_exp + 1) + +def interleave(inter, f, seq): + """Call f on each item in seq, calling inter() in between. + """ + seq = iter(seq) + try: + f(next(seq)) + except StopIteration: + pass + else: + for x in seq: + inter() + f(x) + +class Unparser: + """Methods in this class recursively traverse an AST and + output source code for the abstract syntax; original formatting + is disregarded. """ + + def __init__(self, tree, file = sys.stdout): + """Unparser(tree, file=sys.stdout) -> None. + Print the source for tree to file.""" + self.f = file + self._indent = 0 + self.dispatch(tree) + print("", file=self.f) + self.f.flush() + + def fill(self, text = ""): + "Indent a piece of text, according to the current indentation level" + self.f.write("\n"+" "*self._indent + text) + + def write(self, text): + "Append a piece of text to the current line." + self.f.write(text) + + def enter(self): + "Print ':', and increase the indentation." + self.write(":") + self._indent += 1 + + def leave(self): + "Decrease the indentation level." + self._indent -= 1 + + def dispatch(self, tree): + "Dispatcher function, dispatching tree type T to method _T." + if isinstance(tree, list): + for t in tree: + self.dispatch(t) + return + meth = getattr(self, "_"+tree.__class__.__name__) + meth(tree) + + + ############### Unparsing methods ###################### + # There should be one method per concrete grammar type # + # Constructors should be grouped by sum type. Ideally, # + # this would follow the order in the grammar, but # + # currently doesn't. # + ######################################################## + + def _Module(self, tree): + for stmt in tree.body: + self.dispatch(stmt) + + # stmt + def _Expr(self, tree): + self.fill() + self.dispatch(tree.value) + + def _Import(self, t): + self.fill("import ") + interleave(lambda: self.write(", "), self.dispatch, t.names) + + def _ImportFrom(self, t): + self.fill("from ") + self.write("." * t.level) + if t.module: + self.write(t.module) + self.write(" import ") + interleave(lambda: self.write(", "), self.dispatch, t.names) + + def _Assign(self, t): + self.fill() + for target in t.targets: + self.dispatch(target) + self.write(" = ") + self.dispatch(t.value) + + def _AugAssign(self, t): + self.fill() + self.dispatch(t.target) + self.write(" "+self.binop[t.op.__class__.__name__]+"= ") + self.dispatch(t.value) + + def _Return(self, t): + self.fill("return") + if t.value: + self.write(" ") + self.dispatch(t.value) + + def _Pass(self, t): + self.fill("pass") + + def _Break(self, t): + self.fill("break") + + def _Continue(self, t): + self.fill("continue") + + def _Delete(self, t): + self.fill("del ") + interleave(lambda: self.write(", "), self.dispatch, t.targets) + + def _Assert(self, t): + self.fill("assert ") + self.dispatch(t.test) + if t.msg: + self.write(", ") + self.dispatch(t.msg) + + def _Global(self, t): + self.fill("global ") + interleave(lambda: self.write(", "), self.write, t.names) + + def _Nonlocal(self, t): + self.fill("nonlocal ") + interleave(lambda: self.write(", "), self.write, t.names) + + def _Yield(self, t): + self.write("(") + self.write("yield") + if t.value: + self.write(" ") + self.dispatch(t.value) + self.write(")") + + def _Raise(self, t): + self.fill("raise") + if not t.exc: + assert not t.cause + return + self.write(" ") + self.dispatch(t.exc) + if t.cause: + self.write(" from ") + self.dispatch(t.cause) + + def _TryExcept(self, t): + self.fill("try") + self.enter() + self.dispatch(t.body) + self.leave() + + for ex in t.handlers: + self.dispatch(ex) + if t.orelse: + self.fill("else") + self.enter() + self.dispatch(t.orelse) + self.leave() + + def _TryFinally(self, t): + if len(t.body) == 1 and isinstance(t.body[0], ast.TryExcept): + # try-except-finally + self.dispatch(t.body) + else: + self.fill("try") + self.enter() + self.dispatch(t.body) + self.leave() + + self.fill("finally") + self.enter() + self.dispatch(t.finalbody) + self.leave() + + def _ExceptHandler(self, t): + self.fill("except") + if t.type: + self.write(" ") + self.dispatch(t.type) + if t.name: + self.write(" as ") + self.write(t.name) + self.enter() + self.dispatch(t.body) + self.leave() + + def _ClassDef(self, t): + self.write("\n") + for deco in t.decorator_list: + self.fill("@") + self.dispatch(deco) + self.fill("class "+t.name) + self.write("(") + comma = False + for e in t.bases: + if comma: self.write(", ") + else: comma = True + self.dispatch(e) + for e in t.keywords: + if comma: self.write(", ") + else: comma = True + self.dispatch(e) + if t.starargs: + if comma: self.write(", ") + else: comma = True + self.write("*") + self.dispatch(t.starargs) + if t.kwargs: + if comma: self.write(", ") + else: comma = True + self.write("**") + self.dispatch(t.kwargs) + self.write(")") + + self.enter() + self.dispatch(t.body) + self.leave() + + def _FunctionDef(self, t): + self.write("\n") + for deco in t.decorator_list: + self.fill("@") + self.dispatch(deco) + self.fill("def "+t.name + "(") + self.dispatch(t.args) + self.write(")") + if t.returns: + self.write(" -> ") + self.dispatch(t.returns) + self.enter() + self.dispatch(t.body) + self.leave() + + def _For(self, t): + self.fill("for ") + self.dispatch(t.target) + self.write(" in ") + self.dispatch(t.iter) + self.enter() + self.dispatch(t.body) + self.leave() + if t.orelse: + self.fill("else") + self.enter() + self.dispatch(t.orelse) + self.leave() + + def _If(self, t): + self.fill("if ") + self.dispatch(t.test) + self.enter() + self.dispatch(t.body) + self.leave() + # collapse nested ifs into equivalent elifs. + while (t.orelse and len(t.orelse) == 1 and + isinstance(t.orelse[0], ast.If)): + t = t.orelse[0] + self.fill("elif ") + self.dispatch(t.test) + self.enter() + self.dispatch(t.body) + self.leave() + # final else + if t.orelse: + self.fill("else") + self.enter() + self.dispatch(t.orelse) + self.leave() + + def _While(self, t): + self.fill("while ") + self.dispatch(t.test) + self.enter() + self.dispatch(t.body) + self.leave() + if t.orelse: + self.fill("else") + self.enter() + self.dispatch(t.orelse) + self.leave() + + def _With(self, t): + self.fill("with ") + self.dispatch(t.context_expr) + if t.optional_vars: + self.write(" as ") + self.dispatch(t.optional_vars) + self.enter() + self.dispatch(t.body) + self.leave() + + # expr + def _Bytes(self, t): + self.write(repr(t.s)) + + def _Str(self, tree): + self.write(repr(tree.s)) + + def _Name(self, t): + self.write(t.id) + + def _Num(self, t): + # Substitute overflowing decimal literal for AST infinities. + self.write(repr(t.n).replace("inf", INFSTR)) + + def _List(self, t): + self.write("[") + interleave(lambda: self.write(", "), self.dispatch, t.elts) + self.write("]") + + def _ListComp(self, t): + self.write("[") + self.dispatch(t.elt) + for gen in t.generators: + self.dispatch(gen) + self.write("]") + + def _GeneratorExp(self, t): + self.write("(") + self.dispatch(t.elt) + for gen in t.generators: + self.dispatch(gen) + self.write(")") + + def _SetComp(self, t): + self.write("{") + self.dispatch(t.elt) + for gen in t.generators: + self.dispatch(gen) + self.write("}") + + def _DictComp(self, t): + self.write("{") + self.dispatch(t.key) + self.write(": ") + self.dispatch(t.value) + for gen in t.generators: + self.dispatch(gen) + self.write("}") + + def _comprehension(self, t): + self.write(" for ") + self.dispatch(t.target) + self.write(" in ") + self.dispatch(t.iter) + for if_clause in t.ifs: + self.write(" if ") + self.dispatch(if_clause) + + def _IfExp(self, t): + self.write("(") + self.dispatch(t.body) + self.write(" if ") + self.dispatch(t.test) + self.write(" else ") + self.dispatch(t.orelse) + self.write(")") + + def _Set(self, t): + assert(t.elts) # should be at least one element + self.write("{") + interleave(lambda: self.write(", "), self.dispatch, t.elts) + self.write("}") + + def _Dict(self, t): + self.write("{") + def write_pair(pair): + (k, v) = pair + self.dispatch(k) + self.write(": ") + self.dispatch(v) + interleave(lambda: self.write(", "), write_pair, zip(t.keys, t.values)) + self.write("}") + + def _Tuple(self, t): + self.write("(") + if len(t.elts) == 1: + (elt,) = t.elts + self.dispatch(elt) + self.write(",") + else: + interleave(lambda: self.write(", "), self.dispatch, t.elts) + self.write(")") + + unop = {"Invert":"~", "Not": "not", "UAdd":"+", "USub":"-"} + def _UnaryOp(self, t): + self.write("(") + self.write(self.unop[t.op.__class__.__name__]) + self.write(" ") + self.dispatch(t.operand) + self.write(")") + + binop = { "Add":"+", "Sub":"-", "Mult":"*", "Div":"/", "Mod":"%", + "LShift":"<<", "RShift":">>", "BitOr":"|", "BitXor":"^", "BitAnd":"&", + "FloorDiv":"//", "Pow": "**"} + def _BinOp(self, t): + self.write("(") + self.dispatch(t.left) + self.write(" " + self.binop[t.op.__class__.__name__] + " ") + self.dispatch(t.right) + self.write(")") + + cmpops = {"Eq":"==", "NotEq":"!=", "Lt":"<", "LtE":"<=", "Gt":">", "GtE":">=", + "Is":"is", "IsNot":"is not", "In":"in", "NotIn":"not in"} + def _Compare(self, t): + self.write("(") + self.dispatch(t.left) + for o, e in zip(t.ops, t.comparators): + self.write(" " + self.cmpops[o.__class__.__name__] + " ") + self.dispatch(e) + self.write(")") + + boolops = {ast.And: 'and', ast.Or: 'or'} + def _BoolOp(self, t): + self.write("(") + s = " %s " % self.boolops[t.op.__class__] + interleave(lambda: self.write(s), self.dispatch, t.values) + self.write(")") + + def _Attribute(self,t): + self.dispatch(t.value) + # Special case: 3.__abs__() is a syntax error, so if t.value + # is an integer literal then we need to either parenthesize + # it or add an extra space to get 3 .__abs__(). + if isinstance(t.value, ast.Num) and isinstance(t.value.n, int): + self.write(" ") + self.write(".") + self.write(t.attr) + + def _Call(self, t): + self.dispatch(t.func) + self.write("(") + comma = False + for e in t.args: + if comma: self.write(", ") + else: comma = True + self.dispatch(e) + for e in t.keywords: + if comma: self.write(", ") + else: comma = True + self.dispatch(e) + if t.starargs: + if comma: self.write(", ") + else: comma = True + self.write("*") + self.dispatch(t.starargs) + if t.kwargs: + if comma: self.write(", ") + else: comma = True + self.write("**") + self.dispatch(t.kwargs) + self.write(")") + + def _Subscript(self, t): + self.dispatch(t.value) + self.write("[") + self.dispatch(t.slice) + self.write("]") + + # slice + def _Ellipsis(self, t): + self.write("...") + + def _Index(self, t): + self.dispatch(t.value) + + def _Slice(self, t): + if t.lower: + self.dispatch(t.lower) + self.write(":") + if t.upper: + self.dispatch(t.upper) + if t.step: + self.write(":") + self.dispatch(t.step) + + def _ExtSlice(self, t): + interleave(lambda: self.write(', '), self.dispatch, t.dims) + + # argument + def _arg(self, t): + self.write(t.arg) + if t.annotation: + self.write(": ") + self.dispatch(t.annotation) + + # others + def _arguments(self, t): + first = True + # normal arguments + defaults = [None] * (len(t.args) - len(t.defaults)) + t.defaults + for a, d in zip(t.args, defaults): + if first:first = False + else: self.write(", ") + self.dispatch(a) + if d: + self.write("=") + self.dispatch(d) + + # varargs, or bare '*' if no varargs but keyword-only arguments present + if t.vararg or t.kwonlyargs: + if first:first = False + else: self.write(", ") + self.write("*") + if t.vararg: + self.write(t.vararg) + if t.varargannotation: + self.write(": ") + self.dispatch(t.varargannotation) + + # keyword-only arguments + if t.kwonlyargs: + for a, d in zip(t.kwonlyargs, t.kw_defaults): + if first:first = False + else: self.write(", ") + self.dispatch(a), + if d: + self.write("=") + self.dispatch(d) + + # kwargs + if t.kwarg: + if first:first = False + else: self.write(", ") + self.write("**"+t.kwarg) + if t.kwargannotation: + self.write(": ") + self.dispatch(t.kwargannotation) + + def _keyword(self, t): + self.write(t.arg) + self.write("=") + self.dispatch(t.value) + + def _Lambda(self, t): + self.write("(") + self.write("lambda ") + self.dispatch(t.args) + self.write(": ") + self.dispatch(t.body) + self.write(")") + + def _alias(self, t): + self.write(t.name) + if t.asname: + self.write(" as "+t.asname) + +def roundtrip(filename, output=sys.stdout): + with open(filename, "rb") as pyfile: + encoding = tokenize.detect_encoding(pyfile.readline)[0] + with open(filename, "r", encoding=encoding) as pyfile: + source = pyfile.read() + tree = compile(source, filename, "exec", ast.PyCF_ONLY_AST) + Unparser(tree, output) + + + +def testdir(a): + try: + names = [n for n in os.listdir(a) if n.endswith('.py')] + except OSError: + print("Directory not readable: %s" % a, file=sys.stderr) + else: + for n in names: + fullname = os.path.join(a, n) + if os.path.isfile(fullname): + output = io.StringIO() + print('Testing %s' % fullname) + try: + roundtrip(fullname, output) + except Exception as e: + print(' Failed to compile, exception is %s' % repr(e)) + elif os.path.isdir(fullname): + testdir(fullname) + +def main(args): + if args[0] == '--testdir': + for a in args[1:]: + testdir(a) + else: + for a in args: + roundtrip(a) + +if __name__=='__main__': + main(sys.argv[1:]) |