1 files changed, 29 insertions, 3 deletions
diff --git a/Lib/compiler/transformer.py b/Lib/compiler/transformer.py
index 382ea41..d1001bd 100644
--- a/Lib/compiler/transformer.py
+++ b/Lib/compiler/transformer.py
@@ -37,7 +37,11 @@ from consts import OP_ASSIGN, OP_DELETE, OP_APPLY
 
 def parseFile(path):
     f = open(path)
-    src = f.read()
+    # XXX The parser API tolerates files without a trailing newline,
+    # but not strings without a trailing newline.  Always add an extra
+    # newline to the file contents, since we're going through the string
+    # version of the API.
+    src = f.read() + "\n"
     f.close()
     return parse(src)
 
@@ -100,6 +104,7 @@ class Transformer:
                                token.STRING: self.atom_string,
                                token.NAME: self.atom_name,
                                }
+        self.encoding = None
 
     def transform(self, tree):
         """Transform an AST into a modified parse tree."""
@@ -110,6 +115,7 @@ class Transformer:
     def parsesuite(self, text):
         """Return a modified parse tree for the given suite text."""
         # Hack for handling non-native line endings on non-DOS like OSs.
+        # this can go now we have universal newlines?
         text = text.replace('\x0d', '')
         return self.transform(parser.suite(text))
 
@@ -131,6 +137,12 @@ class Transformer:
     def compile_node(self, node):
         ### emit a line-number node?
         n = node[0]
+
+        if n == symbol.encoding_decl:
+            self.encoding = node[2]
+            node = node[1]
+            n = node[0]
+        
         if n == symbol.single_input:
             return self.single_input(node[1:])
         if n == symbol.file_input:
@@ -519,6 +531,7 @@ class Transformer:
         return self.com_binary(Tuple, nodelist)
 
     testlist_safe = testlist # XXX
+    testlist1 = testlist
     exprlist = testlist
 
     def test(self, nodelist):
@@ -637,11 +650,14 @@ class Transformer:
     def factor(self, nodelist):
         elt = nodelist[0]
         t = elt[0]
+        print "source", nodelist[-1]
         node = self.com_node(nodelist[-1])
+        # need to handle (unary op)constant here...
         if t == token.PLUS:
             node = UnaryAdd(node)
             node.lineno = elt[2]
         elif t == token.MINUS:
+            print node
             node = UnarySub(node)
             node.lineno = elt[2]
         elif t == token.TILDE:
@@ -699,11 +715,21 @@ class Transformer:
         n.lineno = nodelist[0][2]
         return n
 
+    def decode_literal(self, lit):
+        if self.encoding:
+            # this is particularly fragile & a bit of a
+            # hack... changes in compile.c:parsestr and
+            # tokenizer.c must be reflected here.
+            if self.encoding not in ['utf-8', 'iso-8859-1']:
+                lit = unicode(lit, 'utf-8').encode(self.encoding)
+            return eval("# coding: %s\n%s" % (self.encoding, lit))
+        else:
+            return eval(lit)
+
     def atom_string(self, nodelist):
-        ### need to verify this matches compile.c
         k = ''
         for node in nodelist:
-            k = k + eval(node[1])
+            k += self.decode_literal(node[1])
         n = Const(k)
         n.lineno = nodelist[0][2]
         return n