10 files changed, 341 insertions, 396 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index 9407b2f..6b06dec 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -1,119 +1,120 @@
--- ASDL's four builtin types are identifier, int, string, object
+-- ASDL's five builtin types are identifier, int, string, bytes, object
 
-module Python version "$Revision$"
+module Python
 {
-	mod = Module(stmt* body)
-	    | Interactive(stmt* body)
-	    | Expression(expr body)
+    mod = Module(stmt* body)
+        | Interactive(stmt* body)
+        | Expression(expr body)
 
-	    -- not really an actual node but useful in Jython's typesystem.
-	    | Suite(stmt* body)
+        -- not really an actual node but useful in Jython's typesystem.
+        | Suite(stmt* body)
 
-	stmt = FunctionDef(identifier name, arguments args, 
+    stmt = FunctionDef(identifier name, arguments args, 
                            stmt* body, expr* decorator_list, expr? returns)
-	      | ClassDef(identifier name, 
-			 expr* bases,
-			 keyword* keywords,
-			 expr? starargs,
-			 expr? kwargs,
-			 stmt* body,
-			 expr* decorator_list)
-	      | Return(expr? value)
-
-	      | Delete(expr* targets)
-	      | Assign(expr* targets, expr value)
-	      | AugAssign(expr target, operator op, expr value)
-
-	      -- use 'orelse' because else is a keyword in target languages
-	      | For(expr target, expr iter, stmt* body, stmt* orelse)
-	      | While(expr test, stmt* body, stmt* orelse)
-	      | If(expr test, stmt* body, stmt* orelse)
-	      | With(expr context_expr, expr? optional_vars, stmt* body)
-
-	      | Raise(expr? exc, expr? cause)
-	      | TryExcept(stmt* body, excepthandler* handlers, stmt* orelse)
-	      | TryFinally(stmt* body, stmt* finalbody)
-	      | Assert(expr test, expr? msg)
-
-	      | Import(alias* names)
-	      | ImportFrom(identifier? module, alias* names, int? level)
-
-	      | Global(identifier* names)
-	      | Nonlocal(identifier* names)
-	      | Expr(expr value)
-	      | Pass | Break | Continue
-
-	      -- XXX Jython will be different
-	      -- col_offset is the byte offset in the utf8 string the parser uses
-	      attributes (int lineno, int col_offset)
-
-	      -- BoolOp() can use left & right?
-	expr = BoolOp(boolop op, expr* values)
-	     | BinOp(expr left, operator op, expr right)
-	     | UnaryOp(unaryop op, expr operand)
-	     | Lambda(arguments args, expr body)
-	     | IfExp(expr test, expr body, expr orelse)
-	     | Dict(expr* keys, expr* values)
-	     | Set(expr* elts)
-	     | ListComp(expr elt, comprehension* generators)
-	     | SetComp(expr elt, comprehension* generators)
-	     | DictComp(expr key, expr value, comprehension* generators)
-	     | GeneratorExp(expr elt, comprehension* generators)
-	     -- the grammar constrains where yield expressions can occur
-	     | Yield(expr? value)
-	     -- need sequences for compare to distinguish between
-	     -- x < 4 < 3 and (x < 4) < 3
-	     | Compare(expr left, cmpop* ops, expr* comparators)
-	     | Call(expr func, expr* args, keyword* keywords,
-			 expr? starargs, expr? kwargs)
-	     | Num(object n) -- a number as a PyObject.
-	     | Str(string s) -- need to specify raw, unicode, etc?
-	     | Bytes(string s)
-	     | Ellipsis
-	     -- other literals? bools?
-
-	     -- the following expression can appear in assignment context
-	     | Attribute(expr value, identifier attr, expr_context ctx)
-	     | Subscript(expr value, slice slice, expr_context ctx)
-	     | Starred(expr value, expr_context ctx)
-	     | Name(identifier id, expr_context ctx)
-	     | List(expr* elts, expr_context ctx) 
-	     | Tuple(expr* elts, expr_context ctx)
-
-	      -- col_offset is the byte offset in the utf8 string the parser uses
-	      attributes (int lineno, int col_offset)
-
-	expr_context = Load | Store | Del | AugLoad | AugStore | Param
-
-	slice = Slice(expr? lower, expr? upper, expr? step) 
-	      | ExtSlice(slice* dims) 
-	      | Index(expr value) 
-
-	boolop = And | Or 
-
-	operator = Add | Sub | Mult | Div | Mod | Pow | LShift 
+          | ClassDef(identifier name, 
+             expr* bases,
+             keyword* keywords,
+             expr? starargs,
+             expr? kwargs,
+             stmt* body,
+             expr* decorator_list)
+          | Return(expr? value)
+
+          | Delete(expr* targets)
+          | Assign(expr* targets, expr value)
+          | AugAssign(expr target, operator op, expr value)
+
+          -- use 'orelse' because else is a keyword in target languages
+          | For(expr target, expr iter, stmt* body, stmt* orelse)
+          | While(expr test, stmt* body, stmt* orelse)
+          | If(expr test, stmt* body, stmt* orelse)
+          | With(withitem* items, stmt* body)
+
+          | Raise(expr? exc, expr? cause)
+          | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
+          | Assert(expr test, expr? msg)
+
+          | Import(alias* names)
+          | ImportFrom(identifier? module, alias* names, int? level)
+
+          | Global(identifier* names)
+          | Nonlocal(identifier* names)
+          | Expr(expr value)
+          | Pass | Break | Continue
+
+          -- XXX Jython will be different
+          -- col_offset is the byte offset in the utf8 string the parser uses
+          attributes (int lineno, int col_offset)
+
+          -- BoolOp() can use left & right?
+    expr = BoolOp(boolop op, expr* values)
+         | BinOp(expr left, operator op, expr right)
+         | UnaryOp(unaryop op, expr operand)
+         | Lambda(arguments args, expr body)
+         | IfExp(expr test, expr body, expr orelse)
+         | Dict(expr* keys, expr* values)
+         | Set(expr* elts)
+         | ListComp(expr elt, comprehension* generators)
+         | SetComp(expr elt, comprehension* generators)
+         | DictComp(expr key, expr value, comprehension* generators)
+         | GeneratorExp(expr elt, comprehension* generators)
+         -- the grammar constrains where yield expressions can occur
+         | Yield(expr? value)
+             | YieldFrom(expr? value)
+         -- need sequences for compare to distinguish between
+         -- x < 4 < 3 and (x < 4) < 3
+         | Compare(expr left, cmpop* ops, expr* comparators)
+         | Call(expr func, expr* args, keyword* keywords,
+             expr? starargs, expr? kwargs)
+         | Num(object n) -- a number as a PyObject.
+         | Str(string s) -- need to specify raw, unicode, etc?
+         | Bytes(bytes s)
+         | Ellipsis
+         -- other literals? bools?
+
+         -- the following expression can appear in assignment context
+         | Attribute(expr value, identifier attr, expr_context ctx)
+         | Subscript(expr value, slice slice, expr_context ctx)
+         | Starred(expr value, expr_context ctx)
+         | Name(identifier id, expr_context ctx)
+         | List(expr* elts, expr_context ctx) 
+         | Tuple(expr* elts, expr_context ctx)
+
+          -- col_offset is the byte offset in the utf8 string the parser uses
+          attributes (int lineno, int col_offset)
+
+    expr_context = Load | Store | Del | AugLoad | AugStore | Param
+
+    slice = Slice(expr? lower, expr? upper, expr? step) 
+          | ExtSlice(slice* dims) 
+          | Index(expr value) 
+
+    boolop = And | Or 
+
+    operator = Add | Sub | Mult | Div | Mod | Pow | LShift 
                  | RShift | BitOr | BitXor | BitAnd | FloorDiv
 
-	unaryop = Invert | Not | UAdd | USub
+    unaryop = Invert | Not | UAdd | USub
 
-	cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
+    cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
 
-	comprehension = (expr target, expr iter, expr* ifs)
+    comprehension = (expr target, expr iter, expr* ifs)
 
-	-- not sure what to call the first argument for raise and except
-	excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body)
-	                attributes (int lineno, int col_offset)
+    excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body)
+                    attributes (int lineno, int col_offset)
 
-	arguments = (arg* args, identifier? vararg, expr? varargannotation,
+    arguments = (arg* args, identifier? vararg, expr? varargannotation,
                      arg* kwonlyargs, identifier? kwarg,
                      expr? kwargannotation, expr* defaults,
                      expr* kw_defaults)
-	arg = (identifier arg, expr? annotation)
+    arg = (identifier arg, expr? annotation)
 
-        -- keyword arguments supplied to call
-        keyword = (identifier arg, expr value)
+    -- keyword arguments supplied to call
+    keyword = (identifier arg, expr value)
 
-        -- import name with optional 'as' alias.
-        alias = (identifier name, identifier? asname)
+    -- import name with optional 'as' alias.
+    alias = (identifier name, identifier? asname)
+
+    withitem = (expr context_expr, expr? optional_vars)
 }
 
diff --git a/Parser/asdl.py b/Parser/asdl.py
index 7b4e2dc..08aa05b 100644
--- a/Parser/asdl.py
+++ b/Parser/asdl.py
@@ -114,28 +114,20 @@ class ASDLParser(spark.GenericParser, object):
         raise ASDLSyntaxError(tok.lineno, tok)
 
     def p_module_0(self, info):
-        " module ::= Id Id version { } "
-        module, name, version, _0, _1 = info
+        " module ::= Id Id { } "
+        module, name, _0, _1 = info
         if module.value != "module":
             raise ASDLSyntaxError(module.lineno,
                                   msg="expected 'module', found %s" % module)
-        return Module(name, None, version)
+        return Module(name, None)
 
     def p_module(self, info):
-        " module ::= Id Id version { definitions } "
-        module, name, version, _0, definitions, _1 = info
+        " module ::= Id Id { definitions } "
+        module, name, _0, definitions, _1 = info
         if module.value != "module":
             raise ASDLSyntaxError(module.lineno,
                                   msg="expected 'module', found %s" % module)
-        return Module(name, definitions, version)
-
-    def p_version(self, info):
-        "version ::= Id String"
-        version, V = info
-        if version.value != "version":
-            raise ASDLSyntaxError(version.lineno,
-                                  msg="expected 'version', found %" % version)
-        return V
+        return Module(name, definitions)
 
     def p_definition_0(self, definition):
         " definitions ::= definition "
@@ -164,15 +156,11 @@ class ASDLParser(spark.GenericParser, object):
         if id.value != "attributes":
             raise ASDLSyntaxError(id.lineno,
                                   msg="expected attributes, found %s" % id)
-        if attributes:
-            attributes.reverse()
         return Sum(sum, attributes)
 
     def p_product(self, info):
         " product ::= ( fields ) "
         _0, fields, _1 = info
-        # XXX can't I just construct things in the right order?
-        fields.reverse()
         return Product(fields)
 
     def p_sum_0(self, constructor):
@@ -196,8 +184,6 @@ class ASDLParser(spark.GenericParser, object):
     def p_constructor_1(self, info):
         " constructor ::= Id ( fields ) "
         id, _0, fields, _1 = info
-        # XXX can't I just construct things in the right order?
-        fields.reverse()
         return Constructor(id, fields)
 
     def p_fields_0(self, field):
@@ -205,8 +191,8 @@ class ASDLParser(spark.GenericParser, object):
         return [field[0]]
 
     def p_fields_1(self, info):
-        " fields ::= field , fields "
-        field, _, fields = info
+        " fields ::= fields , field "
+        fields, _, field = info
         return fields + [field]
 
     def p_field_0(self, type_):
@@ -236,7 +222,7 @@ class ASDLParser(spark.GenericParser, object):
         " field ::= Id ? "
         return Field(type[0], opt=True)
 
-builtin_types = ("identifier", "string", "int", "bool", "object")
+builtin_types = ("identifier", "string", "bytes", "int", "object")
 
 # below is a collection of classes to capture the AST of an AST :-)
 # not sure if any of the methods are useful yet, but I'm adding them
@@ -246,10 +232,9 @@ class AST(object):
     pass # a marker class
 
 class Module(AST):
-    def __init__(self, name, dfns, version):
+    def __init__(self, name, dfns):
         self.name = name
         self.dfns = dfns
-        self.version = version
         self.types = {} # maps type name to value (from dfns)
         for type in dfns:
             self.types[type.name.value] = type.value
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index b85c07e..698afac 100755
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -5,6 +5,7 @@
 # handle fields that have a type but no name
 
 import os, sys
+import subprocess
 
 import asdl
 
@@ -84,8 +85,16 @@ class EmitVisitor(asdl.VisitorBase):
 
     def __init__(self, file):
         self.file = file
+        self.identifiers = set()
         super(EmitVisitor, self).__init__()
 
+    def emit_identifier(self, name):
+        name = str(name)
+        if name in self.identifiers:
+            return
+        self.emit("_Py_IDENTIFIER(%s);" % name, 0)
+        self.identifiers.add(name)
+
     def emit(self, s, depth, reflow=True):
         # XXX reflow long lines?
         if reflow:
@@ -485,12 +494,12 @@ class Obj2ModVisitor(PickleVisitor):
 
     def visitField(self, field, name, sum=None, prod=None, depth=0):
         ctype = get_c_type(field.type)
-        self.emit("if (PyObject_HasAttrString(obj, \"%s\")) {" % field.name, depth)
+        self.emit("if (_PyObject_HasAttrId(obj, &PyId_%s)) {" % field.name, depth)
         self.emit("int res;", depth+1)
         if field.seq:
             self.emit("Py_ssize_t len;", depth+1)
             self.emit("Py_ssize_t i;", depth+1)
-        self.emit("tmp = PyObject_GetAttrString(obj, \"%s\");" % field.name, depth+1)
+        self.emit("tmp = _PyObject_GetAttrId(obj, &PyId_%s);" % field.name, depth+1)
         self.emit("if (tmp == NULL) goto failed;", depth+1)
         if field.seq:
             self.emit("if (!PyList_Check(tmp)) {", depth+1)
@@ -552,6 +561,8 @@ class PyTypesDeclareVisitor(PickleVisitor):
         self.emit("static PyTypeObject *%s_type;" % name, 0)
         self.emit("static PyObject* ast2obj_%s(void*);" % name, 0)
         if prod.fields:
+            for f in prod.fields:
+                self.emit_identifier(f.name)
             self.emit("static char *%s_fields[]={" % name,0)
             for f in prod.fields:
                 self.emit('"%s",' % f.name, 1)
@@ -560,6 +571,8 @@ class PyTypesDeclareVisitor(PickleVisitor):
     def visitSum(self, sum, name):
         self.emit("static PyTypeObject *%s_type;" % name, 0)
         if sum.attributes:
+            for a in sum.attributes:
+                self.emit_identifier(a.name)
             self.emit("static char *%s_attributes[] = {" % name, 0)
             for a in sum.attributes:
                 self.emit('"%s",' % a.name, 1)
@@ -579,6 +592,8 @@ class PyTypesDeclareVisitor(PickleVisitor):
     def visitConstructor(self, cons, name):
         self.emit("static PyTypeObject *%s_type;" % cons.name, 0)
         if cons.fields:
+            for t in cons.fields:
+                self.emit_identifier(t.name)
             self.emit("static char *%s_fields[]={" % cons.name, 0)
             for t in cons.fields:
                 self.emit('"%s",' % t.name, 1)
@@ -588,13 +603,47 @@ class PyTypesVisitor(PickleVisitor):
 
     def visitModule(self, mod):
         self.emit("""
+/* Instance layout of _ast.AST: a plain object header plus the
+   per-instance __dict__ reached through tp_dictoffset. */
+typedef struct {
+    PyObject_HEAD
+    PyObject *dict;
+} AST_object;
+
+static void
+ast_dealloc(AST_object *self)
+{
+    /* The type has Py_TPFLAGS_HAVE_GC: stop tracking before the object
+       is torn down so the collector never sees a half-destroyed node. */
+    PyObject_GC_UnTrack(self);
+    Py_CLEAR(self->dict);
+    Py_TYPE(self)->tp_free(self);
+}
+
+static int
+ast_traverse(AST_object *self, visitproc visit, void *arg)
+{
+    Py_VISIT(self->dict);
+    return 0;
+}
+
+/* tp_clear slot.  The slot type (inquiry) returns int, so this must
+   return a value rather than being declared void. */
+static int
+ast_clear(AST_object *self)
+{
+    Py_CLEAR(self->dict);
+    return 0;
+}
+
 static int
 ast_type_init(PyObject *self, PyObject *args, PyObject *kw)
 {
+    _Py_IDENTIFIER(_fields);
     Py_ssize_t i, numfields = 0;
     int res = -1;
     PyObject *key, *value, *fields;
-    fields = PyObject_GetAttrString((PyObject*)Py_TYPE(self), "_fields");
+    fields = _PyObject_GetAttrId((PyObject*)Py_TYPE(self), &PyId__fields);
     if (!fields)
         PyErr_Clear();
     if (fields) {
@@ -644,7 +685,8 @@ static PyObject *
 ast_type_reduce(PyObject *self, PyObject *unused)
 {
     PyObject *res;
-    PyObject *dict = PyObject_GetAttrString(self, "__dict__");
+    _Py_IDENTIFIER(__dict__);
+    PyObject *dict = _PyObject_GetAttrId(self, &PyId___dict__);
     if (dict == NULL) {
         if (PyErr_ExceptionMatches(PyExc_AttributeError))
             PyErr_Clear();
@@ -664,12 +706,17 @@ static PyMethodDef ast_type_methods[] = {
     {NULL}
 };
 
+static PyGetSetDef ast_type_getsets[] = {
+    {"__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict},
+    {NULL}
+};
+
 static PyTypeObject AST_type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
     "_ast.AST",
-    sizeof(PyObject),
+    sizeof(AST_object),
     0,
-    0,                       /* tp_dealloc */
+    (destructor)ast_dealloc, /* tp_dealloc */
     0,                       /* tp_print */
     0,                       /* tp_getattr */
     0,                       /* tp_setattr */
@@ -684,26 +731,26 @@ static PyTypeObject AST_type = {
     PyObject_GenericGetAttr, /* tp_getattro */
     PyObject_GenericSetAttr, /* tp_setattro */
     0,                       /* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
     0,                       /* tp_doc */
-    0,                       /* tp_traverse */
-    0,                       /* tp_clear */
+    (traverseproc)ast_traverse, /* tp_traverse */
+    (inquiry)ast_clear,      /* tp_clear */
     0,                       /* tp_richcompare */
     0,                       /* tp_weaklistoffset */
     0,                       /* tp_iter */
     0,                       /* tp_iternext */
     ast_type_methods,        /* tp_methods */
     0,                       /* tp_members */
-    0,                       /* tp_getset */
+    ast_type_getsets,        /* tp_getset */
     0,                       /* tp_base */
     0,                       /* tp_dict */
     0,                       /* tp_descr_get */
     0,                       /* tp_descr_set */
-    0,                       /* tp_dictoffset */
+    offsetof(AST_object, dict),/* tp_dictoffset */
     (initproc)ast_type_init, /* tp_init */
     PyType_GenericAlloc,     /* tp_alloc */
     PyType_GenericNew,       /* tp_new */
-    PyObject_Del,            /* tp_free */
+    PyObject_GC_Del,         /* tp_free */
 };
 
 
@@ -730,6 +777,7 @@ static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int
 static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
 {
     int i, result;
+    _Py_IDENTIFIER(_attributes);
     PyObject *s, *l = PyTuple_New(num_fields);
     if (!l)
         return 0;
@@ -741,7 +789,7 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
         }
         PyTuple_SET_ITEM(l, i, s);
     }
-    result = PyObject_SetAttrString((PyObject*)type, "_attributes", l) >= 0;
+    result = _PyObject_SetAttrId((PyObject*)type, &PyId__attributes, l) >= 0;
     Py_DECREF(l);
     return result;
 }
@@ -750,7 +798,7 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
 
 static PyObject* ast2obj_list(asdl_seq *seq, PyObject* (*func)(void*))
 {
-    int i, n = asdl_seq_LEN(seq);
+    Py_ssize_t i, n = asdl_seq_LEN(seq);
     PyObject *result = PyList_New(n);
     PyObject *value;
     if (!result)
@@ -775,6 +823,7 @@ static PyObject* ast2obj_object(void *o)
 }
 #define ast2obj_identifier ast2obj_object
 #define ast2obj_string ast2obj_object
+#define ast2obj_bytes ast2obj_object
 
 static PyObject* ast2obj_int(long b)
 {
@@ -812,6 +861,15 @@ static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena)
     return obj2ast_object(obj, out, arena);
 }
 
+static int obj2ast_bytes(PyObject* obj, PyObject** out, PyArena* arena)
+{
+    if (!PyBytes_CheckExact(obj)) {
+        PyErr_SetString(PyExc_TypeError, "AST bytes must be of type bytes");
+        return 1;
+    }
+    return obj2ast_object(obj, out, arena);
+}
+
 static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
 {
     int i;
@@ -910,10 +968,6 @@ class ASTModuleVisitor(PickleVisitor):
         self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return NULL;', 1)
         self.emit('if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)', 1)
         self.emit("return NULL;", 2)
-        # Value of version: "$Revision$"
-        self.emit('if (PyModule_AddStringConstant(m, "__version__", "%s") < 0)'
-                % mod.version, 1)
-        self.emit("return NULL;", 2)
         for dfn in mod.dfns:
             self.visit(dfn)
         self.emit("return m;", 1)
@@ -997,7 +1051,7 @@ class ObjVisitor(PickleVisitor):
         for a in sum.attributes:
             self.emit("value = ast2obj_%s(o->%s);" % (a.type, a.name), 1)
             self.emit("if (!value) goto failed;", 1)
-            self.emit('if (PyObject_SetAttrString(result, "%s", value) < 0)' % a.name, 1)
+            self.emit('if (_PyObject_SetAttrId(result, &PyId_%s, value) < 0)' % a.name, 1)
             self.emit('goto failed;', 2)
             self.emit('Py_DECREF(value);', 1)
         self.func_end()
@@ -1043,7 +1097,7 @@ class ObjVisitor(PickleVisitor):
             value = "o->v.%s.%s" % (name, field.name)
         self.set(field, value, depth)
         emit("if (!value) goto failed;", 0)
-        emit('if (PyObject_SetAttrString(result, "%s", value) == -1)' % field.name, 0)
+        emit('if (_PyObject_SetAttrId(result, &PyId_%s, value) == -1)' % field.name, 0)
         emit("goto failed;", 1)
         emit("Py_DECREF(value);", 0)
 
@@ -1066,7 +1120,7 @@ class ObjVisitor(PickleVisitor):
                 # While the sequence elements are stored as void*,
                 # ast2obj_cmpop expects an enum
                 self.emit("{", depth)
-                self.emit("int i, n = asdl_seq_LEN(%s);" % value, depth+1)
+                self.emit("Py_ssize_t i, n = asdl_seq_LEN(%s);" % value, depth+1)
                 self.emit("value = PyList_New(n);", depth+1)
                 self.emit("if (!value) goto failed;", depth+1)
                 self.emit("for(i = 0; i < n; i++)", depth+1)
@@ -1134,24 +1188,12 @@ class ChainOfVisitors:
 
 common_msg = "/* File automatically generated by %s. */\n\n"
 
-c_file_msg = """
-/*
-   __version__ %s.
-
-   This module must be committed separately after each AST grammar change;
-   The __version__ number is set to the revision number of the commit
-   containing the grammar change.
-*/
-
-"""
-
 def main(srcfile):
     argv0 = sys.argv[0]
     components = argv0.split(os.sep)
     argv0 = os.sep.join(components[-2:])
     auto_gen_msg = common_msg % argv0
     mod = asdl.parse(srcfile)
-    mod.version = "82163"
     if not asdl.check(mod):
         sys.exit(1)
     if INC_DIR:
@@ -1173,7 +1215,8 @@ def main(srcfile):
         p = os.path.join(SRC_DIR, str(mod.name) + "-ast.c")
         f = open(p, "w")
         f.write(auto_gen_msg)
-        f.write(c_file_msg % mod.version)
+        f.write('#include <stddef.h>\n')
+        f.write('\n')
         f.write('#include "Python.h"\n')
         f.write('#include "%s-ast.h"\n' % mod.name)
         f.write('\n')
diff --git a/Parser/intrcheck.c b/Parser/intrcheck.c
deleted file mode 100644
index 4439864..0000000
--- a/Parser/intrcheck.c
+++ /dev/null
@@ -1,174 +0,0 @@
-
-/* Check for interrupts */
-
-#include "Python.h"
-#include "pythread.h"
-
-#ifdef QUICKWIN
-
-#include <io.h>
-
-void
-PyOS_InitInterrupts(void)
-{
-}
-
-void
-PyOS_FiniInterrupts(void)
-{
-}
-
-int
-PyOS_InterruptOccurred(void)
-{
-    _wyield();
-}
-
-#define OK
-
-#endif /* QUICKWIN */
-
-#if defined(_M_IX86) && !defined(__QNX__)
-#include <io.h>
-#endif
-
-#if defined(MSDOS) && !defined(QUICKWIN)
-
-#ifdef __GNUC__
-
-/* This is for DJGPP's GO32 extender.  I don't know how to trap
- * control-C  (There's no API for ctrl-C, and I don't want to mess with
- * the interrupt vectors.)  However, this DOES catch control-break.
- * --Amrit
- */
-
-#include <go32.h>
-
-void
-PyOS_InitInterrupts(void)
-{
-    _go32_want_ctrl_break(1 /* TRUE */);
-}
-
-void
-PyOS_FiniInterrupts(void)
-{
-}
-
-int
-PyOS_InterruptOccurred(void)
-{
-    return _go32_was_ctrl_break_hit();
-}
-
-#else /* !__GNUC__ */
-
-/* This might work for MS-DOS (untested though): */
-
-void
-PyOS_InitInterrupts(void)
-{
-}
-
-void
-PyOS_FiniInterrupts(void)
-{
-}
-
-int
-PyOS_InterruptOccurred(void)
-{
-    int interrupted = 0;
-    while (kbhit()) {
-        if (getch() == '\003')
-            interrupted = 1;
-    }
-    return interrupted;
-}
-
-#endif /* __GNUC__ */
-
-#define OK
-
-#endif /* MSDOS && !QUICKWIN */
-
-
-#ifndef OK
-
-/* Default version -- for real operating systems and for Standard C */
-
-#include <stdio.h>
-#include <string.h>
-#include <signal.h>
-
-static int interrupted;
-
-void
-PyErr_SetInterrupt(void)
-{
-    interrupted = 1;
-}
-
-extern int PyErr_CheckSignals(void);
-
-static int
-checksignals_witharg(void * arg)
-{
-    return PyErr_CheckSignals();
-}
-
-static void
-intcatcher(int sig)
-{
-    extern void Py_Exit(int);
-    static char message[] =
-"python: to interrupt a truly hanging Python program, interrupt once more.\n";
-    switch (interrupted++) {
-    case 0:
-        break;
-    case 1:
-        write(2, message, strlen(message));
-        break;
-    case 2:
-        interrupted = 0;
-        Py_Exit(1);
-        break;
-    }
-    PyOS_setsig(SIGINT, intcatcher);
-    Py_AddPendingCall(checksignals_witharg, NULL);
-}
-
-static void (*old_siginthandler)(int) = SIG_DFL;
-
-void
-PyOS_InitInterrupts(void)
-{
-    if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN)
-        PyOS_setsig(SIGINT, intcatcher);
-}
-
-void
-PyOS_FiniInterrupts(void)
-{
-    PyOS_setsig(SIGINT, old_siginthandler);
-}
-
-int
-PyOS_InterruptOccurred(void)
-{
-    if (!interrupted)
-        return 0;
-    interrupted = 0;
-    return 1;
-}
-
-#endif /* !OK */
-
-void
-PyOS_AfterFork(void)
-{
-#ifdef WITH_THREAD
-    PyEval_ReInitThreads();
-    PyThread_ReInitTLS();
-#endif
-}
diff --git a/Parser/myreadline.c b/Parser/myreadline.c
index 93edd7f..d864623 100644
--- a/Parser/myreadline.c
+++ b/Parser/myreadline.c
@@ -35,12 +35,11 @@ int (*PyOS_InputHook)(void) = NULL;
 static int
 my_fgets(char *buf, int len, FILE *fp)
 {
-    char *p;
-    int err;
 #ifdef MS_WINDOWS
-    int i;
+    HANDLE hInterruptEvent;
 #endif
-
+    char *p;
+    int err;
     while (1) {
         if (PyOS_InputHook != NULL)
             (void)(PyOS_InputHook)();
@@ -57,20 +56,24 @@ my_fgets(char *buf, int len, FILE *fp)
         /* Ctrl-C anywhere on the line or Ctrl-Z if the only character
            on a line will set ERROR_OPERATION_ABORTED. Under normal
            circumstances Ctrl-C will also have caused the SIGINT handler
-           to fire. This signal fires in another thread and is not
-           guaranteed to have occurred before this point in the code.
-
-           Therefore: check in a small loop to see if the trigger has
-           fired, in which case assume this is a Ctrl-C event. If it
-           hasn't fired within 10ms assume that this is a Ctrl-Z on its
-           own or that the signal isn't going to fire for some other
-           reason and drop through to check for EOF.
+           to fire, which will have set the event object returned by
+           _PyOS_SigintEvent. This signal fires in another thread and
+           is not guaranteed to have occurred before this point in the
+           code.
+
+           Therefore: check whether the event is set with a small timeout.
+           If it is, assume this is a Ctrl-C and reset the event. If it
+           isn't set, assume that this is a Ctrl-Z on its own and drop
+           through to check for EOF.
         */
         if (GetLastError()==ERROR_OPERATION_ABORTED) {
-            for (i = 0; i < 10; i++) {
-                if (PyOS_InterruptOccurred())
-                    return 1;
-            Sleep(1);
+            hInterruptEvent = _PyOS_SigintEvent();
+            switch (WaitForSingleObject(hInterruptEvent, 10)) {
+            case WAIT_OBJECT_0:
+                ResetEvent(hInterruptEvent);
+                return 1; /* Interrupt */
+            case WAIT_FAILED:
+                return -2; /* Error */
             }
         }
 #endif /* MS_WINDOWS */
@@ -90,7 +93,7 @@ my_fgets(char *buf, int len, FILE *fp)
 #endif
             if (s < 0)
                     return 1;
-	    /* try again */
+            /* try again */
             continue;
         }
 #endif
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index bb9afc4..7beb735 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -13,7 +13,7 @@
 
 /* Forward */
 static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
-static void initerr(perrdetail *err_ret, const char* filename);
+static int initerr(perrdetail *err_ret, const char* filename);
 
 /* Parse input coming from a string.  Return error code, print some errors. */
 node *
@@ -48,7 +48,8 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
     struct tok_state *tok;
     int exec_input = start == file_input;
 
-    initerr(err_ret, filename);
+    if (initerr(err_ret, filename) < 0)
+        return NULL;
 
     if (*flags & PyPARSE_IGNORE_COOKIE)
         tok = PyTokenizer_FromUTF8(s, exec_input);
@@ -59,7 +60,10 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
         return NULL;
     }
 
-    tok->filename = filename ? filename : "<string>";
+#ifndef PGEN
+    Py_INCREF(err_ret->filename);
+    tok->filename = err_ret->filename;
+#endif
     return parsetok(tok, g, start, err_ret, flags);
 }
 
@@ -90,13 +94,17 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
 {
     struct tok_state *tok;
 
-    initerr(err_ret, filename);
+    if (initerr(err_ret, filename) < 0)
+        return NULL;
 
     if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) {
         err_ret->error = E_NOMEM;
         return NULL;
     }
-    tok->filename = filename;
+#ifndef PGEN
+    Py_INCREF(err_ret->filename);
+    tok->filename = err_ret->filename;
+#endif
     return parsetok(tok, g, start, err_ret, flags);
 }
 
@@ -216,6 +224,36 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
     if (err_ret->error == E_DONE) {
         n = ps->p_tree;
         ps->p_tree = NULL;
+
+#ifndef PGEN
+        /* Check that the source for a single input statement really
+           is a single statement by looking at what is left in the
+           buffer after parsing.  Trailing whitespace and comments
+           are OK.  */
+        if (start == single_input) {
+            char *cur = tok->cur;
+            char c = *tok->cur;
+
+            for (;;) {
+                while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
+                    c = *++cur;
+
+                if (!c)
+                    break;
+
+                if (c != '#') {
+                    err_ret->error = E_BADSINGLE;
+                    PyNode_Free(n);
+                    n = NULL;
+                    break;
+                }
+
+                /* Suck up comment. */
+                while (c && c != '\n')
+                    c = *++cur;
+            }
+        }
+#endif
     }
     else
         n = NULL;
@@ -226,7 +264,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
     PyParser_Delete(ps);
 
     if (n == NULL) {
-        if (tok->lineno <= 1 && tok->done == E_EOF)
+        if (tok->done == E_EOF)
             err_ret->error = E_EOF;
         err_ret->lineno = tok->lineno;
         if (tok->buf != NULL) {
@@ -269,14 +307,24 @@ done:
     return n;
 }
 
-static void
+static int
 initerr(perrdetail *err_ret, const char *filename)
 {
     err_ret->error = E_OK;
-    err_ret->filename = filename;
     err_ret->lineno = 0;
     err_ret->offset = 0;
     err_ret->text = NULL;
     err_ret->token = -1;
     err_ret->expected = -1;
+#ifndef PGEN
+    if (filename)
+        err_ret->filename = PyUnicode_DecodeFSDefault(filename);
+    else
+        err_ret->filename = PyUnicode_FromString("<string>");
+    if (err_ret->filename == NULL) {
+        err_ret->error = E_ERROR;
+        return -1;
+    }
+#endif
+    return 0;
 }
diff --git a/Parser/parsetok_pgen.c b/Parser/parsetok_pgen.c
new file mode 100644
index 0000000..97b9288
--- /dev/null
+++ b/Parser/parsetok_pgen.c
@@ -0,0 +1,2 @@
+#define PGEN
+#include "parsetok.c"
diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c
index 4b7b55a..52b8380 100644
--- a/Parser/pgenmain.c
+++ b/Parser/pgenmain.c
@@ -29,6 +29,8 @@ int Py_IgnoreEnvironmentFlag;
 /* Forward */
 grammar *getgrammar(char *filename);
 
+void Py_Exit(int) _Py_NO_RETURN;
+
 void
 Py_Exit(int sts)
 {
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 5ba12a4..93a4a5c 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -128,7 +128,6 @@ tok_new(void)
     tok->prompt = tok->nextprompt = NULL;
     tok->lineno = 0;
     tok->level = 0;
-    tok->filename = NULL;
     tok->altwarning = 1;
     tok->alterror = 1;
     tok->alttabsize = 1;
@@ -140,6 +139,7 @@ tok_new(void)
     tok->encoding = NULL;
     tok->cont_line = 0;
 #ifndef PGEN
+    tok->filename = NULL;
     tok->decoding_readline = NULL;
     tok->decoding_buffer = NULL;
 #endif
@@ -462,6 +462,8 @@ static int
 fp_setreadl(struct tok_state *tok, const char* enc)
 {
     PyObject *readline = NULL, *stream = NULL, *io = NULL;
+    _Py_IDENTIFIER(open);
+    _Py_IDENTIFIER(readline);
     int fd;
 
     io = PyImport_ImportModuleNoBlock("io");
@@ -474,13 +476,13 @@ fp_setreadl(struct tok_state *tok, const char* enc)
         goto cleanup;
     }
 
-    stream = PyObject_CallMethod(io, "open", "isisOOO",
+    stream = _PyObject_CallMethodId(io, &PyId_open, "isisOOO",
                     fd, "r", -1, enc, Py_None, Py_None, Py_False);
     if (stream == NULL)
         goto cleanup;
 
     Py_XDECREF(tok->decoding_readline);
-    readline = PyObject_GetAttrString(stream, "readline");
+    readline = _PyObject_GetAttrId(stream, &PyId_readline);
     tok->decoding_readline = readline;
 
     /* The file has been reopened; parsing will restart from
@@ -545,7 +547,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
 {
     char *line = NULL;
     int badchar = 0;
-    PyObject *filename;
     for (;;) {
         if (tok->decoding_state == STATE_NORMAL) {
             /* We already have a codec associated with
@@ -586,19 +587,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
     if (badchar) {
         /* Need to add 1 to the line number, since this line
            has not been counted, yet.  */
-        if (tok->filename != NULL)
-            filename = PyUnicode_DecodeFSDefault(tok->filename);
-        else
-            filename = PyUnicode_FromString("<file>");
-        if (filename != NULL) {
-            PyErr_Format(PyExc_SyntaxError,
-                    "Non-UTF-8 code starting with '\\x%.2x' "
-                    "in file %U on line %i, "
-                    "but no encoding declared; "
-                    "see http://python.org/dev/peps/pep-0263/ for details",
-                    badchar, filename, tok->lineno + 1);
-            Py_DECREF(filename);
-        }
+        PyErr_Format(PyExc_SyntaxError,
+                "Non-UTF-8 code starting with '\\x%.2x' "
+                "in file %U on line %i, "
+                "but no encoding declared; "
+                "see http://python.org/dev/peps/pep-0263/ for details",
+                badchar, tok->filename, tok->lineno + 1);
         return error_ret(tok);
     }
 #endif
@@ -856,6 +850,7 @@ PyTokenizer_Free(struct tok_state *tok)
 #ifndef PGEN
     Py_XDECREF(tok->decoding_readline);
     Py_XDECREF(tok->decoding_buffer);
+    Py_XDECREF(tok->filename);
 #endif
     if (tok->fp != NULL && tok->buf != NULL)
         PyMem_FREE(tok->buf);
@@ -1250,8 +1245,13 @@ indenterror(struct tok_state *tok)
         return 1;
     }
     if (tok->altwarning) {
-        PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
+#ifdef PGEN
+        PySys_WriteStderr("inconsistent use of tabs and spaces "
+                          "in indentation\n");
+#else
+        PySys_FormatStderr("%U: inconsistent use of tabs and spaces "
                           "in indentation\n", tok->filename);
+#endif
         tok->altwarning = 0;
     }
     return 0;
@@ -1260,14 +1260,16 @@ indenterror(struct tok_state *tok)
 #ifdef PGEN
 #define verify_identifier(tok) 1
 #else
-/* Verify that the identifier follows PEP 3131. */
+/* Verify that the identifier follows PEP 3131.
+   All identifier strings are guaranteed to be "ready" unicode objects.
+ */
 static int
 verify_identifier(struct tok_state *tok)
 {
     PyObject *s;
     int result;
     s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
-    if (s == NULL) {
+    if (s == NULL || PyUnicode_READY(s) == -1) {
         if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
             PyErr_Clear();
             tok->done = E_IDENTIFIER;
@@ -1410,13 +1412,20 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
     /* Identifier (most frequent token!) */
     nonascii = 0;
     if (is_potential_identifier_start(c)) {
-        /* Process b"", r"" and br"" */
-        if (c == 'b' || c == 'B') {
-            c = tok_nextc(tok);
-            if (c == '"' || c == '\'')
-                goto letter_quote;
-        }
-        if (c == 'r' || c == 'R') {
+        /* Process b"", r"", u"", br"" and rb"" */
+        int saw_b = 0, saw_r = 0, saw_u = 0;
+        while (1) {
+            if (!(saw_b || saw_u) && (c == 'b' || c == 'B'))
+                saw_b = 1;
+            /* The u"" literal is supported only for backwards compatibility,
+               so we don't accept it in arbitrary order like byte literals. */
+            else if (!(saw_b || saw_u || saw_r) && (c == 'u' || c == 'U'))
+                saw_u = 1;
+            /* ur"" and ru"" are not supported */
+            else if (!(saw_r || saw_u) && (c == 'r' || c == 'R'))
+                saw_r = 1;
+            else
+                break;
             c = tok_nextc(tok);
             if (c == '"' || c == '\'')
                 goto letter_quote;
@@ -1692,17 +1701,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
     return result;
 }
 
-/* Get -*- encoding -*- from a Python file.
+/* Get the encoding of a Python file. Check for the coding cookie and check if
+   the file starts with a BOM.
 
-   PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
-   the first or second line of the file (in which case the encoding
-   should be assumed to be PyUnicode_GetDefaultEncoding()).
+   PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
+   encoding in the first or second line of the file (in which case the encoding
+   should be assumed to be UTF-8).
+
+   The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed
+   by the caller. */
 
-   The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
-   by the caller.
-*/
 char *
-PyTokenizer_FindEncoding(int fd)
+PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
 {
     struct tok_state *tok;
     FILE *fp;
@@ -1721,6 +1731,20 @@ PyTokenizer_FindEncoding(int fd)
         fclose(fp);
         return NULL;
     }
+#ifndef PGEN
+    if (filename != NULL) {
+        Py_INCREF(filename);
+        tok->filename = filename;
+    }
+    else {
+        tok->filename = PyUnicode_FromString("<string>");
+        if (tok->filename == NULL) {
+            fclose(fp);
+            PyTokenizer_Free(tok);
+            return encoding;
+        }
+    }
+#endif
     while (tok->lineno < 2 && tok->done == E_OK) {
         PyTokenizer_Get(tok, &p_start, &p_end);
     }
@@ -1734,6 +1758,12 @@ PyTokenizer_FindEncoding(int fd)
     return encoding;
 }
 
+char *
+PyTokenizer_FindEncoding(int fd)
+{
+    return PyTokenizer_FindEncodingFilename(fd, NULL);
+}
+
 #ifdef Py_DEBUG
 
 void
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 2be3bf2..ed1f3aa 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -40,7 +40,13 @@ struct tok_state {
     int level;          /* () [] {} Parentheses nesting level */
             /* Used to allow free continuations inside them */
     /* Stuff for checking on different tab sizes */
-    const char *filename;   /* encoded to the filesystem encoding */
+#ifndef PGEN
+    /* pgen doesn't have access to Python codecs, so it cannot decode the
+       input filename. The bytes filename could be kept, but it is only used
+       by indenterror() and is not really needed: pgen only compiles one file
+       (Grammar/Grammar). */
+    PyObject *filename;
+#endif
     int altwarning;     /* Issue warning if alternate tabs don't match */
     int alterror;       /* Issue error if alternate tabs don't match */
     int alttabsize;     /* Alternate tab spacing */
@@ -69,7 +75,6 @@ extern void PyTokenizer_Free(struct tok_state *);
 extern int PyTokenizer_Get(struct tok_state *, char **, char **);
 extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
                                           int len, int *offset);
-extern char * PyTokenizer_FindEncoding(int);
 
 #ifdef __cplusplus
 }