Diffstat (limited to 'Parser')
32 files changed, 4298 insertions, 3651 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index 126d478..9a9b933 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -1,129 +1,115 @@
--- ASDL's 5 builtin types are:
--- identifier, int, string, object, constant
+-- ASDL's five builtin types are identifier, int, string, object, bool
 
-module Python
+module Python version "$Revision$"
 {
-    mod = Module(stmt* body, type_ignore *type_ignores)
-        | Interactive(stmt* body)
-        | Expression(expr body)
-        | FunctionType(expr* argtypes, expr returns)
-
-        -- not really an actual node but useful in Jython's typesystem.
-        | Suite(stmt* body)
-
-    stmt = FunctionDef(identifier name, arguments args,
-                       stmt* body, expr* decorator_list, expr? returns,
-                       string? type_comment)
-         | AsyncFunctionDef(identifier name, arguments args,
-                            stmt* body, expr* decorator_list, expr? returns,
-                            string? type_comment)
-
-         | ClassDef(identifier name,
-                    expr* bases,
-                    keyword* keywords,
-                    stmt* body,
-                    expr* decorator_list)
-         | Return(expr? value)
-
-         | Delete(expr* targets)
-         | Assign(expr* targets, expr value, string? type_comment)
-         | AugAssign(expr target, operator op, expr value)
-         -- 'simple' indicates that we annotate simple name without parens
-         | AnnAssign(expr target, expr annotation, expr? value, int simple)
-
-         -- use 'orelse' because else is a keyword in target languages
-         | For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
-         | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
-         | While(expr test, stmt* body, stmt* orelse)
-         | If(expr test, stmt* body, stmt* orelse)
-         | With(withitem* items, stmt* body, string? type_comment)
-         | AsyncWith(withitem* items, stmt* body, string? type_comment)
-
-         | Raise(expr? exc, expr? cause)
-         | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
-         | Assert(expr test, expr? msg)
-
-         | Import(alias* names)
-         | ImportFrom(identifier? module, alias* names, int? level)
-
-         | Global(identifier* names)
-         | Nonlocal(identifier* names)
-         | Expr(expr value)
-         | Pass | Break | Continue
-
-         -- XXX Jython will be different
-         -- col_offset is the byte offset in the utf8 string the parser uses
-         attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
-
-         -- BoolOp() can use left & right?
-    expr = BoolOp(boolop op, expr* values)
-         | NamedExpr(expr target, expr value)
-         | BinOp(expr left, operator op, expr right)
-         | UnaryOp(unaryop op, expr operand)
-         | Lambda(arguments args, expr body)
-         | IfExp(expr test, expr body, expr orelse)
-         | Dict(expr* keys, expr* values)
-         | Set(expr* elts)
-         | ListComp(expr elt, comprehension* generators)
-         | SetComp(expr elt, comprehension* generators)
-         | DictComp(expr key, expr value, comprehension* generators)
-         | GeneratorExp(expr elt, comprehension* generators)
-         -- the grammar constrains where yield expressions can occur
-         | Await(expr value)
-         | Yield(expr? value)
-         | YieldFrom(expr value)
-         -- need sequences for compare to distinguish between
-         -- x < 4 < 3 and (x < 4) < 3
-         | Compare(expr left, cmpop* ops, expr* comparators)
-         | Call(expr func, expr* args, keyword* keywords)
-         | FormattedValue(expr value, int? conversion, expr? format_spec)
-         | JoinedStr(expr* values)
-         | Constant(constant value, string? kind)
-
-         -- the following expression can appear in assignment context
-         | Attribute(expr value, identifier attr, expr_context ctx)
-         | Subscript(expr value, slice slice, expr_context ctx)
-         | Starred(expr value, expr_context ctx)
-         | Name(identifier id, expr_context ctx)
-         | List(expr* elts, expr_context ctx)
-         | Tuple(expr* elts, expr_context ctx)
-
-         -- col_offset is the byte offset in the utf8 string the parser uses
-         attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
-
-    expr_context = Load | Store | Del | AugLoad | AugStore | Param
-
-    slice = Slice(expr? lower, expr? upper, expr? step)
-          | ExtSlice(slice* dims)
-          | Index(expr value)
-
-    boolop = And | Or
-
-    operator = Add | Sub | Mult | MatMult | Div | Mod | Pow | LShift
+    mod = Module(stmt* body)
+        | Interactive(stmt* body)
+        | Expression(expr body)
+
+        -- not really an actual node but useful in Jython's typesystem.
+        | Suite(stmt* body)
+
+    stmt = FunctionDef(identifier name, arguments args,
+                       stmt* body, expr* decorator_list)
+         | ClassDef(identifier name, expr* bases, stmt* body, expr* decorator_list)
+         | Return(expr? value)
+
+         | Delete(expr* targets)
+         | Assign(expr* targets, expr value)
+         | AugAssign(expr target, operator op, expr value)
+
+         -- not sure if bool is allowed, can always use int
+         | Print(expr? dest, expr* values, bool nl)
+
+         -- use 'orelse' because else is a keyword in target languages
+         | For(expr target, expr iter, stmt* body, stmt* orelse)
+         | While(expr test, stmt* body, stmt* orelse)
+         | If(expr test, stmt* body, stmt* orelse)
+         | With(expr context_expr, expr? optional_vars, stmt* body)
+
+         -- 'type' is a bad name
+         | Raise(expr? type, expr? inst, expr? tback)
+         | TryExcept(stmt* body, excepthandler* handlers, stmt* orelse)
+         | TryFinally(stmt* body, stmt* finalbody)
+         | Assert(expr test, expr? msg)
+
+         | Import(alias* names)
+         | ImportFrom(identifier? module, alias* names, int? level)
+
+         -- Doesn't capture requirement that locals must be
+         -- defined if globals is
+         -- still supports use as a function!
+         | Exec(expr body, expr? globals, expr? locals)
+
+         | Global(identifier* names)
+         | Expr(expr value)
+         | Pass | Break | Continue
+
+         -- XXX Jython will be different
+         -- col_offset is the byte offset in the utf8 string the parser uses
+         attributes (int lineno, int col_offset)
+
+         -- BoolOp() can use left & right?
+    expr = BoolOp(boolop op, expr* values)
+         | BinOp(expr left, operator op, expr right)
+         | UnaryOp(unaryop op, expr operand)
+         | Lambda(arguments args, expr body)
+         | IfExp(expr test, expr body, expr orelse)
+         | Dict(expr* keys, expr* values)
+         | Set(expr* elts)
+         | ListComp(expr elt, comprehension* generators)
+         | SetComp(expr elt, comprehension* generators)
+         | DictComp(expr key, expr value, comprehension* generators)
+         | GeneratorExp(expr elt, comprehension* generators)
+         -- the grammar constrains where yield expressions can occur
+         | Yield(expr? value)
+         -- need sequences for compare to distinguish between
+         -- x < 4 < 3 and (x < 4) < 3
+         | Compare(expr left, cmpop* ops, expr* comparators)
+         | Call(expr func, expr* args, keyword* keywords,
+                expr? starargs, expr? kwargs)
+         | Repr(expr value)
+         | Num(object n) -- a number as a PyObject.
+         | Str(string s) -- need to specify raw, unicode, etc?
+         -- other literals? bools?
+
+         -- the following expression can appear in assignment context
+         | Attribute(expr value, identifier attr, expr_context ctx)
+         | Subscript(expr value, slice slice, expr_context ctx)
+         | Name(identifier id, expr_context ctx)
+         | List(expr* elts, expr_context ctx)
+         | Tuple(expr* elts, expr_context ctx)
+
+         -- col_offset is the byte offset in the utf8 string the parser uses
+         attributes (int lineno, int col_offset)
+
+    expr_context = Load | Store | Del | AugLoad | AugStore | Param
+
+    slice = Ellipsis | Slice(expr? lower, expr? upper, expr? step)
+          | ExtSlice(slice* dims)
+          | Index(expr value)
+
+    boolop = And | Or
+
+    operator = Add | Sub | Mult | Div | Mod | Pow | LShift
                | RShift | BitOr | BitXor | BitAnd | FloorDiv
 
-    unaryop = Invert | Not | UAdd | USub
+    unaryop = Invert | Not | UAdd | USub
 
-    cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
+    cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
 
-    comprehension = (expr target, expr iter, expr* ifs, int is_async)
+    comprehension = (expr target, expr iter, expr* ifs)
 
-    excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body)
-                    attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
+    -- not sure what to call the first argument for raise and except
+    excepthandler = ExceptHandler(expr? type, expr? name, stmt* body)
+                    attributes (int lineno, int col_offset)
 
-    arguments = (arg* posonlyargs, arg* args, arg? vararg, arg* kwonlyargs,
-                 expr* kw_defaults, arg? kwarg, expr* defaults)
+    arguments = (expr* args, identifier? vararg,
+                 identifier? kwarg, expr* defaults)
 
-    arg = (identifier arg, expr? annotation, string? type_comment)
-          attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
+    -- keyword arguments supplied to call
+    keyword = (identifier arg, expr value)
 
-    -- keyword arguments supplied to call (NULL identifier for **kwargs)
-    keyword = (identifier? arg, expr value)
-
-    -- import name with optional 'as' alias.
-    alias = (identifier name, identifier? asname)
-
-    withitem = (expr context_expr, expr? optional_vars)
-
-    type_ignore = TypeIgnore(int lineno, string tag)
+    -- import name with optional 'as' alias.
+    alias = (identifier name, identifier? asname)
 }
diff --git a/Parser/acceler.c b/Parser/acceler.c
index e515833..9b14263 100644
--- a/Parser/acceler.c
+++ b/Parser/acceler.c
@@ -10,21 +10,22 @@
    are not part of the static data structure written on graminit.[ch] by the
   parser generator. */
 
-#include "Python.h"
+#include "pgenheaders.h"
 #include "grammar.h"
 #include "node.h"
 #include "token.h"
 #include "parser.h"
 
 /* Forward references */
-static void fixdfa(grammar *, const dfa *);
+static void fixdfa(grammar *, dfa *);
 static void fixstate(grammar *, state *);
 
 void
 PyGrammar_AddAccelerators(grammar *g)
 {
+    dfa *d;
     int i;
-    const dfa *d = g->g_dfa;
+    d = g->g_dfa;
     for (i = g->g_ndfas; --i >= 0; d++)
         fixdfa(g, d);
     g->g_accel = 1;
@@ -33,9 +34,10 @@ PyGrammar_AddAccelerators(grammar *g)
 void
 PyGrammar_RemoveAccelerators(grammar *g)
 {
+    dfa *d;
     int i;
     g->g_accel = 0;
-    const dfa *d = g->g_dfa;
+    d = g->g_dfa;
     for (i = g->g_ndfas; --i >= 0; d++) {
         state *s;
         int j;
@@ -49,7 +51,7 @@ PyGrammar_RemoveAccelerators(grammar *g)
 }
 
 static void
-fixdfa(grammar *g, const dfa *d)
+fixdfa(grammar *g, dfa *d)
 {
     state *s;
     int j;
@@ -61,7 +63,7 @@ fixdfa(grammar *g, const dfa *d)
 static void
 fixstate(grammar *g, state *s)
 {
-    const arc *a;
+    arc *a;
     int k;
     int *accel;
     int nl = g->g_ll.ll_nlabels;
@@ -76,14 +78,14 @@ fixstate(grammar *g, state *s)
     a = s->s_arc;
     for (k = s->s_narcs; --k >= 0; a++) {
         int lbl = a->a_lbl;
-        const label *l = &g->g_ll.ll_label[lbl];
+        label *l = &g->g_ll.ll_label[lbl];
         int type = l->lb_type;
         if (a->a_arrow >= (1 << 7)) {
             printf("XXX too many states!\n");
             continue;
         }
         if (ISNONTERMINAL(type)) {
-            const dfa *d1 = PyGrammar_FindDFA(g, type);
+            dfa *d1 = PyGrammar_FindDFA(g, type);
             int ibit;
             if (type - NT_OFFSET >= (1 << 7)) {
                 printf("XXX too high nonterminal number!\n");
diff --git a/Parser/asdl.py b/Parser/asdl.py
index 62f5c19..1ddc3f8 100644
--- a/Parser/asdl.py
+++ b/Parser/asdl.py
@@ -1,53 +1,243 @@
-#-------------------------------------------------------------------------------
-# Parser for ASDL [1] definition files. Reads in an ASDL description and parses
-# it into an AST that describes it.
-#
-# The EBNF we're parsing here: Figure 1 of the paper [1]. Extended to support
-# modules and attributes after a product. Words starting with Capital letters
-# are terminals. Literal tokens are in "double quotes". Others are
-# non-terminals. Id is either TokenId or ConstructorId.
-#
-# module        ::= "module" Id "{" [definitions] "}"
-# definitions   ::= { TypeId "=" type }
-# type          ::= product | sum
-# product       ::= fields ["attributes" fields]
-# fields        ::= "(" { field, "," } field ")"
-# field         ::= TypeId ["?" | "*"] [Id]
-# sum           ::= constructor { "|" constructor } ["attributes" fields]
-# constructor   ::= ConstructorId [fields]
-#
-# [1] "The Zephyr Abstract Syntax Description Language" by Wang, et. al. See
-#     http://asdl.sourceforge.net/
-#-------------------------------------------------------------------------------
-from collections import namedtuple
-import re
-
-__all__ = [
-    'builtin_types', 'parse', 'AST', 'Module', 'Type', 'Constructor',
-    'Field', 'Sum', 'Product', 'VisitorBase', 'Check', 'check']
-
-# The following classes define nodes into which the ASDL description is parsed.
-# Note: this is a "meta-AST". ASDL files (such as Python.asdl) describe the AST
-# structure used by a programming language. But ASDL files themselves need to be
-# parsed. This module parses ASDL files and uses a simple AST to represent them.
-# See the EBNF at the top of the file to understand the logical connection
-# between the various node types.
-
-builtin_types = {'identifier', 'string', 'bytes', 'int', 'object', 'singleton',
-                 'constant'}
-
-class AST:
+"""An implementation of the Zephyr Abstract Syntax Definition Language.
+
+See http://asdl.sourceforge.net/ and
+http://www.cs.princeton.edu/research/techreps/TR-554-97
+
+Only supports top level module decl, not view.  I'm guessing that view
+is intended to support the browser and I'm not interested in the
+browser.
+
+Changes for Python: Add support for module versions
+"""
+
+import os
+import traceback
+
+import spark
+
+class Token(object):
+    # spark seems to dispatch in the parser based on a token's
+    # type attribute
+    def __init__(self, type, lineno):
+        self.type = type
+        self.lineno = lineno
+
+    def __str__(self):
+        return self.type
+
     def __repr__(self):
-        raise NotImplementedError
+        return str(self)
+
+class Id(Token):
+    def __init__(self, value, lineno):
+        self.type = 'Id'
+        self.value = value
+        self.lineno = lineno
+
+    def __str__(self):
+        return self.value
+
+class String(Token):
+    def __init__(self, value, lineno):
+        self.type = 'String'
+        self.value = value
+        self.lineno = lineno
+
+class ASDLSyntaxError(Exception):
+
+    def __init__(self, lineno, token=None, msg=None):
+        self.lineno = lineno
+        self.token = token
+        self.msg = msg
+
+    def __str__(self):
+        if self.msg is None:
+            return "Error at '%s', line %d" % (self.token, self.lineno)
+        else:
+            return "%s, line %d" % (self.msg, self.lineno)
+
+class ASDLScanner(spark.GenericScanner, object):
+
+    def tokenize(self, input):
+        self.rv = []
+        self.lineno = 1
+        super(ASDLScanner, self).tokenize(input)
+        return self.rv
+
+    def t_id(self, s):
+        r"[\w\.]+"
+        # XXX doesn't distinguish upper vs. lower, which is
+        # significant for ASDL.
+        self.rv.append(Id(s, self.lineno))
+
+    def t_string(self, s):
+        r'"[^"]*"'
+        self.rv.append(String(s, self.lineno))
+
+    def t_xxx(self, s): # not sure what this production means
+        r"<="
+        self.rv.append(Token(s, self.lineno))
+
+    def t_punctuation(self, s):
+        r"[\{\}\*\=\|\(\)\,\?\:]"
+        self.rv.append(Token(s, self.lineno))
+
+    def t_comment(self, s):
+        r"\-\-[^\n]*"
+        pass
+
+    def t_newline(self, s):
+        r"\n"
+        self.lineno += 1
+
+    def t_whitespace(self, s):
+        r"[ \t]+"
+        pass
+
+    def t_default(self, s):
+        r" . +"
+        raise ValueError, "unmatched input: %s" % `s`
+
+class ASDLParser(spark.GenericParser, object):
+    def __init__(self):
+        super(ASDLParser, self).__init__("module")
+
+    def typestring(self, tok):
+        return tok.type
+
+    def error(self, tok):
+        raise ASDLSyntaxError(tok.lineno, tok)
+
+    def p_module_0(self, (module, name, version, _0, _1)):
+        " module ::= Id Id version { } "
+        if module.value != "module":
+            raise ASDLSyntaxError(module.lineno,
+                                  msg="expected 'module', found %s" % module)
+        return Module(name, None, version)
+
+    def p_module(self, (module, name, version, _0, definitions, _1)):
+        " module ::= Id Id version { definitions } "
+        if module.value != "module":
+            raise ASDLSyntaxError(module.lineno,
+                                  msg="expected 'module', found %s" % module)
+        return Module(name, definitions, version)
+
+    def p_version(self, (version, V)):
+        "version ::= Id String"
+        if version.value != "version":
+            raise ASDLSyntaxError(version.lineno,
+                                  msg="expected 'version', found %s" % version)
+        return V
+
+    def p_definition_0(self, (definition,)):
+        " definitions ::= definition "
+        return definition
+
+    def p_definition_1(self, (definitions, definition)):
+        " definitions ::= definition definitions "
+        return definitions + definition
+
+    def p_definition(self, (id, _, type)):
+        " definition ::= Id = type "
+        return [Type(id, type)]
+
+    def p_type_0(self, (product,)):
+        " type ::= product "
+        return product
+
+    def p_type_1(self, (sum,)):
+        " type ::= sum "
+        return Sum(sum)
+
+    def p_type_2(self, (sum, id, _0, attributes, _1)):
+        " type ::= sum Id ( fields ) "
+        if id.value != "attributes":
+            raise ASDLSyntaxError(id.lineno,
+                                  msg="expected attributes, found %s" % id)
+        if attributes:
+            attributes.reverse()
+        return Sum(sum, attributes)
+
+    def p_product(self, (_0, fields, _1)):
+        " product ::= ( fields ) "
+        # XXX can't I just construct things in the right order?
+        fields.reverse()
+        return Product(fields)
+
+    def p_sum_0(self, (constructor,)):
+        " sum ::= constructor "
+        return [constructor]
+
+    def p_sum_1(self, (constructor, _, sum)):
+        " sum ::= constructor | sum "
+        return [constructor] + sum
+
+    def p_sum_2(self, (constructor, _, sum)):
+        " sum ::= constructor | sum "
+        return [constructor] + sum
+
+    def p_constructor_0(self, (id,)):
+        " constructor ::= Id "
+        return Constructor(id)
+
+    def p_constructor_1(self, (id, _0, fields, _1)):
+        " constructor ::= Id ( fields ) "
+        # XXX can't I just construct things in the right order?
+        fields.reverse()
+        return Constructor(id, fields)
+
+    def p_fields_0(self, (field,)):
+        " fields ::= field "
+        return [field]
+
+    def p_fields_1(self, (field, _, fields)):
+        " fields ::= field , fields "
+        return fields + [field]
+
+    def p_field_0(self, (type,)):
+        " field ::= Id "
+        return Field(type)
+
+    def p_field_1(self, (type, name)):
+        " field ::= Id Id "
+        return Field(type, name)
+
+    def p_field_2(self, (type, _, name)):
+        " field ::= Id * Id "
+        return Field(type, name, seq=True)
+
+    def p_field_3(self, (type, _, name)):
+        " field ::= Id ? Id "
+        return Field(type, name, opt=True)
+
+    def p_field_4(self, (type, _)):
+        " field ::= Id * "
+        return Field(type, seq=True)
+
+    def p_field_5(self, (type, _)):
+        " field ::= Id ? "
+        return Field(type, opt=True)
+
+builtin_types = ("identifier", "string", "int", "bool", "object")
+
+# below is a collection of classes to capture the AST of an AST :-)
+# not sure if any of the methods are useful yet, but I'm adding them
+# piecemeal as they seem helpful
+
+class AST(object):
+    pass # a marker class
 
 class Module(AST):
-    def __init__(self, name, dfns):
+    def __init__(self, name, dfns, version):
         self.name = name
         self.dfns = dfns
-        self.types = {type.name: type.value for type in dfns}
+        self.version = version
+        self.types = {} # maps type name to value (from dfns)
+        for type in dfns:
+            self.types[type.name.value] = type.value
 
     def __repr__(self):
-        return 'Module({0.name}, {0.dfns})'.format(self)
+        return "Module(%s, %s)" % (self.name, self.dfns)
 
 class Type(AST):
     def __init__(self, name, value):
@@ -55,7 +245,7 @@ class Type(AST):
         self.value = value
 
     def __repr__(self):
-        return 'Type({0.name}, {0.value})'.format(self)
+        return "Type(%s, %s)" % (self.name, self.value)
 
 class Constructor(AST):
     def __init__(self, name, fields=None):
@@ -63,7 +253,7 @@ class Constructor(AST):
         self.fields = fields or []
 
     def __repr__(self):
-        return 'Constructor({0.name}, {0.fields})'.format(self)
+        return "Constructor(%s, %s)" % (self.name, self.fields)
 
 class Field(AST):
     def __init__(self, type, name=None, seq=False, opt=False):
@@ -80,9 +270,9 @@ class Field(AST):
         else:
             extra = ""
         if self.name is None:
-            return 'Field({0.type}{1})'.format(self, extra)
+            return "Field(%s%s)" % (self.type, extra)
         else:
-            return 'Field({0.type}, {0.name}{1})'.format(self, extra)
+            return "Field(%s, %s%s)" % (self.type, self.name, extra)
 
 class Sum(AST):
     def __init__(self, types, attributes=None):
@@ -90,54 +280,56 @@ class Sum(AST):
         self.attributes = attributes or []
 
     def __repr__(self):
-        if self.attributes:
-            return 'Sum({0.types}, {0.attributes})'.format(self)
+        if self.attributes is None:
+            return "Sum(%s)" % self.types
         else:
-            return 'Sum({0.types})'.format(self)
+            return "Sum(%s, %s)" % (self.types, self.attributes)
 
 class Product(AST):
-    def __init__(self, fields, attributes=None):
+    def __init__(self, fields):
         self.fields = fields
-        self.attributes = attributes or []
 
     def __repr__(self):
-        if self.attributes:
-            return 'Product({0.fields}, {0.attributes})'.format(self)
-        else:
-            return 'Product({0.fields})'.format(self)
-
-# A generic visitor for the meta-AST that describes ASDL. This can be used by
-# emitters. Note that this visitor does not provide a generic visit method, so a
-# subclass needs to define visit methods from visitModule to as deep as the
-# interesting node.
-# We also define a Check visitor that makes sure the parsed ASDL is well-formed.
+        return "Product(%s)" % self.fields
 
 class VisitorBase(object):
-    """Generic tree visitor for ASTs."""
-    def __init__(self):
+
+    def __init__(self, skip=False):
         self.cache = {}
+        self.skip = skip
 
-    def visit(self, obj, *args):
-        klass = obj.__class__
+    def visit(self, object, *args):
+        meth = self._dispatch(object)
+        if meth is None:
+            return
+        try:
+            meth(object, *args)
+        except Exception, err:
+            print "Error visiting", repr(object)
+            print err
+            traceback.print_exc()
+            # XXX hack
+            if hasattr(self, 'file'):
+                self.file.flush()
+            os._exit(1)
+
+    def _dispatch(self, object):
+        assert isinstance(object, AST), repr(object)
+        klass = object.__class__
         meth = self.cache.get(klass)
         if meth is None:
             methname = "visit" + klass.__name__
-            meth = getattr(self, methname, None)
+            if self.skip:
+                meth = getattr(self, methname, None)
+            else:
+                meth = getattr(self, methname)
             self.cache[klass] = meth
-        if meth:
-            try:
-                meth(obj, *args)
-            except Exception as e:
-                print("Error visiting %r: %s" % (obj, e))
-                raise
+        return meth
 
 class Check(VisitorBase):
-    """A visitor that checks a parsed ASDL tree for correctness.
-    Errors are printed and accumulated.
-    """
     def __init__(self):
-        super(Check, self).__init__()
+        super(Check, self).__init__(skip=True)
         self.cons = {}
         self.errors = 0
         self.types = {}
@@ -159,8 +351,8 @@ class Check(VisitorBase):
         if conflict is None:
             self.cons[key] = name
         else:
-            print('Redefinition of constructor {}'.format(key))
-            print('Defined in {} and {}'.format(conflict, name))
+            print "Redefinition of constructor %s" % key
+            print "Defined in %s and %s" % (conflict, name)
             self.errors += 1
         for f in cons.fields:
             self.visit(f, key)
@@ -175,11 +367,6 @@ class Check(VisitorBase):
             self.visit(f, name)
 
 def check(mod):
-    """Check the parsed ASDL tree for correctness.
-
-    Return True if success. For failure, the errors are printed out and False
-    is returned.
-    """
     v = Check()
     v.visit(mod)
 
@@ -187,190 +374,40 @@ def check(mod):
         if t not in mod.types and not t in builtin_types:
             v.errors += 1
             uses = ", ".join(v.types[t])
-            print('Undefined type {}, used in {}'.format(t, uses))
-    return not v.errors
-
-# The ASDL parser itself comes next. The only interesting external interface
-# here is the top-level parse function.
-
-def parse(filename):
-    """Parse ASDL from the given file and return a Module node describing it."""
-    with open(filename) as f:
-        parser = ASDLParser()
-        return parser.parse(f.read())
-
-# Types for describing tokens in an ASDL specification.
-class TokenKind:
-    """TokenKind is provides a scope for enumerated token kinds."""
-    (ConstructorId, TypeId, Equals, Comma, Question, Pipe, Asterisk,
-     LParen, RParen, LBrace, RBrace) = range(11)
+            print "Undefined type %s, used in %s" % (t, uses)
 
-    operator_table = {
-        '=': Equals, ',': Comma, '?': Question, '|': Pipe, '(': LParen,
-        ')': RParen, '*': Asterisk, '{': LBrace, '}': RBrace}
-
-Token = namedtuple('Token', 'kind value lineno')
-
-class ASDLSyntaxError(Exception):
-    def __init__(self, msg, lineno=None):
-        self.msg = msg
-        self.lineno = lineno or '<unknown>'
-
-    def __str__(self):
-        return 'Syntax error on line {0.lineno}: {0.msg}'.format(self)
-
-def tokenize_asdl(buf):
-    """Tokenize the given buffer. Yield Token objects."""
-    for lineno, line in enumerate(buf.splitlines(), 1):
-        for m in re.finditer(r'\s*(\w+|--.*|.)', line.strip()):
-            c = m.group(1)
-            if c[0].isalpha():
-                # Some kind of identifier
-                if c[0].isupper():
-                    yield Token(TokenKind.ConstructorId, c, lineno)
-                else:
-                    yield Token(TokenKind.TypeId, c, lineno)
-            elif c[:2] == '--':
-                # Comment
-                break
-            else:
-                # Operators
-                try:
-                    op_kind = TokenKind.operator_table[c]
-                except KeyError:
-                    raise ASDLSyntaxError('Invalid operator %s' % c, lineno)
-                yield Token(op_kind, c, lineno)
-
-class ASDLParser:
-    """Parser for ASDL files.
-
-    Create, then call the parse method on a buffer containing ASDL.
-    This is a simple recursive descent parser that uses tokenize_asdl for the
-    lexing.
-    """
-    def __init__(self):
-        self._tokenizer = None
-        self.cur_token = None
-
-    def parse(self, buf):
-        """Parse the ASDL in the buffer and return an AST with a Module root.
-        """
-        self._tokenizer = tokenize_asdl(buf)
-        self._advance()
-        return self._parse_module()
-
-    def _parse_module(self):
-        if self._at_keyword('module'):
-            self._advance()
-        else:
-            raise ASDLSyntaxError(
-                'Expected "module" (found {})'.format(self.cur_token.value),
-                self.cur_token.lineno)
-        name = self._match(self._id_kinds)
-        self._match(TokenKind.LBrace)
-        defs = self._parse_definitions()
-        self._match(TokenKind.RBrace)
-        return Module(name, defs)
-
-    def _parse_definitions(self):
-        defs = []
-        while self.cur_token.kind == TokenKind.TypeId:
-            typename = self._advance()
-            self._match(TokenKind.Equals)
-            type = self._parse_type()
-            defs.append(Type(typename, type))
-        return defs
-
-    def _parse_type(self):
-        if self.cur_token.kind == TokenKind.LParen:
-            # If we see a (, it's a product
-            return self._parse_product()
-        else:
-            # Otherwise it's a sum. Look for ConstructorId
-            sumlist = [Constructor(self._match(TokenKind.ConstructorId),
-                                   self._parse_optional_fields())]
-            while self.cur_token.kind == TokenKind.Pipe:
-                # More constructors
-                self._advance()
-                sumlist.append(Constructor(
-                                self._match(TokenKind.ConstructorId),
-                                self._parse_optional_fields()))
-            return Sum(sumlist, self._parse_optional_attributes())
-
-    def _parse_product(self):
-        return Product(self._parse_fields(), self._parse_optional_attributes())
-
-    def _parse_fields(self):
-        fields = []
-        self._match(TokenKind.LParen)
-        while self.cur_token.kind == TokenKind.TypeId:
-            typename = self._advance()
-            is_seq, is_opt = self._parse_optional_field_quantifier()
-            id = (self._advance() if self.cur_token.kind in self._id_kinds
-                                  else None)
-            fields.append(Field(typename, id, seq=is_seq, opt=is_opt))
-            if self.cur_token.kind == TokenKind.RParen:
-                break
-            elif self.cur_token.kind == TokenKind.Comma:
-                self._advance()
-        self._match(TokenKind.RParen)
-        return fields
-
-    def _parse_optional_fields(self):
-        if self.cur_token.kind == TokenKind.LParen:
-            return self._parse_fields()
-        else:
-            return None
+    return not v.errors
 
-    def _parse_optional_attributes(self):
-        if self._at_keyword('attributes'):
-            self._advance()
-            return self._parse_fields()
+def parse(file):
+    scanner = ASDLScanner()
+    parser = ASDLParser()
+
+    buf = open(file).read()
+    tokens = scanner.tokenize(buf)
+    try:
+        return parser.parse(tokens)
+    except ASDLSyntaxError, err:
+        print err
+        lines = buf.split("\n")
+        print lines[err.lineno - 1] # lines starts at 0, files at 1
+
+if __name__ == "__main__":
+    import glob
+    import sys
+
+    if len(sys.argv) > 1:
+        files = sys.argv[1:]
+    else:
+        testdir = "tests"
+        files = glob.glob(testdir + "/*.asdl")
+
+    for file in files:
+        print file
+        mod = parse(file)
+        print "module", mod.name
+        print len(mod.dfns), "definitions"
+        if not check(mod):
+            print "Check failed"
         else:
-            return None
-
-    def _parse_optional_field_quantifier(self):
-        is_seq, is_opt = False, False
-        if self.cur_token.kind == TokenKind.Asterisk:
-            is_seq = True
-            self._advance()
-        elif self.cur_token.kind == TokenKind.Question:
-            is_opt = True
-            self._advance()
-        return is_seq, is_opt
-
-    def _advance(self):
-        """ Return the value of the current token and read the next one into
-        self.cur_token.
-        """
-        cur_val = None if self.cur_token is None else self.cur_token.value
-        try:
-            self.cur_token = next(self._tokenizer)
-        except StopIteration:
-            self.cur_token = None
-        return cur_val
-
-    _id_kinds = (TokenKind.ConstructorId, TokenKind.TypeId)
-
-    def _match(self, kind):
-        """The 'match' primitive of RD parsers.
-
-        * Verifies that the current token is of the given kind (kind can
-          be a tuple, in which the kind must match one of its members).
-        * Returns the value of the current token
-        * Reads in the next token
-        """
-        if (isinstance(kind, tuple) and self.cur_token.kind in kind or
-            self.cur_token.kind == kind
-            ):
-            value = self.cur_token.value
-            self._advance()
-            return value
-        else:
-            raise ASDLSyntaxError(
-                'Unmatched {} (found {})'.format(kind, self.cur_token.kind),
-                self.cur_token.lineno)
-
-    def _at_keyword(self, keyword):
-        return (self.cur_token.kind == TokenKind.TypeId and
-                self.cur_token.value == keyword)
+            for dfn in mod.dfns:
+                print dfn.type
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index 52495e9..ac61c78 100755
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -1,18 +1,25 @@
 #! /usr/bin/env python
 """Generate C code from an ASDL description."""
 
+# TO DO
+# handle fields that have a type but no name
+
 import os, sys
 
 import asdl
 
-TABSIZE = 4
+TABSIZE = 8
 MAX_COL = 80
 
 def get_c_type(name):
     """Return a string for the C name of the type.
 
-    This function special cases the default types provided by asdl.
+    This function special cases the default types provided by asdl:
+    identifier, string, int, bool.
     """
+    # XXX ack! need to figure out where Id is useful and where string
+    if isinstance(name, asdl.Id):
+        name = name.value
     if name in asdl.builtin_types:
         return name
     else:
@@ -77,20 +84,8 @@ class EmitVisitor(asdl.VisitorBase):
 
     def __init__(self, file):
         self.file = file
-        self.identifiers = set()
-        self.singletons = set()
-        self.types = set()
         super(EmitVisitor, self).__init__()
 
-    def emit_identifier(self, name):
-        self.identifiers.add(str(name))
-
-    def emit_singleton(self, name):
-        self.singletons.add(str(name))
-
-    def emit_type(self, name):
-        self.types.add(str(name))
-
     def emit(self, s, depth, reflow=True):
         # XXX reflow long lines?
         if reflow:
@@ -98,9 +93,8 @@
         else:
             lines = [s]
         for line in lines:
-            if line:
-                line = (" " * TABSIZE * depth) + line
-            self.file.write(line + "\n")
+            line = (" " * TABSIZE * depth) + line + "\n"
+            self.file.write(line)
 
 
 class TypeDefVisitor(EmitVisitor):
@@ -186,6 +180,9 @@ class StructVisitor(EmitVisitor):
                 self.visit(f, depth + 1)
             self.emit("} %s;" % cons.name, depth)
             self.emit("", depth)
+        else:
+            # XXX not sure what I want here, nothing is probably fine
+            pass
 
     def visitField(self, field, depth):
         # XXX need to lookup field.type, because it might be something
@@ -193,7 +190,7 @@
         ctype = get_c_type(field.type)
         name = field.name
         if field.seq:
-            if field.type == 'cmpop':
+            if field.type.value in ('cmpop',):
                 self.emit("asdl_int_seq *%(name)s;" % locals(), depth)
             else:
                 self.emit("asdl_seq *%(name)s;" % locals(), depth)
@@ -204,11 +201,6 @@
         self.emit("struct _%(name)s {" % locals(), depth)
         for f in product.fields:
             self.visit(f, depth + 1)
-        for field in product.attributes:
-            # rudimentary attribute handling
-            type = str(field.type)
-            assert type in asdl.builtin_types, type
-            self.emit("%s %s;" % (type, field.name), depth + 1);
         self.emit("};", depth)
         self.emit("", depth)
@@ -248,7 +240,7 @@ class PrototypeVisitor(EmitVisitor):
         name = f.name
         # XXX should extend get_c_type() to handle this
         if f.seq:
-            if f.type == 'cmpop':
+            if f.type.value in ('cmpop',):
                 ctype = "asdl_int_seq *"
             else:
                 ctype = "asdl_seq *"
@@ -280,9 +272,7 @@
 
     def visitProduct(self, prod, name):
         self.emit_function(name, get_c_type(name),
-                           self.get_args(prod.fields),
-                           self.get_args(prod.attributes),
-                           union=False)
+                           self.get_args(prod.fields), [], union=False)
 
 
 class FunctionVisitor(PrototypeVisitor):
@@ -302,7 +292,8 @@
         emit("{")
         emit("%s p;" % ctype, 1)
         for argtype, argname, opt in args:
-            if not opt and argtype != "int":
+            # XXX hack alert: false is allowed for a bool
+            if not opt and not (argtype == "bool" or argtype == "int"):
                 emit("if (!%s) {" % argname, 1)
                 emit("PyErr_SetString(PyExc_ValueError,", 2)
                 msg = "field %s is required for %s" % (argname, name)
@@ -336,8 +327,7 @@
             self.emit(s, depth, reflow)
         for argtype, argname, opt in args:
             emit("p->%s = %s;" % (argname, argname), 1)
-        for argtype, argname, opt in attrs:
-            emit("p->%s = %s;" % (argname, argname), 1)
+        assert not attrs
 
 
 class PickleVisitor(EmitVisitor):
@@ -376,18 +366,20 @@
         self.emit("int", 0)
         self.emit("obj2ast_%s(PyObject* obj, %s* out, PyArena* arena)" % (name, ctype), 0)
         self.emit("{", 0)
+        self.emit("PyObject* tmp = NULL;", 1)
         self.emit("int isinstance;", 1)
         self.emit("", 0)
 
-    def sumTrailer(self, name, add_label=False):
+    def sumTrailer(self, name):
         self.emit("", 0)
+        self.emit("tmp = PyObject_Repr(obj);", 1)
         # there's really nothing more we can do if this fails ...
-        error = "expected some sort of %s, but got %%R" % name
-        format = "PyErr_Format(PyExc_TypeError, \"%s\", obj);"
+        self.emit("if (tmp == NULL) goto failed;", 1)
+        error = "expected some sort of %s, but got %%.400s" % name
+        format = "PyErr_Format(PyExc_TypeError, \"%s\", PyString_AS_STRING(tmp));"
         self.emit(format % error, 1, reflow=False)
-        if add_label:
-            self.emit("failed:", 1)
-            self.emit("Py_XDECREF(tmp);", 1)
+        self.emit("failed:", 0)
+        self.emit("Py_XDECREF(tmp);", 1)
         self.emit("return 1;", 1)
         self.emit("}", 0)
         self.emit("", 0)
@@ -396,7 +388,7 @@
         self.funcHeader(name)
         for t in sum.types:
             line = ("isinstance = PyObject_IsInstance(obj, "
-                    "astmodulestate_global->%s_type);")
+                    "(PyObject *)%s_type);")
             self.emit(line % (t.name,), 1)
             self.emit("if (isinstance == -1) {", 1)
             self.emit("return 1;", 2)
@@ -412,8 +404,6 @@
 
     def complexSum(self, sum, name):
         self.funcHeader(name)
-        self.emit("PyObject *tmp = NULL;", 1)
-        self.emit("PyObject *tp;", 1)
         for a in sum.attributes:
             self.visitAttributeDeclaration(a, name, sum=sum)
         self.emit("", 0)
@@ -425,8 +415,8 @@
         for a in sum.attributes:
             self.visitField(a, name, sum=sum, depth=1)
         for t in sum.types:
-            self.emit("tp = astmodulestate_global->%s_type;" % (t.name,), 1)
-            self.emit("isinstance = PyObject_IsInstance(obj, tp);", 1)
+            line = "isinstance = PyObject_IsInstance(obj, (PyObject*)%s_type);"
+            self.emit(line % (t.name,), 1)
             self.emit("if (isinstance == -1) {", 1)
             self.emit("return 1;", 2)
             self.emit("}", 1)
@@ -436,12 +426,12 @@
             self.emit("", 0)
             for f in t.fields:
                 self.visitField(f, t.name, sum=sum, depth=2)
-            args = [f.name for f in t.fields] + [a.name for a in sum.attributes]
+            args = [f.name.value for f in t.fields] + [a.name.value for a in sum.attributes]
             self.emit("*out = %s(%s);" % (t.name, self.buildArgs(args)), 2)
             self.emit("if (*out == NULL) goto failed;", 2)
             self.emit("return 0;", 2)
             self.emit("}", 1)
-        self.sumTrailer(name, True)
+        self.sumTrailer(name)
 
     def visitAttributeDeclaration(self, a, name, sum=sum):
         ctype = get_c_type(a.type)
@@ -461,15 +451,10 @@
         self.emit("PyObject* tmp = NULL;", 1)
         for f in prod.fields:
             self.visitFieldDeclaration(f, name, prod=prod, depth=1)
-        for a in prod.attributes:
-            self.visitFieldDeclaration(a, name, prod=prod, depth=1)
         self.emit("", 0)
         for f in prod.fields:
             self.visitField(f, name, prod=prod, depth=1)
-        for a in prod.attributes:
-            self.visitField(a, name, prod=prod, depth=1)
-        args = [f.name for f in prod.fields]
-        args.extend([a.name for a in prod.attributes])
+        args = [f.name.value for f in prod.fields]
         self.emit("*out = %s(%s);" % (name, self.buildArgs(args)), 1)
         self.emit("return 0;", 1)
         self.emit("failed:", 0)
@@ -491,8 +476,8 @@
 
     def isSimpleSum(self, field):
         # XXX can the members of this list be determined automatically?
-        return field.type in ('expr_context', 'boolop', 'operator',
-                              'unaryop', 'cmpop')
+        return field.type.value in ('expr_context', 'boolop', 'operator',
+                                    'unaryop', 'cmpop')
 
     def isNumeric(self, field):
         return get_c_type(field.type) in ("int", "bool")
@@ -502,52 +487,31 @@
 
     def visitField(self, field, name, sum=None, prod=None, depth=0):
         ctype = get_c_type(field.type)
-        line = "if (_PyObject_LookupAttr(obj, astmodulestate_global->%s, &tmp) < 0) {"
-        self.emit(line % field.name, depth)
-        self.emit("return 1;", depth+1)
-        self.emit("}", depth)
-        if not field.opt:
-            self.emit("if (tmp == NULL) {", depth)
-            message = "required field \\\"%s\\\" missing from %s" % (field.name, name)
-            format = "PyErr_SetString(PyExc_TypeError, \"%s\");"
-            self.emit(format % message, depth+1, reflow=False)
-            self.emit("return 1;", depth+1)
-        else:
-            self.emit("if (tmp == NULL || tmp == Py_None) {", depth)
-            self.emit("Py_CLEAR(tmp);", depth+1)
-            if self.isNumeric(field):
-                self.emit("%s = 0;" % field.name, depth+1)
-            elif not self.isSimpleType(field):
-                self.emit("%s = NULL;" % field.name, depth+1)
-            else:
-                raise TypeError("could not determine the default value for %s" % field.name)
-        self.emit("}", depth)
-        self.emit("else {", depth)
-
+        self.emit("if (PyObject_HasAttrString(obj, \"%s\")) {" % field.name, depth)
         self.emit("int res;", depth+1)
         if field.seq:
             self.emit("Py_ssize_t len;", depth+1)
            self.emit("Py_ssize_t i;", depth+1)
+        self.emit("tmp = PyObject_GetAttrString(obj, \"%s\");" % field.name, depth+1)
+        self.emit("if (tmp == NULL) goto failed;", depth+1)
+        if field.seq:
            self.emit("if (!PyList_Check(tmp)) {", depth+1)
            self.emit("PyErr_Format(PyExc_TypeError, \"%s field \\\"%s\\\" must "
-                      "be a list, not a %%.200s\", _PyType_Name(Py_TYPE(tmp)));" %
+                      "be a list, not a %%.200s\", tmp->ob_type->tp_name);" %
                      (name, field.name),
                      depth+2, reflow=False)
            self.emit("goto failed;", depth+2)
            self.emit("}", depth+1)
            self.emit("len = PyList_GET_SIZE(tmp);", depth+1)
            if self.isSimpleType(field):
-                self.emit("%s = _Py_asdl_int_seq_new(len, arena);" % field.name, depth+1)
+                self.emit("%s = asdl_int_seq_new(len, arena);" % field.name, depth+1)
            else:
-                self.emit("%s = _Py_asdl_seq_new(len, arena);" % field.name, depth+1)
+                self.emit("%s = asdl_seq_new(len, arena);" % field.name, depth+1)
            self.emit("if (%s == NULL) goto failed;" % field.name, depth+1)
            self.emit("for (i = 0; i < len; i++) {", depth+1)
            self.emit("%s val;" % ctype, depth+2)
-            self.emit("PyObject *tmp2 = PyList_GET_ITEM(tmp, i);", depth+2)
-            self.emit("Py_INCREF(tmp2);", depth+2)
-            self.emit("res = obj2ast_%s(tmp2, &val, arena);" %
+            self.emit("res = obj2ast_%s(PyList_GET_ITEM(tmp, i), &val, arena);" %
                      field.type, depth+2, reflow=False)
-            self.emit("Py_DECREF(tmp2);", depth+2)
            self.emit("if (res != 0) goto failed;", depth+2)
            self.emit("if (len != PyList_GET_SIZE(tmp)) {", depth+2)
            self.emit("PyErr_SetString(PyExc_RuntimeError, \"%s field \\\"%s\\\" "
@@ -563,7 +527,21 @@
                      (field.type, field.name), depth+1)
            self.emit("if (res != 0) goto failed;", depth+1)
 
-        self.emit("Py_CLEAR(tmp);", depth+1)
+        self.emit("Py_XDECREF(tmp);", depth+1)
+        self.emit("tmp = NULL;", depth+1)
+        self.emit("} else {", depth)
+        if not field.opt:
+            message = "required field \\\"%s\\\" missing from %s" % (field.name, name)
+            format = "PyErr_SetString(PyExc_TypeError, \"%s\");"
+            self.emit(format % message, depth+1, reflow=False)
+            self.emit("return 1;", depth+1)
+        else:
+            if self.isNumeric(field):
+                self.emit("%s = 0;" % field.name, depth+1)
+            elif not self.isSimpleType(field):
+                self.emit("%s = NULL;" % field.name, depth+1)
+            else:
+                raise TypeError("could not determine the default value for %s" % field.name)
         self.emit("}", depth)
@@ -580,46 +558,37 @@ class MarshalPrototypeVisitor(PickleVisitor):
 
 class PyTypesDeclareVisitor(PickleVisitor):
 
     def visitProduct(self, prod, name):
-        self.emit_type("%s_type" % name)
+        self.emit("static PyTypeObject *%s_type;" % name, 0)
         self.emit("static PyObject* ast2obj_%s(void*);" % name, 0)
-        if prod.attributes:
-            for a in prod.attributes:
-                self.emit_identifier(a.name)
-            self.emit("static const char * const %s_attributes[] = {" % name, 0)
-            for a in prod.attributes:
-                self.emit('"%s",' % a.name, 1)
-            self.emit("};", 0)
         if prod.fields:
-            for f in prod.fields:
-                self.emit_identifier(f.name)
-            self.emit("static const char * const %s_fields[]={" % name,0)
+            self.emit("static char *%s_fields[]={" % name,0)
             for f in prod.fields:
                 self.emit('"%s",' % f.name, 1)
             self.emit("};", 0)
 
     def visitSum(self, sum, name):
-        self.emit_type("%s_type" % name)
+        self.emit("static PyTypeObject *%s_type;" % name, 0)
         if sum.attributes:
-            for a in sum.attributes:
-                self.emit_identifier(a.name)
-            self.emit("static const char * const %s_attributes[] = {" % name, 0)
+            self.emit("static char *%s_attributes[] = {" % name, 0)
             for a in sum.attributes:
                 self.emit('"%s",' % a.name, 1)
             self.emit("};", 0)
         ptype = "void*"
         if is_simple(sum):
             ptype = get_c_type(name)
+            tnames = []
            for t in sum.types:
-                self.emit_singleton("%s_singleton" % t.name)
+                tnames.append(str(t.name)+"_singleton")
+            tnames = ", *".join(tnames)
+            self.emit("static PyObject *%s;" % tnames, 0)
         self.emit("static PyObject* ast2obj_%s(%s);" % (name, ptype), 0)
         for t in sum.types:
             self.visitConstructor(t, name)
 
     def visitConstructor(self, cons, name):
+        self.emit("static PyTypeObject *%s_type;" % cons.name, 0)
         if cons.fields:
-            for t in cons.fields:
-                self.emit_identifier(t.name)
-            self.emit("static const char * const %s_fields[]={" % cons.name, 0)
+            self.emit("static char *%s_fields[]={" % cons.name, 0)
             for t in cons.fields:
                 self.emit('"%s",' % t.name, 1)
             self.emit("};",0)
@@ -628,74 +597,43 @@ class PyTypesVisitor(PickleVisitor):
 
     def visitModule(self, mod):
         self.emit("""
-
-typedef struct {
-    PyObject_HEAD
-    PyObject *dict;
-} AST_object;
-
-static void
-ast_dealloc(AST_object *self)
-{
-    /* bpo-31095: UnTrack is needed before calling any callbacks */
-    PyTypeObject *tp = Py_TYPE(self);
-    PyObject_GC_UnTrack(self);
-    Py_CLEAR(self->dict);
-    freefunc free_func = PyType_GetSlot(tp, Py_tp_free);
-    assert(free_func != NULL);
-    free_func(self);
-    Py_DECREF(tp);
-}
-
-static int
-ast_traverse(AST_object *self, visitproc visit, void *arg)
-{
-    Py_VISIT(self->dict);
-    return 0;
-}
-
-static int
-ast_clear(AST_object *self)
-{
-    Py_CLEAR(self->dict);
-    return 0;
-}
-
 static int
 ast_type_init(PyObject *self, PyObject *args, PyObject *kw)
 {
     Py_ssize_t i, numfields = 0;
     int res = -1;
     PyObject *key, *value, *fields;
-    if (_PyObject_LookupAttr((PyObject*)Py_TYPE(self), astmodulestate_global->_fields, &fields) < 0) {
-        goto cleanup;
-    }
+    fields = PyObject_GetAttrString((PyObject*)Py_TYPE(self), "_fields");
+    if (!fields)
+        PyErr_Clear();
     if (fields) {
         numfields = PySequence_Size(fields);
         if (numfields == -1)
             goto cleanup;
     }
-    res = 0; /* if no error occurs, this stays 0 to the end */
-    if (numfields < PyTuple_GET_SIZE(args)) {
-        PyErr_Format(PyExc_TypeError, "%.400s constructor takes at most "
-                     "%zd positional argument%s",
-                     _PyType_Name(Py_TYPE(self)),
-                     numfields, numfields == 1 ? "" : "s");
-        res = -1;
-        goto cleanup;
-    }
-    for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
-        /* cannot be reached when fields is NULL */
-        PyObject *name = PySequence_GetItem(fields, i);
-        if (!name) {
+    if (PyTuple_GET_SIZE(args) > 0) {
+        if (numfields != PyTuple_GET_SIZE(args)) {
+            PyErr_Format(PyExc_TypeError, "%.400s constructor takes %s"
+                         "%zd positional argument%s",
+                         Py_TYPE(self)->tp_name,
+                         numfields == 0 ? "" : "either 0 or ",
+                         numfields, numfields == 1 ? "" : "s");
             res = -1;
             goto cleanup;
         }
-        res = PyObject_SetAttr(self, name, PyTuple_GET_ITEM(args, i));
-        Py_DECREF(name);
-        if (res < 0)
-            goto cleanup;
+        for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
+            /* cannot be reached when fields is NULL */
+            PyObject *name = PySequence_GetItem(fields, i);
+            if (!name) {
+                res = -1;
+                goto cleanup;
+            }
+            res = PyObject_SetAttr(self, name, PyTuple_GET_ITEM(args, i));
+            Py_DECREF(name);
+            if (res < 0)
+                goto cleanup;
+        }
     }
     if (kw) {
         i = 0;  /* needed by PyDict_Next */
@@ -714,95 +652,105 @@ ast_type_init(PyObject *self, PyObject *args, PyObject *kw)
 static PyObject *
 ast_type_reduce(PyObject *self, PyObject *unused)
 {
-    PyObject *dict;
-    if (_PyObject_LookupAttr(self, astmodulestate_global->__dict__, &dict) < 0) {
-        return NULL;
+    PyObject *res;
+    PyObject *dict = PyObject_GetAttrString(self, "__dict__");
+    if (dict == NULL) {
+        if (PyErr_ExceptionMatches(PyExc_AttributeError))
+            PyErr_Clear();
+        else
+            return NULL;
     }
     if (dict) {
-        return Py_BuildValue("O()N", Py_TYPE(self), dict);
+        res = Py_BuildValue("O()O", Py_TYPE(self), dict);
+        Py_DECREF(dict);
+        return res;
     }
     return Py_BuildValue("O()", Py_TYPE(self));
 }
 
-static PyMemberDef ast_type_members[] = {
-    {"__dictoffset__", T_PYSSIZET, offsetof(AST_object, dict), READONLY},
-    {NULL}  /* Sentinel */
-};
-
 static PyMethodDef ast_type_methods[] = {
     {"__reduce__", ast_type_reduce, METH_NOARGS, NULL},
     {NULL}
 };
 
-static PyGetSetDef ast_type_getsets[] = {
-    {"__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict},
-    {NULL}
-};
-
-static PyType_Slot AST_type_slots[] = {
-    {Py_tp_dealloc, ast_dealloc},
-    {Py_tp_getattro, PyObject_GenericGetAttr},
-    {Py_tp_setattro, PyObject_GenericSetAttr},
-    {Py_tp_traverse, ast_traverse},
-    {Py_tp_clear, ast_clear},
-    {Py_tp_members, ast_type_members},
-    {Py_tp_methods, ast_type_methods},
-    {Py_tp_getset, ast_type_getsets},
-    {Py_tp_init, ast_type_init},
-    {Py_tp_alloc, PyType_GenericAlloc},
-    {Py_tp_new, PyType_GenericNew},
-    {Py_tp_free, PyObject_GC_Del},
-    {0, 0},
-};
-
-static PyType_Spec AST_type_spec = {
+static PyTypeObject AST_type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
     "_ast.AST",
-    sizeof(AST_object),
+    sizeof(PyObject),
     0,
-    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
-    AST_type_slots
+    0,                       /* tp_dealloc */
+    0,                       /* tp_print */
+    0,                       /* tp_getattr */
+    0,                       /* tp_setattr */
+    0,                       /* tp_compare */
+    0,                       /* tp_repr */
+    0,                       /* tp_as_number */
+    0,                       /* tp_as_sequence */
+    0,                       /* tp_as_mapping */
+    0,                       /* tp_hash */
+    0,                       /* tp_call */
+    0,                       /* tp_str */
+    PyObject_GenericGetAttr, /* tp_getattro */
+    PyObject_GenericSetAttr, /* tp_setattro */
+    0,                       /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+    0,                       /* tp_doc */
+    0,                       /* tp_traverse */
+    0,                       /* tp_clear */
+    0,                       /* tp_richcompare */
+    0,                       /* tp_weaklistoffset */
+    0,                       /* tp_iter */
+    0,                       /* tp_iternext */
+    ast_type_methods,        /* tp_methods */
+    0,                       /* tp_members */
+    0,                       /* tp_getset */
+    0,                       /* tp_base */
+    0,                       /* tp_dict */
+    0,                       /* tp_descr_get */
+    0,                       /* tp_descr_set */
+    0,                       /* tp_dictoffset */
+    (initproc)ast_type_init, /* tp_init */
+    PyType_GenericAlloc,     /* tp_alloc */
+    PyType_GenericNew,       /* tp_new */
+    PyObject_Del,            /* tp_free */
 };
 
-static PyObject *
-make_type(const char *type, PyObject* base, const char* const* fields, int num_fields)
+
+static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int num_fields)
 {
     PyObject *fnames, *result;
     int i;
     fnames = PyTuple_New(num_fields);
     if (!fnames) return NULL;
     for (i = 0; i < num_fields; i++) {
-        PyObject *field = PyUnicode_InternFromString(fields[i]);
+        PyObject *field = PyString_FromString(fields[i]);
         if (!field) {
             Py_DECREF(fnames);
             return NULL;
         }
         PyTuple_SET_ITEM(fnames, i, field);
     }
-    result = PyObject_CallFunction((PyObject*)&PyType_Type, "s(O){OOOO}",
-                    type, base,
-                    astmodulestate_global->_fields, fnames,
-                    astmodulestate_global->__module__,
-                    astmodulestate_global->_ast);
+    result = PyObject_CallFunction((PyObject*)&PyType_Type, "s(O){sOss}",
+                    type, base, "_fields", fnames, "__module__", "_ast");
     Py_DECREF(fnames);
-    return result;
+    return (PyTypeObject*)result;
 }
 
-static int
-add_attributes(PyObject *type, const char * const *attrs, int num_fields)
+static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
 {
     int i, result;
     PyObject *s, *l = PyTuple_New(num_fields);
     if (!l)
         return 0;
     for (i = 0; i < num_fields; i++) {
-        s = PyUnicode_InternFromString(attrs[i]);
+        s = PyString_FromString(attrs[i]);
         if (!s) {
             Py_DECREF(l);
             return 0;
         }
         PyTuple_SET_ITEM(l, i, s);
     }
-    result = PyObject_SetAttr(type, astmodulestate_global->_attributes, l) >= 0;
+    result = PyObject_SetAttrString((PyObject*)type, "_attributes", l) >= 0;
     Py_DECREF(l);
     return result;
 }
@@ -811,7 +759,7 @@ add_attributes(PyObject *type, const char * const *attrs, int num_fields)
 static PyObject* ast2obj_list(asdl_seq *seq, PyObject* (*func)(void*))
 {
-    Py_ssize_t i, n = asdl_seq_LEN(seq);
+    int i, n = asdl_seq_LEN(seq);
     PyObject *result = PyList_New(n);
     PyObject *value;
     if (!result)
@@ -834,15 +782,16 @@ static PyObject* ast2obj_object(void *o)
     Py_INCREF((PyObject*)o);
     return (PyObject*)o;
 }
-#define ast2obj_singleton ast2obj_object
-#define ast2obj_constant ast2obj_object
 #define ast2obj_identifier ast2obj_object
 #define ast2obj_string ast2obj_object
-#define ast2obj_bytes ast2obj_object
+static PyObject* ast2obj_bool(bool b)
+{
+    return PyBool_FromLong(b);
+}
 
 static PyObject* ast2obj_int(long b)
 {
-    return PyLong_FromLong(b);
+    return PyInt_FromLong(b);
 }
 
 /* Conversion Python -> AST */
@@ -851,32 +800,18 @@
 static int obj2ast_object(PyObject* obj, PyObject** out, PyArena* arena)
 {
     if (obj == Py_None)
         obj = NULL;
-    if (obj) {
-        if (PyArena_AddPyObject(arena, obj) < 0) {
-            *out = NULL;
-            return -1;
-        }
-        Py_INCREF(obj);
-    }
-    *out = obj;
-    return 0;
-}
-
-static int obj2ast_constant(PyObject* obj, PyObject** out, PyArena* arena)
-{
-    if (PyArena_AddPyObject(arena, obj) < 0) {
-        *out = NULL;
-        return -1;
-    }
-    Py_INCREF(obj);
+    if (obj)
+        PyArena_AddPyObject(arena, obj);
+    Py_XINCREF(obj);
     *out = obj;
     return 0;
 }
 
 static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena)
 {
-    if (!PyUnicode_CheckExact(obj) && obj != Py_None) {
-        PyErr_SetString(PyExc_TypeError, "AST identifier must be of type str");
+    if (!PyString_CheckExact(obj) && obj != Py_None) {
+        PyErr_Format(PyExc_TypeError,
+                     "AST identifier must be of type str");
         return 1;
     }
     return obj2ast_object(obj, out, arena);
@@ -884,8 +819,9 @@
 static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena)
 {
-    if (!PyUnicode_CheckExact(obj) && !PyBytes_CheckExact(obj)) {
-        PyErr_SetString(PyExc_TypeError, "AST string must be of type str");
+    if (!PyString_CheckExact(obj) && !PyUnicode_CheckExact(obj)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "AST string must be of type str or unicode");
         return 1;
     }
     return obj2ast_object(obj, out, arena);
@@ -894,25 +830,47 @@
 static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
 {
     int i;
-    if (!PyLong_Check(obj)) {
-        PyErr_Format(PyExc_ValueError, "invalid integer value: %R", obj);
+    if (!_PyAnyInt_Check(obj)) {
+        PyObject *s = PyObject_Repr(obj);
+        if (s == NULL) return 1;
+        PyErr_Format(PyExc_ValueError, "invalid integer value: %.400s",
+                     PyString_AS_STRING(s));
+        Py_DECREF(s);
         return 1;
     }
-    i = _PyLong_AsInt(obj);
+    i = (int)PyLong_AsLong(obj);
     if (i == -1 && PyErr_Occurred())
         return 1;
     *out = i;
     return 0;
 }
 
+static int obj2ast_bool(PyObject* obj, bool* out, PyArena* arena)
+{
+    if (!PyBool_Check(obj)) {
+        PyObject *s = PyObject_Repr(obj);
+        if (s == NULL) return 1;
+        PyErr_Format(PyExc_ValueError, "invalid boolean value: %.400s",
+                     PyString_AS_STRING(s));
+        Py_DECREF(s);
+        return 1;
+    }
+
+    *out = (obj == Py_True);
+    return 0;
+}
+
 static int add_ast_fields(void)
 {
-    PyObject *empty_tuple;
+    PyObject *empty_tuple, *d;
+    if (PyType_Ready(&AST_type) < 0)
+        return -1;
+    d = AST_type.tp_dict;
     empty_tuple = PyTuple_New(0);
     if (!empty_tuple ||
-        PyObject_SetAttrString(astmodulestate_global->AST_type, "_fields", empty_tuple) < 0 ||
-        PyObject_SetAttrString(astmodulestate_global->AST_type, "_attributes", empty_tuple) < 0) {
+        PyDict_SetItemString(d, "_fields", empty_tuple) < 0 ||
+        PyDict_SetItemString(d, "_attributes", empty_tuple) < 0) {
         Py_XDECREF(empty_tuple);
         return -1;
     }
@@ -924,91 +882,71 @@ static int add_ast_fields(void)
 
         self.emit("static int init_types(void)",0)
         self.emit("{", 0)
-        self.emit("PyObject *m;", 1)
-        self.emit("if (PyState_FindModule(&_astmodule) == NULL) {", 1)
-        self.emit("m = PyModule_Create(&_astmodule);", 2)
-        self.emit("if (!m) return 0;", 2)
-        self.emit("PyState_AddModule(m, &_astmodule);", 2)
-        self.emit("}", 1)
-        self.emit("astmodulestate *state = astmodulestate_global;", 1)
-        self.emit("if (state->initialized) return 1;", 1)
-        self.emit("if (init_identifiers() < 0) return 0;", 1)
-        self.emit("state->AST_type = PyType_FromSpec(&AST_type_spec);", 1)
-        self.emit("if (!state->AST_type) return 0;", 1)
+        self.emit("static int initialized;", 1)
+        self.emit("if (initialized) return 1;", 1)
         self.emit("if (add_ast_fields() < 0) return 0;", 1)
         for dfn in mod.dfns:
             self.visit(dfn)
-        self.emit("state->initialized = 1;", 1)
+        self.emit("initialized = 1;", 1)
         self.emit("return 1;", 1);
         self.emit("}", 0)
 
     def visitProduct(self, prod, name):
         if prod.fields:
-            fields = name+"_fields"
+            fields = name.value+"_fields"
         else:
             fields = "NULL"
-        self.emit('state->%s_type = make_type("%s", state->AST_type, %s, %d);' %
+        self.emit('%s_type = make_type("%s", &AST_type, %s, %d);' %
                         (name, name, fields, len(prod.fields)), 1)
-        self.emit("if (!state->%s_type) return 0;" % name, 1)
-        self.emit_type("AST_type")
-        self.emit_type("%s_type" % name)
-        if prod.attributes:
-            self.emit("if (!add_attributes(state->%s_type, %s_attributes, %d)) return 0;" %
-                            (name, name, len(prod.attributes)), 1)
-        else:
-            self.emit("if (!add_attributes(state->%s_type, NULL, 0)) return 0;" % name, 1)
+        self.emit("if (!%s_type) return 0;" % name, 1)
 
     def visitSum(self, sum, name):
-        self.emit('state->%s_type = make_type("%s", state->AST_type, NULL, 0);' %
+        self.emit('%s_type = make_type("%s", &AST_type, NULL, 0);' %
                         (name, name), 1)
-        self.emit_type("%s_type" % name)
-        self.emit("if (!state->%s_type) return 0;" % name, 1)
+        self.emit("if (!%s_type) return 0;" % name, 1)
         if sum.attributes:
-            self.emit("if (!add_attributes(state->%s_type, %s_attributes, %d)) return 0;" %
+            self.emit("if (!add_attributes(%s_type, %s_attributes, %d)) return 0;" %
                             (name, name, len(sum.attributes)), 1)
        else:
-            self.emit("if (!add_attributes(state->%s_type, NULL, 0)) return 0;" % name, 1)
+            self.emit("if (!add_attributes(%s_type, NULL, 0)) return 0;" % name, 1)
        simple = is_simple(sum)
        for t in sum.types:
            self.visitConstructor(t, name, simple)
 
    def visitConstructor(self, cons, name, simple):
        if cons.fields:
-            fields = cons.name+"_fields"
+            fields = cons.name.value+"_fields"
        else:
            fields = "NULL"
-        self.emit('state->%s_type = make_type("%s", state->%s_type, %s, %d);' %
+        self.emit('%s_type = make_type("%s", %s_type, %s, %d);' %
                            (cons.name, cons.name, name, fields, len(cons.fields)), 1)
-        self.emit("if (!state->%s_type) return 0;" % cons.name, 1)
-        self.emit_type("%s_type" % cons.name)
+        self.emit("if (!%s_type) return 0;" % cons.name, 1)
        if simple:
-            self.emit("state->%s_singleton = PyType_GenericNew((PyTypeObject *)"
-                      "state->%s_type, NULL, NULL);" %
+            self.emit("%s_singleton = PyType_GenericNew(%s_type, NULL, NULL);" %
                             (cons.name, cons.name), 1)
-            self.emit("if (!state->%s_singleton) return 0;" % cons.name, 1)
+            self.emit("if (!%s_singleton) return 0;" % cons.name, 1)
 
 
 class ASTModuleVisitor(PickleVisitor):
 
     def visitModule(self, mod):
         self.emit("PyMODINIT_FUNC", 0)
-        self.emit("PyInit__ast(void)", 0)
+        self.emit("init_ast(void)", 0)
         self.emit("{", 0)
-        self.emit("PyObject *m;", 1)
-        self.emit("if (!init_types()) return NULL;", 1)
-        self.emit('m = PyState_FindModule(&_astmodule);', 1)
-        self.emit("if (!m) return NULL;", 1)
-        self.emit('Py_INCREF(astmodulestate(m)->AST_type);', 1)
-        self.emit('if (PyModule_AddObject(m, "AST", astmodulestate_global->AST_type) < 0) return NULL;', 1)
-        self.emit('if (PyModule_AddIntMacro(m, PyCF_ALLOW_TOP_LEVEL_AWAIT) < 0)', 1)
-        self.emit("return NULL;", 2)
-        self.emit('if (PyModule_AddIntMacro(m, PyCF_ONLY_AST) < 0)', 1)
-        self.emit("return NULL;", 2)
-        self.emit('if (PyModule_AddIntMacro(m, PyCF_TYPE_COMMENTS) < 0)', 1)
-        self.emit("return NULL;", 2)
+        self.emit("PyObject *m, *d;", 1)
+        self.emit("if (!init_types()) return;", 1)
+        self.emit('m = Py_InitModule3("_ast", NULL, NULL);', 1)
+        self.emit("if (!m) return;", 1)
+        self.emit("d = PyModule_GetDict(m);", 1)
+        self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return;', 1)
+        self.emit('if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)', 1)
+        self.emit("return;", 2)
+        # Value of version: "$Revision$"
+        self.emit('if (PyModule_AddStringConstant(m, "__version__", "%s") < 0)'
+                  % mod.version, 1)
+        self.emit("return;", 2)
         for dfn in mod.dfns:
             self.visit(dfn)
-        self.emit("return m;", 1)
         self.emit("}", 0)
 
     def visitProduct(self, prod, name):
@@ -1023,9 +961,7 @@ class ASTModuleVisitor(PickleVisitor):
         self.addObj(cons.name)
 
     def addObj(self, name):
-        self.emit("if (PyModule_AddObject(m, \"%s\", "
-                  "astmodulestate_global->%s_type) < 0) return NULL;" % (name, name), 1)
-        self.emit("Py_INCREF(astmodulestate(m)->%s_type);" % name, 1)
+        self.emit('if (PyDict_SetItemString(d, "%s", (PyObject*)%s_type) < 0) return;' % (name, name), 1)
 
 
 _SPECIALIZED_SEQUENCES = ('stmt', 'expr')
@@ -1063,9 +999,9 @@ class ObjVisitor(PickleVisitor):
         self.emit("{", 0)
         self.emit("%s o = (%s)_o;" % (ctype, ctype), 1)
         self.emit("PyObject *result = NULL, *value = NULL;", 1)
-        self.emit("PyTypeObject *tp;", 1)
         self.emit('if (!o) {', 1)
-        self.emit("Py_RETURN_NONE;", 2)
+        self.emit("Py_INCREF(Py_None);", 2)
+        self.emit('return Py_None;', 2)
         self.emit("}", 1)
         self.emit('', 0)
 
@@ -1091,7 +1027,7 @@ class ObjVisitor(PickleVisitor):
         for a in sum.attributes:
             self.emit("value = ast2obj_%s(o->%s);" % (a.type, a.name), 1)
             self.emit("if (!value) goto failed;", 1)
-            self.emit('if (PyObject_SetAttr(result, astmodulestate_global->%s, value) < 0)' % a.name, 1)
+            self.emit('if (PyObject_SetAttrString(result, "%s", value) < 0)' % a.name, 1)
             self.emit('goto failed;', 2)
             self.emit('Py_DECREF(value);', 1)
         self.func_end()
@@ -1102,8 +1038,8 @@ class ObjVisitor(PickleVisitor):
         self.emit("switch(o) {", 1)
         for t in sum.types:
             self.emit("case %s:" % t.name, 2)
-            self.emit("Py_INCREF(astmodulestate_global->%s_singleton);" % t.name, 3)
-            self.emit("return astmodulestate_global->%s_singleton;" % t.name, 3)
+            self.emit("Py_INCREF(%s_singleton);" % t.name, 3)
+            self.emit("return %s_singleton;" % t.name, 3)
         self.emit("default:", 2)
         self.emit('/* should never happen, but just in case ... */', 3)
         code = "PyErr_Format(PyExc_SystemError, \"unknown %s found\");" % name
@@ -1114,23 +1050,15 @@ class ObjVisitor(PickleVisitor):
 
     def visitProduct(self, prod, name):
         self.func_begin(name)
-        self.emit("tp = (PyTypeObject *)astmodulestate_global->%s_type;" % name, 1)
-        self.emit("result = PyType_GenericNew(tp, NULL, NULL);", 1);
+        self.emit("result = PyType_GenericNew(%s_type, NULL, NULL);" % name, 1);
         self.emit("if (!result) return NULL;", 1)
         for field in prod.fields:
             self.visitField(field, name, 1, True)
-        for a in prod.attributes:
-            self.emit("value = ast2obj_%s(o->%s);" % (a.type, a.name), 1)
-            self.emit("if (!value) goto failed;", 1)
-            self.emit("if (PyObject_SetAttr(result, astmodulestate_global->%s, value) < 0)" % a.name, 1)
-            self.emit('goto failed;', 2)
-            self.emit('Py_DECREF(value);', 1)
         self.func_end()
 
     def visitConstructor(self, cons, enum, name):
         self.emit("case %s_kind:" % cons.name, 1)
-        self.emit("tp = (PyTypeObject *)astmodulestate_global->%s_type;" % cons.name, 2)
-        self.emit("result = PyType_GenericNew(tp, NULL, NULL);", 2);
+        self.emit("result = PyType_GenericNew(%s_type, NULL, NULL);" % cons.name, 2);
         self.emit("if (!result) goto failed;", 2)
         for f in cons.fields:
             self.visitField(f, cons.name, 2, False)
@@ -1145,7 +1073,7 @@ class ObjVisitor(PickleVisitor):
         value = "o->v.%s.%s" % (name, field.name)
         self.set(field, value, depth)
         emit("if (!value) goto failed;", 0)
-        emit("if (PyObject_SetAttr(result, astmodulestate_global->%s, value) == -1)" % field.name, 0)
+        emit('if (PyObject_SetAttrString(result, "%s", value) == -1)' % field.name, 0)
         emit("goto failed;", 1)
         emit("Py_DECREF(value);", 0)
 
@@ -1164,11 +1092,11 @@ class ObjVisitor(PickleVisitor):
     def set(self, field, value, depth):
         if field.seq:
             # XXX should really check for is_simple, but that requires a symbol table
-            if field.type == "cmpop":
+            if field.type.value == "cmpop":
                 # While the sequence elements are stored as void*,
                 # ast2obj_cmpop expects an enum
                 self.emit("{", depth)
-                self.emit("Py_ssize_t i, n = asdl_seq_LEN(%s);" % value, depth+1)
+                self.emit("int i, n = asdl_seq_LEN(%s);" % value, depth+1)
                 self.emit("value = PyList_New(n);", depth+1)
                 self.emit("if (!value) goto failed;", depth+1)
                 self.emit("for(i = 0; i
< n; i++)", depth+1) @@ -1188,41 +1116,38 @@ class PartingShots(StaticVisitor): CODE = """ PyObject* PyAST_mod2obj(mod_ty t) { - if (!init_types()) - return NULL; + init_types(); return ast2obj_mod(t); } /* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */ mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode) { + mod_ty res; PyObject *req_type[3]; - const char * const req_name[] = {"Module", "Expression", "Interactive"}; + char *req_name[3]; int isinstance; - if (PySys_Audit("compile", "OO", ast, Py_None) < 0) { - return NULL; - } + req_type[0] = (PyObject*)Module_type; + req_type[1] = (PyObject*)Expression_type; + req_type[2] = (PyObject*)Interactive_type; - req_type[0] = astmodulestate_global->Module_type; - req_type[1] = astmodulestate_global->Expression_type; - req_type[2] = astmodulestate_global->Interactive_type; + req_name[0] = "Module"; + req_name[1] = "Expression"; + req_name[2] = "Interactive"; assert(0 <= mode && mode <= 2); - if (!init_types()) - return NULL; + init_types(); isinstance = PyObject_IsInstance(ast, req_type[mode]); if (isinstance == -1) return NULL; if (!isinstance) { PyErr_Format(PyExc_TypeError, "expected %s node, got %.400s", - req_name[mode], _PyType_Name(Py_TYPE(ast))); + req_name[mode], Py_TYPE(ast)->tp_name); return NULL; } - - mod_ty res = NULL; if (obj2ast_mod(ast, &res, arena) != 0) return NULL; else @@ -1231,9 +1156,8 @@ mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode) int PyAST_Check(PyObject* obj) { - if (!init_types()) - return -1; - return PyObject_IsInstance(obj, astmodulestate_global->AST_type); + init_types(); + return PyObject_IsInstance(obj, (PyObject*)&AST_type); } """ @@ -1246,172 +1170,81 @@ class ChainOfVisitors: v.visit(object) v.emit("", 0) +common_msg = "/* File automatically generated by %s. 
*/\n\n" -def generate_module_def(f, mod): - # Gather all the data needed for ModuleSpec - visitor_list = set() - with open(os.devnull, "w") as devnull: - visitor = PyTypesDeclareVisitor(devnull) - visitor.visit(mod) - visitor_list.add(visitor) - visitor = PyTypesVisitor(devnull) - visitor.visit(mod) - visitor_list.add(visitor) - - state_strings = set(["__dict__", "_attributes", "_fields", "__module__", "_ast"]) - module_state = set(["__dict__", "_attributes", "_fields", "__module__", "_ast"]) - for visitor in visitor_list: - for identifier in visitor.identifiers: - module_state.add(identifier) - state_strings.add(identifier) - for singleton in visitor.singletons: - module_state.add(singleton) - for tp in visitor.types: - module_state.add(tp) - state_strings = sorted(state_strings) - module_state = sorted(module_state) - f.write('typedef struct {\n') - f.write(' int initialized;\n') - for s in module_state: - f.write(' PyObject *' + s + ';\n') - f.write('} astmodulestate;\n\n') - f.write(""" -#define astmodulestate(o) ((astmodulestate *)PyModule_GetState(o)) - -static int astmodule_clear(PyObject *module) -{ -""") - for s in module_state: - f.write(" Py_CLEAR(astmodulestate(module)->" + s + ');\n') - f.write(""" - return 0; -} - -static int astmodule_traverse(PyObject *module, visitproc visit, void* arg) -{ -""") - for s in module_state: - f.write(" Py_VISIT(astmodulestate(module)->" + s + ');\n') - f.write(""" - return 0; -} - -static void astmodule_free(void* module) { - astmodule_clear((PyObject*)module); -} - -static struct PyModuleDef _astmodule = { - PyModuleDef_HEAD_INIT, - "_ast", - NULL, - sizeof(astmodulestate), - NULL, - NULL, - astmodule_traverse, - astmodule_clear, - astmodule_free, -}; - -#define astmodulestate_global ((astmodulestate *)PyModule_GetState(PyState_FindModule(&_astmodule))) - -""") - f.write('static int init_identifiers(void)\n') - f.write('{\n') - f.write(' astmodulestate *state = astmodulestate_global;\n') - for identifier in state_strings: - f.write(' if ((state->' + identifier) - f.write(' = PyUnicode_InternFromString("') - f.write(identifier + '")) == NULL) return 0;\n') - f.write(' return 1;\n') - f.write('};\n\n') +c_file_msg = """ +/* + __version__ %s. + This module must be committed separately after each AST grammar change; + The __version__ number is set to the revision number of the commit + containing the grammar change. +*/ -common_msg = "/* File automatically generated by %s. 
*/\n\n" +""" -def main(srcfile, dump_module=False): +def main(srcfile): argv0 = sys.argv[0] components = argv0.split(os.sep) argv0 = os.sep.join(components[-2:]) auto_gen_msg = common_msg % argv0 mod = asdl.parse(srcfile) - if dump_module: - print('Parsed Module:') - print(mod) + mod.version = "82160" if not asdl.check(mod): sys.exit(1) - if H_FILE: - with open(H_FILE, "w") as f: - f.write(auto_gen_msg) - f.write('#ifndef Py_PYTHON_AST_H\n') - f.write('#define Py_PYTHON_AST_H\n') - f.write('#ifdef __cplusplus\n') - f.write('extern "C" {\n') - f.write('#endif\n') - f.write('\n') - f.write('#ifndef Py_LIMITED_API\n') - f.write('#include "asdl.h"\n') - f.write('\n') - f.write('#undef Yield /* undefine macro conflicting with <winbase.h> */\n') - f.write('\n') - c = ChainOfVisitors(TypeDefVisitor(f), - StructVisitor(f)) - - c.visit(mod) - f.write("// Note: these macros affect function definitions, not only call sites.\n") - PrototypeVisitor(f).visit(mod) - f.write("\n") - f.write("PyObject* PyAST_mod2obj(mod_ty t);\n") - f.write("mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode);\n") - f.write("int PyAST_Check(PyObject* obj);\n") - f.write("#endif /* !Py_LIMITED_API */\n") - f.write('\n') - f.write('#ifdef __cplusplus\n') - f.write('}\n') - f.write('#endif\n') - f.write('#endif /* !Py_PYTHON_AST_H */\n') - - if C_FILE: - with open(C_FILE, "w") as f: - f.write(auto_gen_msg) - f.write('#include <stddef.h>\n') - f.write('\n') - f.write('#include "Python.h"\n') - f.write('#include "%s-ast.h"\n' % mod.name) - f.write('#include "structmember.h"\n') - f.write('\n') - - generate_module_def(f, mod) - - v = ChainOfVisitors( - PyTypesDeclareVisitor(f), - PyTypesVisitor(f), - Obj2ModPrototypeVisitor(f), - FunctionVisitor(f), - ObjVisitor(f), - Obj2ModVisitor(f), - ASTModuleVisitor(f), - PartingShots(f), - ) - v.visit(mod) + if INC_DIR: + p = "%s/%s-ast.h" % (INC_DIR, mod.name) + f = open(p, "wb") + f.write(auto_gen_msg) + f.write('#include "asdl.h"\n\n') + c = ChainOfVisitors(TypeDefVisitor(f), + StructVisitor(f), + PrototypeVisitor(f), + ) + c.visit(mod) + f.write("PyObject* PyAST_mod2obj(mod_ty t);\n") + f.write("mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode);\n") + f.write("int PyAST_Check(PyObject* obj);\n") + f.close() + + if SRC_DIR: + p = os.path.join(SRC_DIR, str(mod.name) + "-ast.c") + f = open(p, "wb") + f.write(auto_gen_msg) + f.write(c_file_msg % mod.version) + f.write('#include "Python.h"\n') + f.write('#include "%s-ast.h"\n' % mod.name) + f.write('\n') + f.write("static PyTypeObject AST_type;\n") + v = ChainOfVisitors( + PyTypesDeclareVisitor(f), + PyTypesVisitor(f), + Obj2ModPrototypeVisitor(f), + FunctionVisitor(f), + ObjVisitor(f), + Obj2ModVisitor(f), + ASTModuleVisitor(f), + PartingShots(f), + ) + v.visit(mod) + f.close() if __name__ == "__main__": + import sys import getopt - H_FILE = '' - C_FILE = '' - dump_module = False - opts, args = getopt.getopt(sys.argv[1:], "dh:c:") + INC_DIR = '' + SRC_DIR = '' + opts, args = getopt.getopt(sys.argv[1:], "h:c:") + if len(opts) != 1: + print "Must specify exactly one output file" + sys.exit(1) for o, v in opts: if o == '-h': - H_FILE = v - elif o == '-c': - C_FILE = v - elif o == '-d': - dump_module = True - if H_FILE and C_FILE: - print('Must specify exactly one output file') - sys.exit(1) - elif len(args) != 1: - print('Must specify single input file') + INC_DIR = v + if o == '-c': + SRC_DIR = v + if len(args) != 1: + print "Must specify single input file" sys.exit(1) - main(args[0], dump_module) + main(args[0]) diff 
--git a/Parser/bitset.c new file mode 100644 index 0000000..f5bfd41 --- /dev/null +++ b/Parser/bitset.c @@ -0,0 +1,66 @@ + +/* Bitset primitives used by the parser generator */ + +#include "pgenheaders.h" +#include "bitset.h" + +bitset +newbitset(int nbits) +{ + int nbytes = NBYTES(nbits); + bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes); + + if (ss == NULL) + Py_FatalError("no mem for bitset"); + + ss += nbytes; + while (--nbytes >= 0) + *--ss = 0; + return ss; +} + +void +delbitset(bitset ss) +{ + PyObject_FREE(ss); +} + +int +addbit(bitset ss, int ibit) +{ + int ibyte = BIT2BYTE(ibit); + BYTE mask = BIT2MASK(ibit); + + if (ss[ibyte] & mask) + return 0; /* Bit already set */ + ss[ibyte] |= mask; + return 1; +} + +#if 0 /* Now a macro */ +int +testbit(bitset ss, int ibit) +{ + return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0; +} +#endif + +int +samebitset(bitset ss1, bitset ss2, int nbits) +{ + int i; + + for (i = NBYTES(nbits); --i >= 0; ) + if (*ss1++ != *ss2++) + return 0; + return 1; +} + +void +mergebitset(bitset ss1, bitset ss2, int nbits) +{ + int i; + + for (i = NBYTES(nbits); --i >= 0; ) + *ss1++ |= *ss2++; +} diff --git a/Parser/firstsets.c new file mode 100644 index 0000000..ee75d1b --- /dev/null +++ b/Parser/firstsets.c @@ -0,0 +1,113 @@ + +/* Computation of FIRST sets */ + +#include "pgenheaders.h" +#include "grammar.h" +#include "token.h" + +extern int Py_DebugFlag; + +/* Forward */ +static void calcfirstset(grammar *, dfa *); + +void +addfirstsets(grammar *g) +{ + int i; + dfa *d; + + if (Py_DebugFlag) + printf("Adding FIRST sets ...\n"); + for (i = 0; i < g->g_ndfas; i++) { + d = &g->g_dfa[i]; + if (d->d_first == NULL) + calcfirstset(g, d); + } +} + +static void +calcfirstset(grammar *g, dfa *d) +{ + int i, j; + state *s; + arc *a; + int nsyms; + int *sym; + int nbits; + static bitset dummy; + bitset result; + int type; + dfa *d1; + label *l0; + + if (Py_DebugFlag) + printf("Calculate FIRST set for '%s'\n", d->d_name); + + if (dummy == NULL) + dummy = newbitset(1); + if (d->d_first == dummy) { + fprintf(stderr, "Left-recursion for '%s'\n", d->d_name); + return; + } + if (d->d_first != NULL) { + fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n", + d->d_name); + } + d->d_first = dummy; + + l0 = g->g_ll.ll_label; + nbits = g->g_ll.ll_nlabels; + result = newbitset(nbits); + + sym = (int *)PyObject_MALLOC(sizeof(int)); + if (sym == NULL) + Py_FatalError("no mem for new sym in calcfirstset"); + nsyms = 1; + sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL); + + s = &d->d_state[d->d_initial]; + for (i = 0; i < s->s_narcs; i++) { + a = &s->s_arc[i]; + for (j = 0; j < nsyms; j++) { + if (sym[j] == a->a_lbl) + break; + } + if (j >= nsyms) { /* New label */ + sym = (int *)PyObject_REALLOC(sym, + sizeof(int) * (nsyms + 1)); + if (sym == NULL) + Py_FatalError( + "no mem to resize sym in calcfirstset"); + sym[nsyms++] = a->a_lbl; + type = l0[a->a_lbl].lb_type; + if (ISNONTERMINAL(type)) { + d1 = PyGrammar_FindDFA(g, type); + if (d1->d_first == dummy) { + fprintf(stderr, + "Left-recursion below '%s'\n", + d->d_name); + } + else { + if (d1->d_first == NULL) + calcfirstset(g, d1); + mergebitset(result, + d1->d_first, nbits); + } + } + else if (ISTERMINAL(type)) { + addbit(result, a->a_lbl); + } + } + } + d->d_first = result; + if (Py_DebugFlag) { + printf("FIRST set for '%s': {", d->d_name); + for (i = 0; i < nbits; i++) { + if (testbit(result, i)) + printf(" %s", PyGrammar_LabelRepr(&l0[i])); + } + printf(" }\n"); + } + + 
PyObject_FREE(sym); +} diff --git a/Parser/grammar.c b/Parser/grammar.c new file mode 100644 index 0000000..fcd2219 --- /dev/null +++ b/Parser/grammar.c @@ -0,0 +1,272 @@ + +/* Grammar implementation */ + +#include "Python.h" +#include "pgenheaders.h" + +#include <ctype.h> + +#include "token.h" +#include "grammar.h" + +#ifdef RISCOS +#include <unixlib.h> +#endif + +extern int Py_DebugFlag; + +grammar * +newgrammar(int start) +{ + grammar *g; + + g = (grammar *)PyObject_MALLOC(sizeof(grammar)); + if (g == NULL) + Py_FatalError("no mem for new grammar"); + g->g_ndfas = 0; + g->g_dfa = NULL; + g->g_start = start; + g->g_ll.ll_nlabels = 0; + g->g_ll.ll_label = NULL; + g->g_accel = 0; + return g; +} + +void +freegrammar(grammar *g) +{ + int i; + for (i = 0; i < g->g_ndfas; i++) { + int j; + free(g->g_dfa[i].d_name); + for (j = 0; j < g->g_dfa[i].d_nstates; j++) + PyObject_FREE(g->g_dfa[i].d_state[j].s_arc); + PyObject_FREE(g->g_dfa[i].d_state); + } + PyObject_FREE(g->g_dfa); + for (i = 0; i < g->g_ll.ll_nlabels; i++) + free(g->g_ll.ll_label[i].lb_str); + PyObject_FREE(g->g_ll.ll_label); + PyObject_FREE(g); +} + +dfa * +adddfa(grammar *g, int type, char *name) +{ + dfa *d; + + g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa, + sizeof(dfa) * (g->g_ndfas + 1)); + if (g->g_dfa == NULL) + Py_FatalError("no mem to resize dfa in adddfa"); + d = &g->g_dfa[g->g_ndfas++]; + d->d_type = type; + d->d_name = strdup(name); + d->d_nstates = 0; + d->d_state = NULL; + d->d_initial = -1; + d->d_first = NULL; + return d; /* Only use while fresh! */ +} + +int +addstate(dfa *d) +{ + state *s; + + d->d_state = (state *)PyObject_REALLOC(d->d_state, + sizeof(state) * (d->d_nstates + 1)); + if (d->d_state == NULL) + Py_FatalError("no mem to resize state in addstate"); + s = &d->d_state[d->d_nstates++]; + s->s_narcs = 0; + s->s_arc = NULL; + s->s_lower = 0; + s->s_upper = 0; + s->s_accel = NULL; + s->s_accept = 0; + return s - d->d_state; +} + +void +addarc(dfa *d, int from, int to, int lbl) +{ + state *s; + arc *a; + + assert(0 <= from && from < d->d_nstates); + assert(0 <= to && to < d->d_nstates); + + s = &d->d_state[from]; + s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1)); + if (s->s_arc == NULL) + Py_FatalError("no mem to resize arc list in addarc"); + a = &s->s_arc[s->s_narcs++]; + a->a_lbl = lbl; + a->a_arrow = to; +} + +int +addlabel(labellist *ll, int type, char *str) +{ + int i; + label *lb; + + for (i = 0; i < ll->ll_nlabels; i++) { + if (ll->ll_label[i].lb_type == type && + strcmp(ll->ll_label[i].lb_str, str) == 0) + return i; + } + ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label, + sizeof(label) * (ll->ll_nlabels + 1)); + if (ll->ll_label == NULL) + Py_FatalError("no mem to resize labellist in addlabel"); + lb = &ll->ll_label[ll->ll_nlabels++]; + lb->lb_type = type; + lb->lb_str = strdup(str); + if (Py_DebugFlag) + printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels, + PyGrammar_LabelRepr(lb)); + return lb - ll->ll_label; +} + +/* Same, but rather dies than adds */ + +int +findlabel(labellist *ll, int type, char *str) +{ + int i; + + for (i = 0; i < ll->ll_nlabels; i++) { + if (ll->ll_label[i].lb_type == type /*&& + strcmp(ll->ll_label[i].lb_str, str) == 0*/) + return i; + } + fprintf(stderr, "Label %d/'%s' not found\n", type, str); + Py_FatalError("grammar.c:findlabel()"); + return 0; /* Make gcc -Wall happy */ +} + +/* Forward */ +static void translabel(grammar *, label *); + +void +translatelabels(grammar *g) +{ + int i; + +#ifdef Py_DEBUG + printf("Translating labels 
...\n"); +#endif + /* Don't translate EMPTY */ + for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++) + translabel(g, &g->g_ll.ll_label[i]); +} + +static void +translabel(grammar *g, label *lb) +{ + int i; + + if (Py_DebugFlag) + printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb)); + + if (lb->lb_type == NAME) { + for (i = 0; i < g->g_ndfas; i++) { + if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) { + if (Py_DebugFlag) + printf( + "Label %s is non-terminal %d.\n", + lb->lb_str, + g->g_dfa[i].d_type); + lb->lb_type = g->g_dfa[i].d_type; + free(lb->lb_str); + lb->lb_str = NULL; + return; + } + } + for (i = 0; i < (int)N_TOKENS; i++) { + if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) { + if (Py_DebugFlag) + printf("Label %s is terminal %d.\n", + lb->lb_str, i); + lb->lb_type = i; + free(lb->lb_str); + lb->lb_str = NULL; + return; + } + } + printf("Can't translate NAME label '%s'\n", lb->lb_str); + return; + } + + if (lb->lb_type == STRING) { + if (isalpha(Py_CHARMASK(lb->lb_str[1])) || + lb->lb_str[1] == '_') { + char *p; + char *src; + char *dest; + size_t name_len; + if (Py_DebugFlag) + printf("Label %s is a keyword\n", lb->lb_str); + lb->lb_type = NAME; + src = lb->lb_str + 1; + p = strchr(src, '\''); + if (p) + name_len = p - src; + else + name_len = strlen(src); + dest = (char *)malloc(name_len + 1); + if (!dest) { + printf("Can't alloc dest '%s'\n", src); + return; + } + strncpy(dest, src, name_len); + dest[name_len] = '\0'; + free(lb->lb_str); + lb->lb_str = dest; + } + else if (lb->lb_str[2] == lb->lb_str[0]) { + int type = (int) PyToken_OneChar(lb->lb_str[1]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) { + int type = (int) PyToken_TwoChars(lb->lb_str[1], + lb->lb_str[2]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) { + int type = (int) PyToken_ThreeChars(lb->lb_str[1], + lb->lb_str[2], + lb->lb_str[3]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else + printf("Can't translate STRING label %s\n", + lb->lb_str); + } + else + printf("Can't translate label '%s'\n", + PyGrammar_LabelRepr(lb)); +} diff --git a/Parser/grammar1.c b/Parser/grammar1.c index e0b8fbb..1f7d264 100644 --- a/Parser/grammar1.c +++ b/Parser/grammar1.c @@ -2,21 +2,35 @@ /* Grammar subroutines needed by parser */ #include "Python.h" +#include "pgenheaders.h" #include "grammar.h" #include "token.h" /* Return the DFA for the given type */ -const dfa * -PyGrammar_FindDFA(grammar *g, int type) +dfa * +PyGrammar_FindDFA(grammar *g, register int type) { + register dfa *d; +#if 1 /* Massive speed-up */ - const dfa *d = &g->g_dfa[type - NT_OFFSET]; + d = &g->g_dfa[type - NT_OFFSET]; assert(d->d_type == type); return d; +#else + /* Old, slow version */ + register int i; + + for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) { + if (d->d_type == type) + return d; + } + assert(0); + /* NOTREACHED */ +#endif } -const char * +char * PyGrammar_LabelRepr(label *lb) { static char buf[100]; @@ -31,7 +45,7 @@ PyGrammar_LabelRepr(label *lb) else return lb->lb_str; } - else if (lb->lb_type < N_TOKENS) { + else { if (lb->lb_str == NULL) return _PyParser_TokenNames[lb->lb_type]; else { @@ 
-40,8 +54,4 @@ PyGrammar_LabelRepr(label *lb) return buf; } } - else { - Py_FatalError("invalid label"); - return NULL; - } } diff --git a/Parser/intrcheck.c b/Parser/intrcheck.c new file mode 100644 index 0000000..5844a9a --- /dev/null +++ b/Parser/intrcheck.c @@ -0,0 +1,178 @@ + +/* Check for interrupts */ + +#include "Python.h" +#include "pythread.h" + +#ifdef QUICKWIN + +#include <io.h> + +void +PyOS_InitInterrupts(void) +{ +} + +void +PyOS_FiniInterrupts(void) +{ +} + +int +PyOS_InterruptOccurred(void) +{ + _wyield(); +} + +#define OK + +#endif /* QUICKWIN */ + +#if defined(_M_IX86) && !defined(__QNX__) +#include <io.h> +#endif + +#if defined(MSDOS) && !defined(QUICKWIN) + +#ifdef __GNUC__ + +/* This is for DJGPP's GO32 extender. I don't know how to trap + * control-C (There's no API for ctrl-C, and I don't want to mess with + * the interrupt vectors.) However, this DOES catch control-break. + * --Amrit + */ + +#include <go32.h> + +void +PyOS_InitInterrupts(void) +{ + _go32_want_ctrl_break(1 /* TRUE */); +} + +void +PyOS_FiniInterrupts(void) +{ +} + +int +PyOS_InterruptOccurred(void) +{ + return _go32_was_ctrl_break_hit(); +} + +#else /* !__GNUC__ */ + +/* This might work for MS-DOS (untested though): */ + +void +PyOS_InitInterrupts(void) +{ +} + +void +PyOS_FiniInterrupts(void) +{ +} + +int +PyOS_InterruptOccurred(void) +{ + int interrupted = 0; + while (kbhit()) { + if (getch() == '\003') + interrupted = 1; + } + return interrupted; +} + +#endif /* __GNUC__ */ + +#define OK + +#endif /* MSDOS && !QUICKWIN */ + + +#ifndef OK + +/* Default version -- for real operating systems and for Standard C */ + +#include <stdio.h> +#include <string.h> +#include <signal.h> + +static int interrupted; + +void +PyErr_SetInterrupt(void) +{ + interrupted = 1; +} + +extern int PyErr_CheckSignals(void); + +static int +checksignals_witharg(void * arg) +{ + return PyErr_CheckSignals(); +} + +static void +intcatcher(int sig) +{ + extern void Py_Exit(int); + static char message[] = +"python: to interrupt a truly hanging Python program, interrupt once more.\n"; + switch (interrupted++) { + case 0: + break; + case 1: +#ifdef RISCOS + fprintf(stderr, message); +#else + write(2, message, strlen(message)); +#endif + break; + case 2: + interrupted = 0; + Py_Exit(1); + break; + } + PyOS_setsig(SIGINT, intcatcher); + Py_AddPendingCall(checksignals_witharg, NULL); +} + +static void (*old_siginthandler)(int) = SIG_DFL; + +void +PyOS_InitInterrupts(void) +{ + if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN) + PyOS_setsig(SIGINT, intcatcher); +} + +void +PyOS_FiniInterrupts(void) +{ + PyOS_setsig(SIGINT, old_siginthandler); +} + +int +PyOS_InterruptOccurred(void) +{ + if (!interrupted) + return 0; + interrupted = 0; + return 1; +} + +#endif /* !OK */ + +void +PyOS_AfterFork(void) +{ +#ifdef WITH_THREAD + PyThread_ReInitTLS(); + PyEval_ReInitThreads(); +#endif +} diff --git a/Parser/listnode.c b/Parser/listnode.c index d431ae5..b5f8ad2 100644 --- a/Parser/listnode.c +++ b/Parser/listnode.c @@ -1,8 +1,7 @@ /* List a node on a file */ -#include "Python.h" -#include "pycore_pystate.h" +#include "pgenheaders.h" #include "token.h" #include "node.h" @@ -16,22 +15,20 @@ PyNode_ListTree(node *n) listnode(stdout, n); } +static int level, atbol; + static void listnode(FILE *fp, node *n) { - PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); - - interp->parser.listnode.level = 0; - interp->parser.listnode.atbol = 1; + level = 0; + atbol = 1; list1node(fp, n); } static void list1node(FILE *fp, node *n) 
{ - PyInterpreterState *interp; - - if (n == NULL) + if (n == 0) return; if (ISNONTERMINAL(TYPE(n))) { int i; @@ -39,26 +36,25 @@ list1node(FILE *fp, node *n) list1node(fp, CHILD(n, i)); } else if (ISTERMINAL(TYPE(n))) { - interp = _PyInterpreterState_GET_UNSAFE(); switch (TYPE(n)) { case INDENT: - interp->parser.listnode.level++; + ++level; break; case DEDENT: - interp->parser.listnode.level--; + --level; break; default: - if (interp->parser.listnode.atbol) { + if (atbol) { int i; - for (i = 0; i < interp->parser.listnode.level; ++i) + for (i = 0; i < level; ++i) fprintf(fp, "\t"); - interp->parser.listnode.atbol = 0; + atbol = 0; } if (TYPE(n) == NEWLINE) { if (STR(n) != NULL) fprintf(fp, "%s", STR(n)); fprintf(fp, "\n"); - interp->parser.listnode.atbol = 1; + atbol = 1; } else fprintf(fp, "%s ", STR(n)); diff --git a/Parser/metagrammar.c b/Parser/metagrammar.c new file mode 100644 index 0000000..53810b8 --- /dev/null +++ b/Parser/metagrammar.c @@ -0,0 +1,159 @@ + +#include "pgenheaders.h" +#include "metagrammar.h" +#include "grammar.h" +#include "pgen.h" +static arc arcs_0_0[3] = { + {2, 0}, + {3, 0}, + {4, 1}, +}; +static arc arcs_0_1[1] = { + {0, 1}, +}; +static state states_0[2] = { + {3, arcs_0_0}, + {1, arcs_0_1}, +}; +static arc arcs_1_0[1] = { + {5, 1}, +}; +static arc arcs_1_1[1] = { + {6, 2}, +}; +static arc arcs_1_2[1] = { + {7, 3}, +}; +static arc arcs_1_3[1] = { + {3, 4}, +}; +static arc arcs_1_4[1] = { + {0, 4}, +}; +static state states_1[5] = { + {1, arcs_1_0}, + {1, arcs_1_1}, + {1, arcs_1_2}, + {1, arcs_1_3}, + {1, arcs_1_4}, +}; +static arc arcs_2_0[1] = { + {8, 1}, +}; +static arc arcs_2_1[2] = { + {9, 0}, + {0, 1}, +}; +static state states_2[2] = { + {1, arcs_2_0}, + {2, arcs_2_1}, +}; +static arc arcs_3_0[1] = { + {10, 1}, +}; +static arc arcs_3_1[2] = { + {10, 1}, + {0, 1}, +}; +static state states_3[2] = { + {1, arcs_3_0}, + {2, arcs_3_1}, +}; +static arc arcs_4_0[2] = { + {11, 1}, + {13, 2}, +}; +static arc arcs_4_1[1] = { + {7, 3}, +}; +static arc arcs_4_2[3] = { + {14, 4}, + {15, 4}, + {0, 2}, +}; +static arc arcs_4_3[1] = { + {12, 4}, +}; +static arc arcs_4_4[1] = { + {0, 4}, +}; +static state states_4[5] = { + {2, arcs_4_0}, + {1, arcs_4_1}, + {3, arcs_4_2}, + {1, arcs_4_3}, + {1, arcs_4_4}, +}; +static arc arcs_5_0[3] = { + {5, 1}, + {16, 1}, + {17, 2}, +}; +static arc arcs_5_1[1] = { + {0, 1}, +}; +static arc arcs_5_2[1] = { + {7, 3}, +}; +static arc arcs_5_3[1] = { + {18, 1}, +}; +static state states_5[4] = { + {3, arcs_5_0}, + {1, arcs_5_1}, + {1, arcs_5_2}, + {1, arcs_5_3}, +}; +static dfa dfas[6] = { + {256, "MSTART", 0, 2, states_0, + "\070\000\000"}, + {257, "RULE", 0, 5, states_1, + "\040\000\000"}, + {258, "RHS", 0, 2, states_2, + "\040\010\003"}, + {259, "ALT", 0, 2, states_3, + "\040\010\003"}, + {260, "ITEM", 0, 5, states_4, + "\040\010\003"}, + {261, "ATOM", 0, 4, states_5, + "\040\000\003"}, +}; +static label labels[19] = { + {0, "EMPTY"}, + {256, 0}, + {257, 0}, + {4, 0}, + {0, 0}, + {1, 0}, + {11, 0}, + {258, 0}, + {259, 0}, + {18, 0}, + {260, 0}, + {9, 0}, + {10, 0}, + {261, 0}, + {16, 0}, + {14, 0}, + {3, 0}, + {7, 0}, + {8, 0}, +}; +static grammar _PyParser_Grammar = { + 6, + dfas, + {19, labels}, + 256 +}; + +grammar * +meta_grammar(void) +{ + return &_PyParser_Grammar; +} + +grammar * +Py_meta_grammar(void) +{ + return meta_grammar(); +} diff --git a/Parser/myreadline.c b/Parser/myreadline.c index 43e5583..5376214 100644 --- a/Parser/myreadline.c +++ b/Parser/myreadline.c @@ -10,31 +10,40 @@ */ #include "Python.h" -#include 
"pycore_pystate.h" #ifdef MS_WINDOWS #define WIN32_LEAN_AND_MEAN #include "windows.h" #endif /* MS_WINDOWS */ +#ifdef __VMS +extern char* vms__StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt); +#endif + -PyThreadState* _PyOS_ReadlineTState = NULL; +PyThreadState* _PyOS_ReadlineTState; +#ifdef WITH_THREAD #include "pythread.h" static PyThread_type_lock _PyOS_ReadlineLock = NULL; +#endif int (*PyOS_InputHook)(void) = NULL; +#ifdef RISCOS +int Py_RISCOSWimpFlag; +#endif + /* This function restarts a fgets() after an EINTR error occurred except if PyOS_InterruptOccurred() returns true. */ static int my_fgets(char *buf, int len, FILE *fp) { + char *p; #ifdef MS_WINDOWS - HANDLE hInterruptEvent; + int i; #endif - char *p; - int err; + while (1) { if (PyOS_InputHook != NULL) (void)(PyOS_InputHook)(); @@ -43,29 +52,24 @@ my_fgets(char *buf, int len, FILE *fp) p = fgets(buf, len, fp); if (p != NULL) return 0; /* No error */ - err = errno; #ifdef MS_WINDOWS /* Ctrl-C anywhere on the line or Ctrl-Z if the only character on a line will set ERROR_OPERATION_ABORTED. Under normal circumstances Ctrl-C will also have caused the SIGINT handler - to fire which will have set the event object returned by - _PyOS_SigintEvent. This signal fires in another thread and - is not guaranteed to have occurred before this point in the - code. - - Therefore: check whether the event is set with a small timeout. - If it is, assume this is a Ctrl-C and reset the event. If it - isn't set assume that this is a Ctrl-Z on its own and drop - through to check for EOF. + to fire. This signal fires in another thread and is not + guaranteed to have occurred before this point in the code. + + Therefore: check in a small loop to see if the trigger has + fired, in which case assume this is a Ctrl-C event. If it + hasn't fired within 10ms assume that this is a Ctrl-Z on its + own or that the signal isn't going to fire for some other + reason and drop through to check for EOF. 
*/ if (GetLastError()==ERROR_OPERATION_ABORTED) { - hInterruptEvent = _PyOS_SigintEvent(); - switch (WaitForSingleObjectEx(hInterruptEvent, 10, FALSE)) { - case WAIT_OBJECT_0: - ResetEvent(hInterruptEvent); - return 1; /* Interrupt */ - case WAIT_FAILED: - return -2; /* Error */ + for (i = 0; i < 10; i++) { + if (PyOS_InterruptOccurred()) + return 1; + Sleep(1); } } #endif /* MS_WINDOWS */ @@ -74,14 +78,18 @@ my_fgets(char *buf, int len, FILE *fp) return -1; /* EOF */ } #ifdef EINTR - if (err == EINTR) { + if (errno == EINTR) { int s; +#ifdef WITH_THREAD PyEval_RestoreThread(_PyOS_ReadlineTState); +#endif s = PyErr_CheckSignals(); +#ifdef WITH_THREAD PyEval_SaveThread(); +#endif if (s < 0) return 1; - /* try again */ + /* try again */ continue; } #endif @@ -93,185 +101,35 @@ my_fgets(char *buf, int len, FILE *fp) /* NOTREACHED */ } -#ifdef MS_WINDOWS -/* Readline implementation using ReadConsoleW */ - -extern char _get_console_type(HANDLE handle); - -char * -_PyOS_WindowsConsoleReadline(HANDLE hStdIn) -{ - static wchar_t wbuf_local[1024 * 16]; - const DWORD chunk_size = 1024; - - DWORD n_read, total_read, wbuflen, u8len; - wchar_t *wbuf; - char *buf = NULL; - int err = 0; - - n_read = (DWORD)-1; - total_read = 0; - wbuf = wbuf_local; - wbuflen = sizeof(wbuf_local) / sizeof(wbuf_local[0]) - 1; - while (1) { - if (PyOS_InputHook != NULL) { - (void)(PyOS_InputHook)(); - } - if (!ReadConsoleW(hStdIn, &wbuf[total_read], wbuflen - total_read, &n_read, NULL)) { - err = GetLastError(); - goto exit; - } - if (n_read == (DWORD)-1 && (err = GetLastError()) == ERROR_OPERATION_ABORTED) { - break; - } - if (n_read == 0) { - int s; - err = GetLastError(); - if (err != ERROR_OPERATION_ABORTED) - goto exit; - err = 0; - HANDLE hInterruptEvent = _PyOS_SigintEvent(); - if (WaitForSingleObjectEx(hInterruptEvent, 100, FALSE) - == WAIT_OBJECT_0) { - ResetEvent(hInterruptEvent); - PyEval_RestoreThread(_PyOS_ReadlineTState); - s = PyErr_CheckSignals(); - PyEval_SaveThread(); - if (s < 0) - goto exit; - } - break; - } - - total_read += n_read; - if (total_read == 0 || wbuf[total_read - 1] == L'\n') { - break; - } - wbuflen += chunk_size; - if (wbuf == wbuf_local) { - wbuf[total_read] = '\0'; - wbuf = (wchar_t*)PyMem_RawMalloc(wbuflen * sizeof(wchar_t)); - if (wbuf) - wcscpy_s(wbuf, wbuflen, wbuf_local); - else { - PyErr_NoMemory(); - goto exit; - } - } - else { - wchar_t *tmp = PyMem_RawRealloc(wbuf, wbuflen * sizeof(wchar_t)); - if (tmp == NULL) { - PyErr_NoMemory(); - goto exit; - } - wbuf = tmp; - } - } - - if (wbuf[0] == '\x1a') { - buf = PyMem_RawMalloc(1); - if (buf) - buf[0] = '\0'; - else { - PyErr_NoMemory(); - } - goto exit; - } - - u8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, total_read, NULL, 0, NULL, NULL); - buf = PyMem_RawMalloc(u8len + 1); - if (buf == NULL) { - PyErr_NoMemory(); - goto exit; - } - u8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, total_read, buf, u8len, NULL, NULL); - buf[u8len] = '\0'; - -exit: - if (wbuf != wbuf_local) - PyMem_RawFree(wbuf); - - if (err) { - PyEval_RestoreThread(_PyOS_ReadlineTState); - PyErr_SetFromWindowsErr(err); - PyEval_SaveThread(); - } - - return buf; -} - -#endif - /* Readline implementation using fgets() */ char * -PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt) +PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) { size_t n; char *p, *pr; - -#ifdef MS_WINDOWS - if (!Py_LegacyWindowsStdioFlag && sys_stdin == stdin) { - HANDLE hStdIn, hStdErr; - - _Py_BEGIN_SUPPRESS_IPH - hStdIn = 
(HANDLE)_get_osfhandle(fileno(sys_stdin)); - hStdErr = (HANDLE)_get_osfhandle(fileno(stderr)); - _Py_END_SUPPRESS_IPH - - if (_get_console_type(hStdIn) == 'r') { - fflush(sys_stdout); - if (prompt) { - if (_get_console_type(hStdErr) == 'w') { - wchar_t *wbuf; - int wlen; - wlen = MultiByteToWideChar(CP_UTF8, 0, prompt, -1, - NULL, 0); - if (wlen) { - wbuf = PyMem_RawMalloc(wlen * sizeof(wchar_t)); - if (wbuf == NULL) { - PyErr_NoMemory(); - return NULL; - } - wlen = MultiByteToWideChar(CP_UTF8, 0, prompt, -1, - wbuf, wlen); - if (wlen) { - DWORD n; - fflush(stderr); - /* wlen includes null terminator, so subtract 1 */ - WriteConsoleW(hStdErr, wbuf, wlen - 1, &n, NULL); - } - PyMem_RawFree(wbuf); - } - } else { - fprintf(stderr, "%s", prompt); - fflush(stderr); - } - } - clearerr(sys_stdin); - return _PyOS_WindowsConsoleReadline(hStdIn); - } - } -#endif - n = 100; - p = (char *)PyMem_RawMalloc(n); - if (p == NULL) { - PyErr_NoMemory(); + if ((p = (char *)PyMem_MALLOC(n)) == NULL) return NULL; - } - fflush(sys_stdout); +#ifndef RISCOS if (prompt) fprintf(stderr, "%s", prompt); +#else + if (prompt) { + if(Py_RISCOSWimpFlag) + fprintf(stderr, "\x0cr%s\x0c", prompt); + else + fprintf(stderr, "%s", prompt); + } +#endif fflush(stderr); - switch (my_fgets(p, (int)n, sys_stdin)) { case 0: /* Normal case */ break; case 1: /* Interrupt */ - PyMem_RawFree(p); + PyMem_FREE(p); return NULL; case -1: /* EOF */ case -2: /* Error */ @@ -283,13 +141,13 @@ PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt) while (n > 0 && p[n-1] != '\n') { size_t incr = n+2; if (incr > INT_MAX) { - PyMem_RawFree(p); + PyMem_FREE(p); PyErr_SetString(PyExc_OverflowError, "input line too long"); return NULL; } - pr = (char *)PyMem_RawRealloc(p, n + incr); + pr = (char *)PyMem_REALLOC(p, n + incr); if (pr == NULL) { - PyMem_RawFree(p); + PyMem_FREE(p); PyErr_NoMemory(); return NULL; } @@ -298,9 +156,9 @@ PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt) break; n += strlen(p+n); } - pr = (char *)PyMem_RawRealloc(p, n+1); + pr = (char *)PyMem_REALLOC(p, n+1); if (pr == NULL) { - PyMem_RawFree(p); + PyMem_FREE(p); PyErr_NoMemory(); return NULL; } @@ -313,18 +171,17 @@ PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt) Note: Python expects in return a buffer allocated with PyMem_Malloc. 
*/ -char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *) = NULL; +char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, char *); /* Interface used by tokenizer.c and bltinmodule.c */ char * -PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt) +PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) { - char *rv, *res; - size_t len; + char *rv; - if (_PyOS_ReadlineTState == _PyThreadState_GET()) { + if (_PyOS_ReadlineTState == PyThreadState_GET()) { PyErr_SetString(PyExc_RuntimeError, "can't re-enter readline"); return NULL; @@ -332,20 +189,24 @@ PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt) if (PyOS_ReadlineFunctionPointer == NULL) { +#ifdef __VMS + PyOS_ReadlineFunctionPointer = vms__StdioReadline; +#else PyOS_ReadlineFunctionPointer = PyOS_StdioReadline; +#endif } +#ifdef WITH_THREAD if (_PyOS_ReadlineLock == NULL) { _PyOS_ReadlineLock = PyThread_allocate_lock(); - if (_PyOS_ReadlineLock == NULL) { - PyErr_SetString(PyExc_MemoryError, "can't allocate lock"); - return NULL; - } } +#endif - _PyOS_ReadlineTState = _PyThreadState_GET(); + _PyOS_ReadlineTState = PyThreadState_GET(); Py_BEGIN_ALLOW_THREADS +#ifdef WITH_THREAD PyThread_acquire_lock(_PyOS_ReadlineLock, 1); +#endif /* This is needed to handle the unlikely case that the * interpreter is in interactive mode *and* stdin/out are not @@ -359,22 +220,11 @@ PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt) prompt); Py_END_ALLOW_THREADS +#ifdef WITH_THREAD PyThread_release_lock(_PyOS_ReadlineLock); +#endif _PyOS_ReadlineTState = NULL; - if (rv == NULL) - return NULL; - - len = strlen(rv) + 1; - res = PyMem_Malloc(len); - if (res != NULL) { - memcpy(res, rv, len); - } - else { - PyErr_NoMemory(); - } - PyMem_RawFree(rv); - - return res; + return rv; } diff --git a/Parser/node.c b/Parser/node.c index f1b70e0..0dea30f 100644 --- a/Parser/node.c +++ b/Parser/node.c @@ -13,8 +13,6 @@ PyNode_New(int type) n->n_type = type; n->n_str = NULL; n->n_lineno = 0; - n->n_end_lineno = 0; - n->n_end_col_offset = -1; n->n_nchildren = 0; n->n_child = NULL; return n; @@ -72,39 +70,19 @@ fancy_roundup(int n) * Note that this would be straightforward if a node stored its current * capacity. The code is tricky to avoid that. */ -#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \ - (n) <= 128 ? (int)_Py_SIZE_ROUND_UP((n), 4) : \ +#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \ + (n) <= 128 ? 
(((n) + 3) & ~3) : \ fancy_roundup(n)) -void -_PyNode_FinalizeEndPos(node *n) -{ - int nch = NCH(n); - node *last; - if (nch == 0) { - return; - } - last = CHILD(n, nch - 1); - _PyNode_FinalizeEndPos(last); - n->n_end_lineno = last->n_end_lineno; - n->n_end_col_offset = last->n_end_col_offset; -} - int -PyNode_AddChild(node *n1, int type, char *str, int lineno, int col_offset, - int end_lineno, int end_col_offset) +PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offset) { const int nch = n1->n_nchildren; int current_capacity; int required_capacity; node *n; - // finalize end position of previous node (if any) - if (nch > 0) { - _PyNode_FinalizeEndPos(CHILD(n1, nch - 1)); - } - if (nch == INT_MAX || nch < 0) return E_OVERFLOW; @@ -113,7 +91,7 @@ PyNode_AddChild(node *n1, int type, char *str, int lineno, int col_offset, if (current_capacity < 0 || required_capacity < 0) return E_OVERFLOW; if (current_capacity < required_capacity) { - if ((size_t)required_capacity > SIZE_MAX / sizeof(node)) { + if (required_capacity > PY_SIZE_MAX / sizeof(node)) { return E_NOMEM; } n = n1->n_child; @@ -129,8 +107,6 @@ PyNode_AddChild(node *n1, int type, char *str, int lineno, int col_offset, n->n_str = str; n->n_lineno = lineno; n->n_col_offset = col_offset; - n->n_end_lineno = end_lineno; // this and below will be updates after all children are added. - n->n_end_col_offset = end_col_offset; n->n_nchildren = 0; n->n_child = NULL; return 0; diff --git a/Parser/parser.c b/Parser/parser.c index 227b918..b753a17 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -6,12 +6,12 @@ /* XXX To do: error recovery */ #include "Python.h" +#include "pgenheaders.h" #include "token.h" #include "grammar.h" #include "node.h" #include "parser.h" #include "errcode.h" -#include "graminit.h" #ifdef Py_DEBUG @@ -35,9 +35,9 @@ s_reset(stack *s) #define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK]) static int -s_push(stack *s, const dfa *d, node *parent) +s_push(register stack *s, dfa *d, node *parent) { - stackentry *top; + register stackentry *top; if (s->s_top == s->s_base) { fprintf(stderr, "s_push: parser stack overflow\n"); return E_NOMEM; @@ -52,7 +52,7 @@ s_push(stack *s, const dfa *d, node *parent) #ifdef Py_DEBUG static void -s_pop(stack *s) +s_pop(register stack *s) { if (s_empty(s)) Py_FatalError("s_pop: parser stack underflow -- FATAL"); @@ -105,13 +105,11 @@ PyParser_Delete(parser_state *ps) /* PARSER STACK OPERATIONS */ static int -shift(stack *s, int type, char *str, int newstate, int lineno, int col_offset, - int end_lineno, int end_col_offset) +shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset) { int err; assert(!s_empty(s)); - err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset, - end_lineno, end_col_offset); + err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset); if (err) return err; s->s_top->s_state = newstate; @@ -119,15 +117,13 @@ shift(stack *s, int type, char *str, int newstate, int lineno, int col_offset, } static int -push(stack *s, int type, const dfa *d, int newstate, int lineno, int col_offset, - int end_lineno, int end_col_offset) +push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset) { int err; - node *n; + register node *n; n = s->s_top->s_parent; assert(!s_empty(s)); - err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset, - end_lineno, end_col_offset); + err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset); if (err) return err; 
s->s_top->s_state = newstate; @@ -138,38 +134,34 @@ push(stack *s, int type, const dfa *d, int newstate, int lineno, int col_offset, /* PARSER PROPER */ static int -classify(parser_state *ps, int type, const char *str) +classify(parser_state *ps, int type, char *str) { grammar *g = ps->p_grammar; - int n = g->g_ll.ll_nlabels; + register int n = g->g_ll.ll_nlabels; if (type == NAME) { - const label *l = g->g_ll.ll_label; - int i; + register char *s = str; + register label *l = g->g_ll.ll_label; + register int i; for (i = n; i > 0; i--, l++) { if (l->lb_type != NAME || l->lb_str == NULL || - l->lb_str[0] != str[0] || - strcmp(l->lb_str, str) != 0) + l->lb_str[0] != s[0] || + strcmp(l->lb_str, s) != 0) continue; #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD -#if 0 - /* Leaving this in as an example */ - if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) { - if (str[0] == 'w' && strcmp(str, "with") == 0) - break; /* not a keyword yet */ - else if (str[0] == 'a' && strcmp(str, "as") == 0) - break; /* not a keyword yet */ + if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION && + s[0] == 'p' && strcmp(s, "print") == 0) { + break; /* no longer a keyword */ } #endif -#endif D(printf("It's a keyword\n")); return n - i; } } { - const label *l = g->g_ll.ll_label; - int i; + register label *l = g->g_ll.ll_label; + register int i; for (i = n; i > 0; i--, l++) { if (l->lb_type == type && l->lb_str == NULL) { D(printf("It's a token we know\n")); @@ -183,8 +175,6 @@ classify(parser_state *ps, int type, const char *str) } #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD -#if 0 -/* Leaving this in as an example */ static void future_hack(parser_state *ps) { @@ -224,16 +214,13 @@ future_hack(parser_state *ps) } } } -#endif #endif /* future keyword */ int -PyParser_AddToken(parser_state *ps, int type, char *str, - int lineno, int col_offset, - int end_lineno, int end_col_offset, - int *expected_ret) +PyParser_AddToken(register parser_state *ps, register int type, char *str, + int lineno, int col_offset, int *expected_ret) { - int ilabel; + register int ilabel; int err; D(printf("Token %s/'%s' ... 
", _PyParser_TokenNames[type], str)); @@ -246,43 +233,34 @@ PyParser_AddToken(parser_state *ps, int type, char *str, /* Loop until the token is shifted or an error occurred */ for (;;) { /* Fetch the current dfa and state */ - const dfa *d = ps->p_stack.s_top->s_dfa; - state *s = &d->d_state[ps->p_stack.s_top->s_state]; + register dfa *d = ps->p_stack.s_top->s_dfa; + register state *s = &d->d_state[ps->p_stack.s_top->s_state]; D(printf(" DFA '%s', state %d:", d->d_name, ps->p_stack.s_top->s_state)); /* Check accelerator */ if (s->s_lower <= ilabel && ilabel < s->s_upper) { - int x = s->s_accel[ilabel - s->s_lower]; + register int x = s->s_accel[ilabel - s->s_lower]; if (x != -1) { if (x & (1<<7)) { /* Push non-terminal */ int nt = (x >> 8) + NT_OFFSET; int arrow = x & ((1<<7)-1); - if (nt == func_body_suite && !(ps->p_flags & PyCF_TYPE_COMMENTS)) { - /* When parsing type comments is not requested, - we can provide better errors about bad indentation - by using 'suite' for the body of a funcdef */ - D(printf(" [switch func_body_suite to suite]")); - nt = suite; - } - const dfa *d1 = PyGrammar_FindDFA( + dfa *d1 = PyGrammar_FindDFA( ps->p_grammar, nt); if ((err = push(&ps->p_stack, nt, d1, - arrow, lineno, col_offset, - end_lineno, end_col_offset)) > 0) { + arrow, lineno, col_offset)) > 0) { D(printf(" MemError: push\n")); return err; } - D(printf(" Push '%s'\n", d1->d_name)); + D(printf(" Push ...\n")); continue; } /* Shift the token */ if ((err = shift(&ps->p_stack, type, str, - x, lineno, col_offset, - end_lineno, end_col_offset)) > 0) { + x, lineno, col_offset)) > 0) { D(printf(" MemError: shift.\n")); return err; } @@ -296,13 +274,11 @@ PyParser_AddToken(parser_state *ps, int type, char *str, d->d_name, ps->p_stack.s_top->s_state)); #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD -#if 0 if (d->d_name[0] == 'i' && strcmp(d->d_name, "import_stmt") == 0) future_hack(ps); #endif -#endif s_pop(&ps->p_stack); if (s_empty(&ps->p_stack)) { D(printf(" ACCEPT.\n")); @@ -316,12 +292,10 @@ PyParser_AddToken(parser_state *ps, int type, char *str, if (s->s_accept) { #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD -#if 0 if (d->d_name[0] == 'i' && strcmp(d->d_name, "import_stmt") == 0) future_hack(ps); #endif -#endif /* Pop this dfa and try again */ s_pop(&ps->p_stack); D(printf(" Pop ...\n")); diff --git a/Parser/parser.h b/Parser/parser.h index b16075e..403236d 100644 --- a/Parser/parser.h +++ b/Parser/parser.h @@ -7,42 +7,35 @@ extern "C" { /* Parser interface */ -#define MAXSTACK 1700 +#define MAXSTACK 1500 typedef struct { - int s_state; /* State in current DFA */ - const dfa *s_dfa; /* Current DFA */ - struct _node *s_parent; /* Where to add next node */ + int s_state; /* State in current DFA */ + dfa *s_dfa; /* Current DFA */ + struct _node *s_parent; /* Where to add next node */ } stackentry; typedef struct { - stackentry *s_top; /* Top entry */ - stackentry s_base[MAXSTACK];/* Array of stack entries */ - /* NB The stack grows down */ + stackentry *s_top; /* Top entry */ + stackentry s_base[MAXSTACK];/* Array of stack entries */ + /* NB The stack grows down */ } stack; typedef struct { - stack p_stack; /* Stack of parser states */ - grammar *p_grammar; /* Grammar to use */ - node *p_tree; /* Top of parse tree */ + stack p_stack; /* Stack of parser states */ + grammar *p_grammar; /* Grammar to use */ + node *p_tree; /* Top of parse tree */ #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD - unsigned long p_flags; /* see co_flags in Include/code.h */ + unsigned long p_flags; /* see co_flags in Include/code.h */ #endif 
} parser_state; parser_state *PyParser_New(grammar *g, int start); void PyParser_Delete(parser_state *ps); -int PyParser_AddToken(parser_state *ps, int type, char *str, - int lineno, int col_offset, - int end_lineno, int end_col_offset, +int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno, int col_offset, int *expected_ret); void PyGrammar_AddAccelerators(grammar *g); - -#define showtree _Py_showtree -#define printtree _Py_printtree -#define dumptree _Py_dumptree - #ifdef __cplusplus } #endif diff --git a/Parser/parsetok.c b/Parser/parsetok.c index a5d7897..a5e9222 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -1,7 +1,7 @@ /* Parser-tokenizer link implementation */ -#include "Python.h" +#include "pgenheaders.h" #include "tokenizer.h" #include "node.h" #include "grammar.h" @@ -10,53 +10,12 @@ #include "errcode.h" #include "graminit.h" +int Py_TabcheckFlag; + /* Forward */ static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); -static int initerr(perrdetail *err_ret, PyObject * filename); - -typedef struct { - struct { - int lineno; - char *comment; - } *items; - size_t size; - size_t num_items; -} growable_comment_array; - -static int -growable_comment_array_init(growable_comment_array *arr, size_t initial_size) { - assert(initial_size > 0); - arr->items = malloc(initial_size * sizeof(*arr->items)); - arr->size = initial_size; - arr->num_items = 0; - - return arr->items != NULL; -} - -static int -growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) { - if (arr->num_items >= arr->size) { - arr->size *= 2; - arr->items = realloc(arr->items, arr->size * sizeof(*arr->items)); - if (!arr->items) { - return 0; - } - } - - arr->items[arr->num_items].lineno = lineno; - arr->items[arr->num_items].comment = comment; - arr->num_items++; - return 1; -} - -static void -growable_comment_array_deallocate(growable_comment_array *arr) { - for (unsigned i = 0; i < arr->num_items; i++) { - PyObject_FREE(arr->items[i].comment); - } - free(arr->items); -} +static void initerr(perrdetail *err_ret, const char* filename); /* Parse input coming from a string. Return error code, print some errors. */ node * @@ -84,135 +43,74 @@ PyParser_ParseStringFlagsFilename(const char *s, const char *filename, } node * -PyParser_ParseStringObject(const char *s, PyObject *filename, - grammar *g, int start, - perrdetail *err_ret, int *flags) +PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, + grammar *g, int start, + perrdetail *err_ret, int *flags) { struct tok_state *tok; - int exec_input = start == file_input; - if (initerr(err_ret, filename) < 0) - return NULL; - - if (PySys_Audit("compile", "yO", s, err_ret->filename) < 0) { - err_ret->error = E_ERROR; - return NULL; - } + initerr(err_ret, filename); - if (*flags & PyPARSE_IGNORE_COOKIE) - tok = PyTokenizer_FromUTF8(s, exec_input); - else - tok = PyTokenizer_FromString(s, exec_input); - if (tok == NULL) { + if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) { err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; return NULL; } - if (*flags & PyPARSE_TYPE_COMMENTS) { - tok->type_comments = 1; + + tok->filename = filename ? 
filename : "<string>"; + if (Py_TabcheckFlag || Py_VerboseFlag) { + tok->altwarning = (tok->filename != NULL); + if (Py_TabcheckFlag >= 2) + tok->alterror++; } - Py_INCREF(err_ret->filename); - tok->filename = err_ret->filename; - if (*flags & PyPARSE_ASYNC_HACKS) - tok->async_hacks = 1; return parsetok(tok, g, start, err_ret, flags); } -node * -PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str, - grammar *g, int start, - perrdetail *err_ret, int *flags) -{ - node *n; - PyObject *filename = NULL; - if (filename_str != NULL) { - filename = PyUnicode_DecodeFSDefault(filename_str); - if (filename == NULL) { - err_ret->error = E_ERROR; - return NULL; - } - } - n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags); - Py_XDECREF(filename); - return n; -} - /* Parse input coming from a file. Return error code, print some errors. */ node * PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, - const char *ps1, const char *ps2, - perrdetail *err_ret) + char *ps1, char *ps2, perrdetail *err_ret) { - return PyParser_ParseFileFlags(fp, filename, NULL, - g, start, ps1, ps2, err_ret, 0); + return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2, + err_ret, 0); } node * -PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc, - grammar *g, int start, - const char *ps1, const char *ps2, - perrdetail *err_ret, int flags) +PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret, int flags) { int iflags = flags; - return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1, - ps2, err_ret, &iflags); + return PyParser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2, err_ret, &iflags); } node * -PyParser_ParseFileObject(FILE *fp, PyObject *filename, - const char *enc, grammar *g, int start, - const char *ps1, const char *ps2, - perrdetail *err_ret, int *flags) +PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret, int *flags) { struct tok_state *tok; - if (initerr(err_ret, filename) < 0) - return NULL; + initerr(err_ret, filename); - if (PySys_Audit("compile", "OO", Py_None, err_ret->filename) < 0) { - return NULL; - } - - if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) { + if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) { err_ret->error = E_NOMEM; return NULL; } - if (*flags & PyPARSE_TYPE_COMMENTS) { - tok->type_comments = 1; + tok->filename = filename; + if (Py_TabcheckFlag || Py_VerboseFlag) { + tok->altwarning = (filename != NULL); + if (Py_TabcheckFlag >= 2) + tok->alterror++; } - Py_INCREF(err_ret->filename); - tok->filename = err_ret->filename; - return parsetok(tok, g, start, err_ret, flags); -} -node * -PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, - const char *enc, grammar *g, int start, - const char *ps1, const char *ps2, - perrdetail *err_ret, int *flags) -{ - node *n; - PyObject *fileobj = NULL; - if (filename != NULL) { - fileobj = PyUnicode_DecodeFSDefault(filename); - if (fileobj == NULL) { - err_ret->error = E_ERROR; - return NULL; - } - } - n = PyParser_ParseFileObject(fp, fileobj, enc, g, - start, ps1, ps2, err_ret, flags); - Py_XDECREF(fileobj); - return n; + return parsetok(tok, g, start, err_ret, flags); } -#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD #if 0 -static const char with_msg[] = +static char with_msg[] = "%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n"; -static const char as_msg[] = +static char 
as_msg[] = "%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n"; static void @@ -223,7 +121,6 @@ warn(const char *msg, const char *filename, int lineno) PySys_WriteStderr(msg, filename, lineno); } #endif -#endif /* Parse input coming from the given tokenizer structure. Return error code. */ @@ -235,25 +132,21 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, parser_state *ps; node *n; int started = 0; - int col_offset, end_col_offset; - growable_comment_array type_ignores; - - if (!growable_comment_array_init(&type_ignores, 10)) { - err_ret->error = E_NOMEM; - PyTokenizer_Free(tok); - return NULL; - } if ((ps = PyParser_New(g, start)) == NULL) { + fprintf(stderr, "no mem for new parser\n"); err_ret->error = E_NOMEM; PyTokenizer_Free(tok); return NULL; } #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD - if (*flags & PyPARSE_BARRY_AS_BDFL) - ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL; - if (*flags & PyPARSE_TYPE_COMMENTS) - ps->p_flags |= PyCF_TYPE_COMMENTS; + if (*flags & PyPARSE_PRINT_IS_FUNCTION) { + ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; + } + if (*flags & PyPARSE_UNICODE_LITERALS) { + ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; + } + #endif for (;;) { @@ -261,9 +154,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, int type; size_t len; char *str; - col_offset = -1; - int lineno; - const char *line_start; + int col_offset; type = PyTokenizer_Get(tok, &a, &b); if (type == ERRORTOKEN) { @@ -288,6 +179,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, len = (a != NULL && b != NULL) ? b - a : 0; str = (char *) PyObject_MALLOC(len + 1); if (str == NULL) { + fprintf(stderr, "no mem for next token\n"); err_ret->error = E_NOMEM; break; } @@ -296,56 +188,16 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, str[len] = '\0'; #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD - if (type == NOTEQUAL) { - if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) && - strcmp(str, "!=")) { - PyObject_FREE(str); - err_ret->error = E_SYNTAX; - break; - } - else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) && - strcmp(str, "<>")) { - PyObject_FREE(str); - err_ret->expected = NOTEQUAL; - err_ret->error = E_SYNTAX; - break; - } - } #endif - - /* Nodes of type STRING, especially multi line strings - must be handled differently in order to get both - the starting line number and the column offset right. - (cf. issue 16806) */ - lineno = type == STRING ? tok->first_lineno : tok->lineno; - line_start = type == STRING ? 
tok->multi_line_start : tok->line_start; - if (a != NULL && a >= line_start) { - col_offset = Py_SAFE_DOWNCAST(a - line_start, - intptr_t, int); + if (a != NULL && a >= tok->line_start) { + col_offset = a - tok->line_start; } else { col_offset = -1; } - if (b != NULL && b >= tok->line_start) { - end_col_offset = Py_SAFE_DOWNCAST(b - tok->line_start, - intptr_t, int); - } - else { - end_col_offset = -1; - } - - if (type == TYPE_IGNORE) { - if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) { - err_ret->error = E_NOMEM; - break; - } - continue; - } - if ((err_ret->error = - PyParser_AddToken(ps, (int)type, str, - lineno, col_offset, tok->lineno, end_col_offset, + PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset, &(err_ret->expected))) != E_OK) { if (err_ret->error != E_DONE) { PyObject_FREE(str); @@ -358,87 +210,38 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, if (err_ret->error == E_DONE) { n = ps->p_tree; ps->p_tree = NULL; - - if (n->n_type == file_input) { - /* Put type_ignore nodes in the ENDMARKER of file_input. */ - int num; - node *ch; - size_t i; - - num = NCH(n); - ch = CHILD(n, num - 1); - REQ(ch, ENDMARKER); - - for (i = 0; i < type_ignores.num_items; i++) { - int res = PyNode_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment, - type_ignores.items[i].lineno, 0, - type_ignores.items[i].lineno, 0); - if (res != 0) { - err_ret->error = res; - PyNode_Free(n); - n = NULL; - break; - } - type_ignores.items[i].comment = NULL; - } - } - - /* Check that the source for a single input statement really - is a single statement by looking at what is left in the - buffer after parsing. Trailing whitespace and comments - are OK. */ - if (err_ret->error == E_DONE && start == single_input) { - char *cur = tok->cur; - char c = *tok->cur; - - for (;;) { - while (c == ' ' || c == '\t' || c == '\n' || c == '\014') - c = *++cur; - - if (!c) - break; - - if (c != '#') { - err_ret->error = E_BADSINGLE; - PyNode_Free(n); - n = NULL; - break; - } - - /* Suck up comment. */ - while (c && c != '\n') - c = *++cur; - } - } } else n = NULL; - growable_comment_array_deallocate(&type_ignores); - #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD *flags = ps->p_flags; #endif PyParser_Delete(ps); if (n == NULL) { - if (tok->done == E_EOF) + if (tok->lineno <= 1 && tok->done == E_EOF) err_ret->error = E_EOF; err_ret->lineno = tok->lineno; if (tok->buf != NULL) { + char *text = NULL; size_t len; assert(tok->cur - tok->buf < INT_MAX); - /* if we've managed to parse a token, point the offset to its start, - * else use the current reading position of the tokenizer - */ - err_ret->offset = col_offset != -1 ? 
col_offset + 1 : ((int)(tok->cur - tok->buf)); + err_ret->offset = (int)(tok->cur - tok->buf); len = tok->inp - tok->buf; - err_ret->text = (char *) PyObject_MALLOC(len + 1); - if (err_ret->text != NULL) { - if (len > 0) - strncpy(err_ret->text, tok->buf, len); - err_ret->text[len] = '\0'; +#ifdef Py_USING_UNICODE + text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset); + +#endif + if (text == NULL) { + text = (char *) PyObject_MALLOC(len + 1); + if (text != NULL) { + if (len > 0) + strncpy(text, tok->buf, len); + text[len] = '\0'; + } } + err_ret->text = text; } } else if (tok->encoding != NULL) { /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was @@ -465,31 +268,17 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, done: PyTokenizer_Free(tok); - if (n != NULL) { - _PyNode_FinalizeEndPos(n); - } return n; } -static int -initerr(perrdetail *err_ret, PyObject *filename) +static void +initerr(perrdetail *err_ret, const char *filename) { err_ret->error = E_OK; + err_ret->filename = filename; err_ret->lineno = 0; err_ret->offset = 0; err_ret->text = NULL; err_ret->token = -1; err_ret->expected = -1; - if (filename) { - Py_INCREF(filename); - err_ret->filename = filename; - } - else { - err_ret->filename = PyUnicode_FromString("<string>"); - if (err_ret->filename == NULL) { - err_ret->error = E_ERROR; - return -1; - } - } - return 0; } diff --git a/Parser/pgen.c b/Parser/pgen.c new file mode 100644 index 0000000..b20d976 --- /dev/null +++ b/Parser/pgen.c @@ -0,0 +1,726 @@ +/* Parser generator */ + +/* For a description, see the comments at end of this file */ + +#include "Python.h" +#include "pgenheaders.h" +#include "token.h" +#include "node.h" +#include "grammar.h" +#include "metagrammar.h" +#include "pgen.h" + +extern int Py_DebugFlag; +extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */ + + +/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */ + +typedef struct _nfaarc { + int ar_label; + int ar_arrow; +} nfaarc; + +typedef struct _nfastate { + int st_narcs; + nfaarc *st_arc; +} nfastate; + +typedef struct _nfa { + int nf_type; + char *nf_name; + int nf_nstates; + nfastate *nf_state; + int nf_start, nf_finish; +} nfa; + +/* Forward */ +static void compile_rhs(labellist *ll, + nfa *nf, node *n, int *pa, int *pb); +static void compile_alt(labellist *ll, + nfa *nf, node *n, int *pa, int *pb); +static void compile_item(labellist *ll, + nfa *nf, node *n, int *pa, int *pb); +static void compile_atom(labellist *ll, + nfa *nf, node *n, int *pa, int *pb); + +static int +addnfastate(nfa *nf) +{ + nfastate *st; + + nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state, + sizeof(nfastate) * (nf->nf_nstates + 1)); + if (nf->nf_state == NULL) + Py_FatalError("out of mem"); + st = &nf->nf_state[nf->nf_nstates++]; + st->st_narcs = 0; + st->st_arc = NULL; + return st - nf->nf_state; +} + +static void +addnfaarc(nfa *nf, int from, int to, int lbl) +{ + nfastate *st; + nfaarc *ar; + + st = &nf->nf_state[from]; + st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc, + sizeof(nfaarc) * (st->st_narcs + 1)); + if (st->st_arc == NULL) + Py_FatalError("out of mem"); + ar = &st->st_arc[st->st_narcs++]; + ar->ar_label = lbl; + ar->ar_arrow = to; +} + +static nfa * +newnfa(char *name) +{ + nfa *nf; + static int type = NT_OFFSET; /* All types will be disjunct */ + + nf = (nfa *)PyObject_MALLOC(sizeof(nfa)); + if (nf == NULL) + Py_FatalError("no mem for new nfa"); + nf->nf_type = type++; + nf->nf_name = name; /* XXX strdup(name) ??? 
*/ + nf->nf_nstates = 0; + nf->nf_state = NULL; + nf->nf_start = nf->nf_finish = -1; + return nf; +} + +typedef struct _nfagrammar { + int gr_nnfas; + nfa **gr_nfa; + labellist gr_ll; +} nfagrammar; + +/* Forward */ +static void compile_rule(nfagrammar *gr, node *n); + +static nfagrammar * +newnfagrammar(void) +{ + nfagrammar *gr; + + gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar)); + if (gr == NULL) + Py_FatalError("no mem for new nfa grammar"); + gr->gr_nnfas = 0; + gr->gr_nfa = NULL; + gr->gr_ll.ll_nlabels = 0; + gr->gr_ll.ll_label = NULL; + addlabel(&gr->gr_ll, ENDMARKER, "EMPTY"); + return gr; +} + +static void +freenfagrammar(nfagrammar *gr) +{ + int i; + for (i = 0; i < gr->gr_nnfas; i++) { + PyObject_FREE(gr->gr_nfa[i]->nf_state); + } + PyObject_FREE(gr->gr_nfa); + PyObject_FREE(gr); +} + +static nfa * +addnfa(nfagrammar *gr, char *name) +{ + nfa *nf; + + nf = newnfa(name); + gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa, + sizeof(nfa*) * (gr->gr_nnfas + 1)); + if (gr->gr_nfa == NULL) + Py_FatalError("out of mem"); + gr->gr_nfa[gr->gr_nnfas++] = nf; + addlabel(&gr->gr_ll, NAME, nf->nf_name); + return nf; +} + +#ifdef Py_DEBUG + +static char REQNFMT[] = "metacompile: less than %d children\n"; + +#define REQN(i, count) do { \ + if (i < count) { \ + fprintf(stderr, REQNFMT, count); \ + Py_FatalError("REQN"); \ + } \ +} while (0) + +#else +#define REQN(i, count) /* empty */ +#endif + +static nfagrammar * +metacompile(node *n) +{ + nfagrammar *gr; + int i; + + if (Py_DebugFlag) + printf("Compiling (meta-) parse tree into NFA grammar\n"); + gr = newnfagrammar(); + REQ(n, MSTART); + i = n->n_nchildren - 1; /* Last child is ENDMARKER */ + n = n->n_child; + for (; --i >= 0; n++) { + if (n->n_type != NEWLINE) + compile_rule(gr, n); + } + return gr; +} + +static void +compile_rule(nfagrammar *gr, node *n) +{ + nfa *nf; + + REQ(n, RULE); + REQN(n->n_nchildren, 4); + n = n->n_child; + REQ(n, NAME); + nf = addnfa(gr, n->n_str); + n++; + REQ(n, COLON); + n++; + REQ(n, RHS); + compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish); + n++; + REQ(n, NEWLINE); +} + +static void +compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb) +{ + int i; + int a, b; + + REQ(n, RHS); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + REQ(n, ALT); + compile_alt(ll, nf, n, pa, pb); + if (--i <= 0) + return; + n++; + a = *pa; + b = *pb; + *pa = addnfastate(nf); + *pb = addnfastate(nf); + addnfaarc(nf, *pa, a, EMPTY); + addnfaarc(nf, b, *pb, EMPTY); + for (; --i >= 0; n++) { + REQ(n, VBAR); + REQN(i, 1); + --i; + n++; + REQ(n, ALT); + compile_alt(ll, nf, n, &a, &b); + addnfaarc(nf, *pa, a, EMPTY); + addnfaarc(nf, b, *pb, EMPTY); + } +} + +static void +compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb) +{ + int i; + int a, b; + + REQ(n, ALT); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + REQ(n, ITEM); + compile_item(ll, nf, n, pa, pb); + --i; + n++; + for (; --i >= 0; n++) { + REQ(n, ITEM); + compile_item(ll, nf, n, &a, &b); + addnfaarc(nf, *pb, a, EMPTY); + *pb = b; + } +} + +static void +compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb) +{ + int i; + int a, b; + + REQ(n, ITEM); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + if (n->n_type == LSQB) { + REQN(i, 3); + n++; + REQ(n, RHS); + *pa = addnfastate(nf); + *pb = addnfastate(nf); + addnfaarc(nf, *pa, *pb, EMPTY); + compile_rhs(ll, nf, n, &a, &b); + addnfaarc(nf, *pa, a, EMPTY); + addnfaarc(nf, b, *pb, EMPTY); + REQN(i, 1); + n++; + REQ(n, RSQB); + } + else { + compile_atom(ll, nf, n, pa, 
pb); + if (--i <= 0) + return; + n++; + addnfaarc(nf, *pb, *pa, EMPTY); + if (n->n_type == STAR) + *pb = *pa; + else + REQ(n, PLUS); + } +} + +static void +compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb) +{ + int i; + + REQ(n, ATOM); + i = n->n_nchildren; + (void)i; /* Don't warn about set but unused */ + REQN(i, 1); + n = n->n_child; + if (n->n_type == LPAR) { + REQN(i, 3); + n++; + REQ(n, RHS); + compile_rhs(ll, nf, n, pa, pb); + n++; + REQ(n, RPAR); + } + else if (n->n_type == NAME || n->n_type == STRING) { + *pa = addnfastate(nf); + *pb = addnfastate(nf); + addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str)); + } + else + REQ(n, NAME); +} + +static void +dumpstate(labellist *ll, nfa *nf, int istate) +{ + nfastate *st; + int i; + nfaarc *ar; + + printf("%c%2d%c", + istate == nf->nf_start ? '*' : ' ', + istate, + istate == nf->nf_finish ? '.' : ' '); + st = &nf->nf_state[istate]; + ar = st->st_arc; + for (i = 0; i < st->st_narcs; i++) { + if (i > 0) + printf("\n "); + printf("-> %2d %s", ar->ar_arrow, + PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label])); + ar++; + } + printf("\n"); +} + +static void +dumpnfa(labellist *ll, nfa *nf) +{ + int i; + + printf("NFA '%s' has %d states; start %d, finish %d\n", + nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish); + for (i = 0; i < nf->nf_nstates; i++) + dumpstate(ll, nf, i); +} + + +/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */ + +static void +addclosure(bitset ss, nfa *nf, int istate) +{ + if (addbit(ss, istate)) { + nfastate *st = &nf->nf_state[istate]; + nfaarc *ar = st->st_arc; + int i; + + for (i = st->st_narcs; --i >= 0; ) { + if (ar->ar_label == EMPTY) + addclosure(ss, nf, ar->ar_arrow); + ar++; + } + } +} + +typedef struct _ss_arc { + bitset sa_bitset; + int sa_arrow; + int sa_label; +} ss_arc; + +typedef struct _ss_state { + bitset ss_ss; + int ss_narcs; + struct _ss_arc *ss_arc; + int ss_deleted; + int ss_finish; + int ss_rename; +} ss_state; + +typedef struct _ss_dfa { + int sd_nstates; + ss_state *sd_state; +} ss_dfa; + +/* Forward */ +static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits, + labellist *ll, char *msg); +static void simplify(int xx_nstates, ss_state *xx_state); +static void convert(dfa *d, int xx_nstates, ss_state *xx_state); + +static void +makedfa(nfagrammar *gr, nfa *nf, dfa *d) +{ + int nbits = nf->nf_nstates; + bitset ss; + int xx_nstates; + ss_state *xx_state, *yy; + ss_arc *zz; + int istate, jstate, iarc, jarc, ibit; + nfastate *st; + nfaarc *ar; + int i, j; + + ss = newbitset(nbits); + addclosure(ss, nf, nf->nf_start); + xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state)); + if (xx_state == NULL) + Py_FatalError("no mem for xx_state in makedfa"); + xx_nstates = 1; + yy = &xx_state[0]; + yy->ss_ss = ss; + yy->ss_narcs = 0; + yy->ss_arc = NULL; + yy->ss_deleted = 0; + yy->ss_finish = testbit(ss, nf->nf_finish); + if (yy->ss_finish) + printf("Error: nonterminal '%s' may produce empty.\n", + nf->nf_name); + + /* This algorithm is from a book written before + the invention of structured programming... */ + + /* For each unmarked state... */ + for (istate = 0; istate < xx_nstates; ++istate) { + size_t size; + yy = &xx_state[istate]; + ss = yy->ss_ss; + /* For all its states... */ + for (ibit = 0; ibit < nf->nf_nstates; ++ibit) { + if (!testbit(ss, ibit)) + continue; + st = &nf->nf_state[ibit]; + /* For all non-empty arcs from this state... 
*/
+            for (iarc = 0; iarc < st->st_narcs; iarc++) {
+                ar = &st->st_arc[iarc];
+                if (ar->ar_label == EMPTY)
+                    continue;
+                /* Look up in list of arcs from this state */
+                for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
+                    zz = &yy->ss_arc[jarc];
+                    if (ar->ar_label == zz->sa_label)
+                        goto found;
+                }
+                /* Add new arc for this state */
+                size = sizeof(ss_arc) * (yy->ss_narcs + 1);
+                yy->ss_arc = (ss_arc *)PyObject_REALLOC(
+                                            yy->ss_arc, size);
+                if (yy->ss_arc == NULL)
+                    Py_FatalError("out of mem");
+                zz = &yy->ss_arc[yy->ss_narcs++];
+                zz->sa_label = ar->ar_label;
+                zz->sa_bitset = newbitset(nbits);
+                zz->sa_arrow = -1;
+            found: ;
+                /* Add destination */
+                addclosure(zz->sa_bitset, nf, ar->ar_arrow);
+            }
+        }
+        /* Now look up all the arrow states */
+        for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
+            zz = &xx_state[istate].ss_arc[jarc];
+            for (jstate = 0; jstate < xx_nstates; jstate++) {
+                if (samebitset(zz->sa_bitset,
+                               xx_state[jstate].ss_ss, nbits)) {
+                    zz->sa_arrow = jstate;
+                    goto done;
+                }
+            }
+            size = sizeof(ss_state) * (xx_nstates + 1);
+            xx_state = (ss_state *)PyObject_REALLOC(xx_state,
+                                                    size);
+            if (xx_state == NULL)
+                Py_FatalError("out of mem");
+            zz->sa_arrow = xx_nstates;
+            yy = &xx_state[xx_nstates++];
+            yy->ss_ss = zz->sa_bitset;
+            yy->ss_narcs = 0;
+            yy->ss_arc = NULL;
+            yy->ss_deleted = 0;
+            yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
+        done: ;
+        }
+    }
+
+    if (Py_DebugFlag)
+        printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
+                   "before minimizing");
+
+    simplify(xx_nstates, xx_state);
+
+    if (Py_DebugFlag)
+        printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
+                   "after minimizing");
+
+    convert(d, xx_nstates, xx_state);
+
+    for (i = 0; i < xx_nstates; i++) {
+        for (j = 0; j < xx_state[i].ss_narcs; j++)
+            delbitset(xx_state[i].ss_arc[j].sa_bitset);
+        PyObject_FREE(xx_state[i].ss_arc);
+    }
+    PyObject_FREE(xx_state);
+}
+
+static void
+printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
+           labellist *ll, char *msg)
+{
+    int i, ibit, iarc;
+    ss_state *yy;
+    ss_arc *zz;
+
+    printf("Subset DFA %s\n", msg);
+    for (i = 0; i < xx_nstates; i++) {
+        yy = &xx_state[i];
+        if (yy->ss_deleted)
+            continue;
+        printf(" Subset %d", i);
+        if (yy->ss_finish)
+            printf(" (finish)");
+        printf(" { ");
+        for (ibit = 0; ibit < nbits; ibit++) {
+            if (testbit(yy->ss_ss, ibit))
+                printf("%d ", ibit);
+        }
+        printf("}\n");
+        for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
+            zz = &yy->ss_arc[iarc];
+            printf("  Arc to state %d, label %s\n",
+                   zz->sa_arrow,
+                   PyGrammar_LabelRepr(
+                       &ll->ll_label[zz->sa_label]));
+        }
+    }
+}
+
+
+/* PART THREE -- SIMPLIFY DFA */
+
+/* Simplify the DFA by repeatedly eliminating states that are
+   equivalent to another one.  This is NOT Algorithm 3.3 from
+   [Aho&Ullman 77].  It does not always find the minimal DFA,
+   but it does usually make a much smaller one...  (For an example
+   of sub-optimal behavior, try S: x a b+ | y a b+.)
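+
+   (A sketch of why that example stays sub-optimal; this gloss is an
+   editorial addition, not part of the original comment.  The subset
+   construction gives two separate b+ loop states, one reached through
+   x and one through y.  Each carries a single arc on b back to itself,
+   so samestate() below, which compares arcs pairwise by arrow number,
+   sees (b -> i) versus (b -> j) with i != j and never merges them,
+   even though both states accept exactly the same continuations.  A
+   partition-refinement minimizer such as Hopcroft's algorithm would
+   merge them.)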
+*/ + +static int +samestate(ss_state *s1, ss_state *s2) +{ + int i; + + if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish) + return 0; + for (i = 0; i < s1->ss_narcs; i++) { + if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow || + s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label) + return 0; + } + return 1; +} + +static void +renamestates(int xx_nstates, ss_state *xx_state, int from, int to) +{ + int i, j; + + if (Py_DebugFlag) + printf("Rename state %d to %d.\n", from, to); + for (i = 0; i < xx_nstates; i++) { + if (xx_state[i].ss_deleted) + continue; + for (j = 0; j < xx_state[i].ss_narcs; j++) { + if (xx_state[i].ss_arc[j].sa_arrow == from) + xx_state[i].ss_arc[j].sa_arrow = to; + } + } +} + +static void +simplify(int xx_nstates, ss_state *xx_state) +{ + int changes; + int i, j; + + do { + changes = 0; + for (i = 1; i < xx_nstates; i++) { + if (xx_state[i].ss_deleted) + continue; + for (j = 0; j < i; j++) { + if (xx_state[j].ss_deleted) + continue; + if (samestate(&xx_state[i], &xx_state[j])) { + xx_state[i].ss_deleted++; + renamestates(xx_nstates, xx_state, + i, j); + changes++; + break; + } + } + } + } while (changes); +} + + +/* PART FOUR -- GENERATE PARSING TABLES */ + +/* Convert the DFA into a grammar that can be used by our parser */ + +static void +convert(dfa *d, int xx_nstates, ss_state *xx_state) +{ + int i, j; + ss_state *yy; + ss_arc *zz; + + for (i = 0; i < xx_nstates; i++) { + yy = &xx_state[i]; + if (yy->ss_deleted) + continue; + yy->ss_rename = addstate(d); + } + + for (i = 0; i < xx_nstates; i++) { + yy = &xx_state[i]; + if (yy->ss_deleted) + continue; + for (j = 0; j < yy->ss_narcs; j++) { + zz = &yy->ss_arc[j]; + addarc(d, yy->ss_rename, + xx_state[zz->sa_arrow].ss_rename, + zz->sa_label); + } + if (yy->ss_finish) + addarc(d, yy->ss_rename, yy->ss_rename, 0); + } + + d->d_initial = 0; +} + + +/* PART FIVE -- GLUE IT ALL TOGETHER */ + +static grammar * +maketables(nfagrammar *gr) +{ + int i; + nfa *nf; + dfa *d; + grammar *g; + + if (gr->gr_nnfas == 0) + return NULL; + g = newgrammar(gr->gr_nfa[0]->nf_type); + /* XXX first rule must be start rule */ + g->g_ll = gr->gr_ll; + + for (i = 0; i < gr->gr_nnfas; i++) { + nf = gr->gr_nfa[i]; + if (Py_DebugFlag) { + printf("Dump of NFA for '%s' ...\n", nf->nf_name); + dumpnfa(&gr->gr_ll, nf); + printf("Making DFA for '%s' ...\n", nf->nf_name); + } + d = adddfa(g, nf->nf_type, nf->nf_name); + makedfa(gr, gr->gr_nfa[i], d); + } + + return g; +} + +grammar * +pgen(node *n) +{ + nfagrammar *gr; + grammar *g; + + gr = metacompile(n); + g = maketables(gr); + translatelabels(g); + addfirstsets(g); + freenfagrammar(gr); + return g; +} + +grammar * +Py_pgen(node *n) +{ + return pgen(n); +} + +/* + +Description +----------- + +Input is a grammar in extended BNF (using * for repetition, + for +at-least-once repetition, [] for optional parts, | for alternatives and +() for grouping). This has already been parsed and turned into a parse +tree. + +Each rule is considered as a regular expression in its own right. +It is turned into a Non-deterministic Finite Automaton (NFA), which +is then turned into a Deterministic Finite Automaton (DFA), which is then +optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3, +or similar compiler books (this technique is more often used for lexical +analyzers). + +The DFA's are used by the parser as parsing tables in a special way +that's probably unique. Before they are usable, the FIRST sets of all +non-terminals are computed. 
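+
+As a small worked illustration (an editorial sketch, not part of the
+original description), take a rule such as
+
+    term: factor ('*' factor)*
+
+Treated as a regular expression over the symbols factor and '*', it
+becomes a three-state DFA, roughly:
+
+    state 0:  factor -> state 1
+    state 1:  '*' -> state 2        (accepting)
+    state 2:  factor -> state 1
+
+and FIRST(term) = FIRST(factor), since every string derived from term
+must begin with something derivable from factor.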
+
+Reference
+---------
+
+[Aho&Ullman 77]
+    Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
+    (first edition)
+
+*/
diff --git a/Parser/pgen/__init__.py b/Parser/pgen/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/Parser/pgen/__init__.py
+++ /dev/null
diff --git a/Parser/pgen/__main__.py b/Parser/pgen/__main__.py
deleted file mode 100644
index bb96e75..0000000
--- a/Parser/pgen/__main__.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import argparse
-
-from .pgen import ParserGenerator
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Parser generator main program.")
-    parser.add_argument(
-        "grammar", type=str, help="The file with the grammar definition in EBNF format"
-    )
-    parser.add_argument("tokens", type=str, help="The file with the token definitions")
-    parser.add_argument(
-        "graminit_h",
-        type=argparse.FileType("w"),
-        help="The path to write the grammar's non-terminals as #defines",
-    )
-    parser.add_argument(
-        "graminit_c",
-        type=argparse.FileType("w"),
-        help="The path to write the grammar as initialized data",
-    )
-
-    parser.add_argument("--verbose", "-v", action="count")
-    args = parser.parse_args()
-
-    p = ParserGenerator(args.grammar, args.tokens, verbose=args.verbose)
-    grammar = p.make_grammar()
-    grammar.produce_graminit_h(args.graminit_h.write)
-    grammar.produce_graminit_c(args.graminit_c.write)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/Parser/pgen/automata.py b/Parser/pgen/automata.py
deleted file mode 100644
index 545a737..0000000
--- a/Parser/pgen/automata.py
+++ /dev/null
@@ -1,371 +0,0 @@
-"""Classes representing state-machine concepts"""
-
-class NFA:
-    """A non-deterministic finite automaton
-
-    A non-deterministic automaton is a form of finite state
-    machine.  An NFA's rules are less restrictive than a DFA's.
-    The NFA rules are:
-
-    * A transition can be non-deterministic and can result in
-      zero, one, or more states.
-
-    * An epsilon transition consuming empty input is valid.
-      Transitions consuming labeled symbols are also permitted.
-
-    This class assumes that there is only one starting state and one
-    accepting (ending) state.
-
-    Attributes:
-        name (str): The name of the rule the NFA is representing.
-        start (NFAState): The starting state.
-        end (NFAState): The ending state.
-    """
-
-    def __init__(self, start, end):
-        self.name = start.rule_name
-        self.start = start
-        self.end = end
-
-    def __repr__(self):
-        return "NFA(start={}, end={})".format(self.start, self.end)
-
-    def dump(self, writer=print):
-        """Dump a graphical representation of the NFA"""
-        todo = [self.start]
-        for i, state in enumerate(todo):
-            writer("  State", i, state is self.end and "(final)" or "")
-            for arc in state.arcs:
-                label = arc.label
-                next = arc.target
-                if next in todo:
-                    j = todo.index(next)
-                else:
-                    j = len(todo)
-                    todo.append(next)
-                if label is None:
-                    writer("    -> %d" % j)
-                else:
-                    writer("    %s -> %d" % (label, j))
-
-
-class NFAArc:
-    """An arc representing a transition between two NFA states.
-
-    NFA states can be connected in two ways:
-
-    * A label transition: An input equal to the label must
-      be consumed to perform the transition.
-    * An epsilon transition: The transition can be taken without
-      consuming any input symbol.
-
-    Attributes:
-        target (NFAState): The ending state of the transition arc.
-        label (Optional[str]): The label that must be consumed to make
-            the transition.  An epsilon transition is represented
-            using `None`.
-    """
-
-    def __init__(self, target, label):
-        self.target = target
-        self.label = label
-
-    def __repr__(self):
-        return "<%s: %s>" % (self.__class__.__name__, self.label)
-
-
-class NFAState:
-    """A state of an NFA (non-deterministic finite automaton).
-
-    Attributes:
-        rule_name (str): The name of the rule this state belongs to.
-        arcs (List[NFAArc]): The arcs representing transitions from
-            this NFA state to other NFA states, each optionally
-            carrying a label.
-    """
-
-    def __init__(self, rule_name):
-        self.rule_name = rule_name
-        self.arcs = []
-
-    def add_arc(self, target, label=None):
-        """Add a new arc to connect the state to a target state within the NFA
-
-        The method adds a new arc to the list of arcs available as transitions
-        from the present state.  An optional label indicates a named transition
-        that consumes an input, while the absence of a label represents an
-        epsilon transition.
-
-        Parameters:
-            target (NFAState): The end of the transition that the arc represents.
-            label (Optional[str]): The label that must be consumed for making
-                the transition.  If the label is not provided, the transition
-                is assumed to be an epsilon transition.
-        """
-        assert label is None or isinstance(label, str)
-        assert isinstance(target, NFAState)
-        self.arcs.append(NFAArc(target, label))
-
-    def __repr__(self):
-        return "<%s: from %s>" % (self.__class__.__name__, self.rule_name)
-
-
-class DFA:
-    """A deterministic finite automaton
-
-    A deterministic finite automaton is a form of finite state machine
-    that obeys the following rules:
-
-    * Each of the transitions is uniquely determined by
-      the source state and input symbol
-    * Reading an input symbol is required for each state
-      transition (no epsilon transitions).
-
-    The finite-state machine will accept or reject a string of symbols
-    and produces a unique computation of the automaton for each input
-    string.  The DFA must have a unique starting state (represented as the
-    first element in the list of states) but can have multiple final states.
-
-    Attributes:
-        name (str): The name of the rule the DFA is representing.
-        states (List[DFAState]): A collection of DFA states.
-    """
-
-    def __init__(self, name, states):
-        self.name = name
-        self.states = states
-
-    @classmethod
-    def from_nfa(cls, nfa):
-        """Constructs a DFA from an NFA using the Rabin–Scott construction algorithm.
-
-        To simulate the operation of a DFA on a given input string, it's
-        necessary to keep track of a single state at any time, or more precisely,
-        the state that the automaton will reach after seeing a prefix of the
-        input.  In contrast, to simulate an NFA, it's necessary to keep track of
-        a set of states: all of the states that the automaton could reach after
-        seeing the same prefix of the input, according to the nondeterministic
-        choices made by the automaton.  There are two possible sources of
-        non-determinism:
-
-        1) Multiple (one or more) transitions with the same label
-
-                          'A'      +-------+
-                 +----------->+ State +----------->+
-                 |            |   2   |
-        +-------+             +-------+
-        | State |
-        |   1   |             +-------+
-        +-------+             | State |
-                 +----------->+   3   +----------->+
-                          'A'      +-------+
-
-        2) Epsilon transitions (transitions that can be taken without consuming any input)
-
-           +-------+            +-------+
-           | State |     ε      | State |
-           |   1   +----------->+   2   +----------->+
-           +-------+            +-------+
-
-        Looking at the first case above, we can't determine which
-        transition should be followed when given an input A; we could
-        follow either one.
-        In the second case the problem is that we can choose whether or not
-        to take the epsilon transition.  To solve this problem we can imagine
-        that we follow all possibilities at the same time and we construct new
-        states from the set of all possible reachable states.  For every case
-        in the previous example:
-
-        1) For multiple transitions with the same label we collapse all of the
-           final states under the same one
-
-           +-------+            +-------+
-           | State |    'A'     | State |
-           |   1   +----------->+  2-3  +----------->+
-           +-------+            +-------+
-
-        2) For epsilon transitions we collapse all epsilon-reachable states
-           into the same one
-
-           +-------+
-           | State |
-           |  1-2  +----------->
-           +-------+
-
-        Because the DFA states consist of sets of NFA states, an n-state NFA
-        may be converted to a DFA with at most 2**n states.  Notice that the
-        constructed DFA is not minimal and can be simplified or reduced
-        afterwards.
-
-        Parameters:
-            nfa (NFA): The NFA to transform into a DFA.
-        """
-        assert isinstance(nfa, NFA)
-
-        def add_closure(nfa_state, base_nfa_set):
-            """Calculate the epsilon-closure of a given state
-
-            Add to the *base_nfa_set* all the states that are
-            reachable from *nfa_state* via epsilon-transitions.
-            """
-            assert isinstance(nfa_state, NFAState)
-            if nfa_state in base_nfa_set:
-                return
-            base_nfa_set.add(nfa_state)
-            for nfa_arc in nfa_state.arcs:
-                if nfa_arc.label is None:
-                    add_closure(nfa_arc.target, base_nfa_set)
-
-        # Calculate the epsilon-closure of the starting state
-        base_nfa_set = set()
-        add_closure(nfa.start, base_nfa_set)
-
-        # Start by visiting the NFA starting state (there is only one).
-        states = [DFAState(nfa.name, base_nfa_set, nfa.end)]
-
-        for state in states:  # NB states grow while we're iterating
-
-            # Find transitions from the current state to other reachable states
-            # and store them in a mapping that correlates the label to all the
-            # possible reachable states that can be obtained by consuming a
-            # token equal to the label.  Each set of all the states that can
-            # be reached after following a label will become a DFA state.
-            arcs = {}
-            for nfa_state in state.nfa_set:
-                for nfa_arc in nfa_state.arcs:
-                    if nfa_arc.label is not None:
-                        nfa_set = arcs.setdefault(nfa_arc.label, set())
-                        # All states that can be reached by epsilon-transitions
-                        # are also included in the set of reachable states.
-                        add_closure(nfa_arc.target, nfa_set)
-
-            # Now create new DFA states by visiting all possible transitions
-            # between the current DFA state and the new power-set states (each
-            # nfa_set) via the different labels.  As the nodes are appended to
-            # *states*, this performs a breadth-first search traversal over the
-            # power set of the states of the original NFA.
-            for label, nfa_set in sorted(arcs.items()):
-                for existing_state in states:
-                    if existing_state.nfa_set == nfa_set:
-                        # The DFA state already exists for this rule.
-                        next_state = existing_state
-                        break
-                else:
-                    next_state = DFAState(nfa.name, nfa_set, nfa.end)
-                    states.append(next_state)
-
-                # Add a transition between the current DFA state and the new
-                # DFA state (the power-set state) via the current label.
-                state.add_arc(next_state, label)
-
-        return cls(nfa.name, states)
-
-    def __iter__(self):
-        return iter(self.states)
-
-    def simplify(self):
-        """Attempt to reduce the number of states of the DFA
-
-        Transform the DFA into an equivalent DFA that has fewer states.
-        To minimize it, two classes of states can be removed or merged
-        from the original DFA without affecting the language it accepts:
-
-        * Unreachable states cannot be reached from the initial
-          state of the DFA by any input string.
-        * Nondistinguishable states are those that cannot be distinguished
-          from one another for any input string.
-
-        This algorithm does not achieve a fully minimized DFA, but it
-        works well enough for the particularities of the Python grammar.  The
-        algorithm repeatedly looks for two states that have the same set of
-        arcs (same labels pointing to the same nodes) and unifies them, until
-        things stop changing.
-        """
-        changes = True
-        while changes:
-            changes = False
-            for i, state_i in enumerate(self.states):
-                for j in range(i + 1, len(self.states)):
-                    state_j = self.states[j]
-                    if state_i == state_j:
-                        del self.states[j]
-                        for state in self.states:
-                            state.unifystate(state_j, state_i)
-                        changes = True
-                        break
-
-    def dump(self, writer=print):
-        """Dump a graphical representation of the DFA"""
-        for i, state in enumerate(self.states):
-            writer("  State", i, state.is_final and "(final)" or "")
-            for label, next in sorted(state.arcs.items()):
-                writer("    %s -> %d" % (label, self.states.index(next)))
-
-
-class DFAState(object):
-    """A state of a DFA
-
-    Attributes:
-        rule_name (str): The name of the DFA rule containing the represented state.
-        nfa_set (Set[NFAState]): The set of NFA states used to create this state.
-        is_final (bool): True if the state represents an accepting state of the
-            DFA containing this state.
-        arcs (Dict[str, DFAState]): A mapping representing transitions between
-            the current DFA state and another DFA state via following a label.
-    """
-
-    def __init__(self, rule_name, nfa_set, final):
-        assert isinstance(nfa_set, set)
-        assert isinstance(next(iter(nfa_set)), NFAState)
-        assert isinstance(final, NFAState)
-        self.rule_name = rule_name
-        self.nfa_set = nfa_set
-        self.arcs = {}  # map from terminals/nonterminals to DFAState
-        self.is_final = final in nfa_set
-
-    def add_arc(self, target, label):
-        """Add a new arc to the current state.
-
-        Parameters:
-            target (DFAState): The DFA state at the end of the arc.
-            label (str): The label representing the token that must be consumed
-                to perform this transition.
-        """
-        assert isinstance(label, str)
-        assert label not in self.arcs
-        assert isinstance(target, DFAState)
-        self.arcs[label] = target
-
-    def unifystate(self, old, new):
-        """Replace all arcs from the current node to *old* with *new*.
-
-        Parameters:
-            old (DFAState): The DFA state to remove from all existing arcs.
-            new (DFAState): The DFA state to replace in all existing arcs.
-        """
-        for label, next_ in self.arcs.items():
-            if next_ is old:
-                self.arcs[label] = new
-
-    def __eq__(self, other):
-        # The nfa_set does not matter for equality
-        assert isinstance(other, DFAState)
-        if self.is_final != other.is_final:
-            return False
-        # We cannot just return self.arcs == other.arcs because that
-        # would invoke this method recursively if there are any cycles.
-        if len(self.arcs) != len(other.arcs):
-            return False
-        for label, next_ in self.arcs.items():
-            if next_ is not other.arcs.get(label):
-                return False
-        return True
-
-    __hash__ = None  # For Py3 compatibility.
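The cycle caveat in DFAState.__eq__ above is easy to see concretely. A
minimal sketch (an illustration only, not part of the removed module) of
how comparing the arc dictionaries by value would recurse on a self-loop:

    class Node:
        def __init__(self):
            self.arcs = {}

        def __eq__(self, other):
            # dict equality compares values with ==, re-entering __eq__
            return self.arcs == other.arcs

    a, b = Node(), Node()
    a.arcs["x"] = a  # self-loop
    b.arcs["x"] = b  # self-loop
    # a == b -> a.arcs == b.arcs -> a == b again ... -> RecursionError

Comparing arc targets by identity (`is`), as the deleted __eq__ does,
sidesteps the cycle entirely.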
- - def __repr__(self): - return "<%s: %s is_final=%s>" % ( - self.__class__.__name__, - self.rule_name, - self.is_final, - ) diff --git a/Parser/pgen/grammar.py b/Parser/pgen/grammar.py deleted file mode 100644 index ce40e16..0000000 --- a/Parser/pgen/grammar.py +++ /dev/null @@ -1,147 +0,0 @@ -import collections - - -class Grammar: - """Pgen parsing tables class. - - The instance variables are as follows: - - symbol2number -- a dict mapping symbol names to numbers. Symbol - numbers are always 256 or higher, to distinguish - them from token numbers, which are between 0 and - 255 (inclusive). - - number2symbol -- a dict mapping numbers to symbol names; - these two are each other's inverse. - - states -- a list of DFAs, where each DFA is a list of - states, each state is a list of arcs, and each - arc is a (i, j) pair where i is a label and j is - a state number. The DFA number is the index into - this list. (This name is slightly confusing.) - Final states are represented by a special arc of - the form (0, j) where j is its own state number. - - dfas -- a dict mapping symbol numbers to (DFA, first) - pairs, where DFA is an item from the states list - above, and first is a set of tokens that can - begin this grammar rule. - - labels -- a list of (x, y) pairs where x is either a token - number or a symbol number, and y is either None - or a string; the strings are keywords. The label - number is the index in this list; label numbers - are used to mark state transitions (arcs) in the - DFAs. - - start -- the number of the grammar's start symbol. - - keywords -- a dict mapping keyword strings to arc labels. - - tokens -- a dict mapping token numbers to arc labels. - - """ - - def __init__(self): - self.symbol2number = collections.OrderedDict() - self.number2symbol = collections.OrderedDict() - self.states = [] - self.dfas = collections.OrderedDict() - self.labels = [(0, "EMPTY")] - self.keywords = collections.OrderedDict() - self.tokens = collections.OrderedDict() - self.symbol2label = collections.OrderedDict() - self.start = 256 - - def produce_graminit_h(self, writer): - writer("/* Generated by Parser/pgen */\n\n") - for number, symbol in self.number2symbol.items(): - writer("#define {} {}\n".format(symbol, number)) - - def produce_graminit_c(self, writer): - writer("/* Generated by Parser/pgen */\n\n") - - writer('#include "exports.h"\n') - writer('#include "grammar.h"\n') - writer("Py_EXPORTED_SYMBOL grammar _PyParser_Grammar;\n") - - self.print_dfas(writer) - self.print_labels(writer) - - writer("Py_EXPORTED_SYMBOL grammar _PyParser_Grammar = {\n") - writer(" {n_dfas},\n".format(n_dfas=len(self.dfas))) - writer(" dfas,\n") - writer(" {{{n_labels}, labels}},\n".format(n_labels=len(self.labels))) - writer(" {start_number}\n".format(start_number=self.start)) - writer("};\n") - - def print_labels(self, writer): - writer( - "static const label labels[{n_labels}] = {{\n".format( - n_labels=len(self.labels) - ) - ) - for label, name in self.labels: - label_name = '"{}"'.format(name) if name is not None else 0 - writer( - " {{{label}, {label_name}}},\n".format( - label=label, label_name=label_name - ) - ) - writer("};\n") - - def print_dfas(self, writer): - self.print_states(writer) - writer("static const dfa dfas[{}] = {{\n".format(len(self.dfas))) - for dfaindex, dfa_elem in enumerate(self.dfas.items()): - symbol, (dfa, first_sets) = dfa_elem - writer( - ' {{{dfa_symbol}, "{symbol_name}", '.format( - dfa_symbol=symbol, symbol_name=self.number2symbol[symbol] - ) - + "{n_states}, 
states_{dfa_index},\n".format( - n_states=len(dfa), dfa_index=dfaindex - ) - + ' "' - ) - - bitset = bytearray((len(self.labels) >> 3) + 1) - for token in first_sets: - bitset[token >> 3] |= 1 << (token & 7) - for byte in bitset: - writer("\\%03o" % (byte & 0xFF)) - writer('"},\n') - writer("};\n") - - def print_states(self, write): - for dfaindex, dfa in enumerate(self.states): - self.print_arcs(write, dfaindex, dfa) - write( - "static state states_{dfa_index}[{n_states}] = {{\n".format( - dfa_index=dfaindex, n_states=len(dfa) - ) - ) - for stateindex, state in enumerate(dfa): - narcs = len(state) - write( - " {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n".format( - n_arcs=narcs, dfa_index=dfaindex, state_index=stateindex - ) - ) - write("};\n") - - def print_arcs(self, write, dfaindex, states): - for stateindex, state in enumerate(states): - narcs = len(state) - write( - "static const arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n".format( - dfa_index=dfaindex, state_index=stateindex, n_arcs=narcs - ) - ) - for a, b in state: - write( - " {{{from_label}, {to_state}}},\n".format( - from_label=a, to_state=b - ) - ) - write("};\n") diff --git a/Parser/pgen/keywordgen.py b/Parser/pgen/keywordgen.py deleted file mode 100644 index f0234a8..0000000 --- a/Parser/pgen/keywordgen.py +++ /dev/null @@ -1,59 +0,0 @@ -"""Generate Lib/keyword.py from the Grammar and Tokens files using pgen""" - -import argparse - -from .pgen import ParserGenerator - -TEMPLATE = r''' -"""Keywords (from "Grammar/Grammar") - -This file is automatically generated; please don't muck it up! - -To update the symbols in this file, 'cd' to the top directory of -the python source tree and run: - - python3 -m Parser.pgen.keywordgen Grammar/Grammar \ - Grammar/Tokens \ - Lib/keyword.py - -Alternatively, you can run 'make regen-keyword'. -""" - -__all__ = ["iskeyword", "kwlist"] - -kwlist = [ - {keywords} -] - -iskeyword = frozenset(kwlist).__contains__ -'''.lstrip() - -EXTRA_KEYWORDS = ["async", "await"] - - -def main(): - parser = argparse.ArgumentParser( - description="Generate the Lib/keywords.py " "file from the grammar." 
- ) - parser.add_argument( - "grammar", type=str, help="The file with the grammar definition in EBNF format" - ) - parser.add_argument("tokens", type=str, help="The file with the token definitions") - parser.add_argument( - "keyword_file", - type=argparse.FileType("w"), - help="The path to write the keyword definitions", - ) - args = parser.parse_args() - p = ParserGenerator(args.grammar, args.tokens) - grammar = p.make_grammar() - - with args.keyword_file as thefile: - all_keywords = sorted(list(grammar.keywords) + EXTRA_KEYWORDS) - - keywords = ",\n ".join(map(repr, all_keywords)) - thefile.write(TEMPLATE.format(keywords=keywords)) - - -if __name__ == "__main__": - main() diff --git a/Parser/pgen/metaparser.py b/Parser/pgen/metaparser.py deleted file mode 100644 index 074a083..0000000 --- a/Parser/pgen/metaparser.py +++ /dev/null @@ -1,152 +0,0 @@ -"""Parser for the Python metagrammar""" - -import io -import tokenize # from stdlib - -from .automata import NFA, NFAState - - -class GrammarParser: - """Parser for Python grammar files.""" - - _translation_table = { - tokenize.NAME: "NAME", - tokenize.STRING: "STRING", - tokenize.NEWLINE: "NEWLINE", - tokenize.NL: "NL", - tokenize.OP: "OP", - tokenize.ENDMARKER: "ENDMARKER", - tokenize.COMMENT: "COMMENT", - } - - def __init__(self, grammar): - self.grammar = grammar - grammar_adaptor = io.StringIO(grammar) - self.generator = tokenize.generate_tokens(grammar_adaptor.readline) - self._gettoken() # Initialize lookahead - self._current_rule_name = None - - def parse(self): - """Turn the grammar into a collection of NFAs""" - # grammar: (NEWLINE | rule)* ENDMARKER - while self.type != tokenize.ENDMARKER: - while self.type == tokenize.NEWLINE: - self._gettoken() - # rule: NAME ':' rhs NEWLINE - self._current_rule_name = self._expect(tokenize.NAME) - self._expect(tokenize.OP, ":") - a, z = self._parse_rhs() - self._expect(tokenize.NEWLINE) - - yield NFA(a, z) - - def _parse_rhs(self): - # rhs: items ('|' items)* - a, z = self._parse_items() - if self.value != "|": - return a, z - else: - aa = NFAState(self._current_rule_name) - zz = NFAState(self._current_rule_name) - while True: - # Allow to transit directly to the previous state and connect the end of the - # previous state to the end of the current one, effectively allowing to skip - # the current state. - aa.add_arc(a) - z.add_arc(zz) - if self.value != "|": - break - - self._gettoken() - a, z = self._parse_items() - return aa, zz - - def _parse_items(self): - # items: item+ - a, b = self._parse_item() - while self.type in (tokenize.NAME, tokenize.STRING) or self.value in ("(", "["): - c, d = self._parse_item() - # Allow a transition between the end of the previous state - # and the beginning of the new one, connecting all the items - # together. In this way we can only reach the end if we visit - # all the items. - b.add_arc(c) - b = d - return a, b - - def _parse_item(self): - # item: '[' rhs ']' | atom ['+' | '*'] - if self.value == "[": - self._gettoken() - a, z = self._parse_rhs() - self._expect(tokenize.OP, "]") - # Make a transition from the beginning to the end so it is possible to - # advance for free to the next state of this item # without consuming - # anything from the rhs. - a.add_arc(z) - return a, z - else: - a, z = self._parse_atom() - value = self.value - if value not in ("+", "*"): - return a, z - self._gettoken() - z.add_arc(a) - if value == "+": - # Create a cycle to the beginning so we go back to the old state in this - # item and repeat. 
-                return a, z
-            else:
-                # The end state is the same as the beginning, so we can cycle
-                # arbitrarily and end in the beginning if necessary.
-                return a, a
-
-    def _parse_atom(self):
-        # atom: '(' rhs ')' | NAME | STRING
-        if self.value == "(":
-            self._gettoken()
-            a, z = self._parse_rhs()
-            self._expect(tokenize.OP, ")")
-            return a, z
-        elif self.type in (tokenize.NAME, tokenize.STRING):
-            a = NFAState(self._current_rule_name)
-            z = NFAState(self._current_rule_name)
-            # We can transition to the next state only if we consume the value.
-            a.add_arc(z, self.value)
-            self._gettoken()
-            return a, z
-        else:
-            self._raise_error(
-                "expected (...) or NAME or STRING, got {} ({})",
-                self._translation_table.get(self.type, self.type),
-                self.value,
-            )
-
-    def _expect(self, type_, value=None):
-        if self.type != type_:
-            self._raise_error(
-                "expected {}, got {} ({})",
-                self._translation_table.get(type_, type_),
-                self._translation_table.get(self.type, self.type),
-                self.value,
-            )
-        if value is not None and self.value != value:
-            self._raise_error("expected {}, got {}", value, self.value)
-        value = self.value
-        self._gettoken()
-        return value
-
-    def _gettoken(self):
-        tup = next(self.generator)
-        while tup[0] in (tokenize.COMMENT, tokenize.NL):
-            tup = next(self.generator)
-        self.type, self.value, self.begin, self.end, self.line = tup
-
-    def _raise_error(self, msg, *args):
-        if args:
-            try:
-                msg = msg.format(*args)
-            except Exception:
-                msg = " ".join([msg] + list(map(str, args)))
-        line = self.grammar.splitlines()[self.begin[0] - 1]
-        raise SyntaxError(msg, ("<grammar>", self.begin[0], self.begin[1], line))
diff --git a/Parser/pgen/pgen.py b/Parser/pgen/pgen.py
deleted file mode 100644
index 2f444eb..0000000
--- a/Parser/pgen/pgen.py
+++ /dev/null
@@ -1,305 +0,0 @@
-"""Python parser generator
-
-
-This parser generator transforms a Python grammar file into parsing tables
-that can be consumed by Python's LL(1) parser written in C.
-
-Concepts
---------
-
-* An LL(1) parser (Left-to-right, Leftmost derivation, 1 token-lookahead) is a
-  top-down parser for a subset of context-free languages.  It parses the input
-  from Left to right, performing Leftmost derivation of the sentence, and can
-  only use 1 token of lookahead when parsing a sentence.
-
-* A parsing table is a collection of data that a generic implementation of the
-  LL(1) parser consumes to know how to parse a given context-free grammar.  In
-  this case the collection of data involves Deterministic Finite Automatons,
-  calculated first sets, keywords and transition labels.
-
-* A grammar is defined by production rules (or just 'productions') that specify
-  which symbols may replace which other symbols; these rules may be used to
-  generate strings, or to parse them.  Each such rule has a head, or left-hand
-  side, which consists of the string that may be replaced, and a body, or
-  right-hand side, which consists of a string that may replace it.  In the
-  Python grammar, rules are written in the form
-
-      rule_name: rule_description;
-
-  meaning the rule 'a: b' specifies that a can be replaced by b.  A context-free
-  grammar is a grammar in which the left-hand side of each production rule
-  consists of only a single nonterminal symbol.  Context-free grammars can
-  always be recognized by a non-deterministic pushdown automaton.
-
-* Terminal symbols are literal symbols which may appear in the outputs of the
-  production rules of the grammar and which cannot be changed using the rules
-  of the grammar.
-  Applying the rules recursively to a source string of symbols
-  will usually terminate in a final output string consisting only of terminal
-  symbols.
-
-* Nonterminal symbols are those symbols which can be replaced.  The grammar
-  includes a start symbol, a designated member of the set of nonterminals from
-  which all the strings in the language may be derived by successive
-  applications of the production rules.
-
-* The language defined by the grammar is the set of terminal strings
-  that can be derived using the production rules.
-
-* The first sets of a rule (FIRST(rule)) are defined to be the set of terminals
-  that can appear in the first position of any string derived from the rule.
-  This is useful for LL(1) parsers as the parser is only allowed to look at the
-  next token in the input to know which rule it needs to parse.  For example,
-  given this grammar:
-
-      start: '(' A | B ')'
-      A: 'a' '<'
-      B: 'b' '<'
-
-  and the input '(b<)' the parser can only look at 'b' to know if it needs
-  to parse A or B.  Because FIRST(A) = {'a'} and FIRST(B) = {'b'} it knows
-  that it needs to continue parsing rule B because only that rule can start
-  with 'b'.
-
-Description
------------
-
-The input for the parser generator is a grammar in extended BNF form (using *
-for repetition, + for at-least-once repetition, [] for optional parts, | for
-alternatives and () for grouping).
-
-Each rule in the grammar file is considered as a regular expression in its
-own right.  It is turned into a Non-deterministic Finite Automaton (NFA),
-which is then turned into a Deterministic Finite Automaton (DFA), which is
-then optimized to reduce the number of states.  See [Aho&Ullman 77] chapter 3,
-or similar compiler books (this technique is more often used for lexical
-analyzers).
-
-The DFAs are used by the parser as parsing tables in a special way that's
-probably unique.  Before they are usable, the FIRST sets of all non-terminals
-are computed so the LL(1) parser consuming the parsing tables can distinguish
-between different transitions.
-
-Reference
----------
-
-[Aho&Ullman 77]
-    Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
-    (first edition)
-"""
-
-from ast import literal_eval
-import collections
-
-from . import grammar, token
-from .automata import DFA
-from .metaparser import GrammarParser
-
-import enum
-
-
-class LabelType(enum.Enum):
-    NONTERMINAL = 0
-    NAMED_TOKEN = 1
-    KEYWORD = 2
-    OPERATOR = 3
-    NONE = 4
-
-
-class Label(str):
-    def __init__(self, value):
-        self.type = self._get_type()
-
-    def _get_type(self):
-        if self[0].isalpha():
-            if self.upper() == self:
-                # NAMED tokens (ASYNC, NAME...) are all uppercase by convention
-                return LabelType.NAMED_TOKEN
-            else:
-                # If it is not uppercase, it must be a nonterminal.
- return LabelType.NONTERMINAL - else: - # Keywords and operators are wrapped in quotes - assert self[0] == self[-1] in ('"', "'"), self - value = literal_eval(self) - if value[0].isalpha(): - return LabelType.KEYWORD - else: - return LabelType.OPERATOR - - def __repr__(self): - return "{}({})".format(self.type, super().__repr__()) - - -class ParserGenerator(object): - def __init__(self, grammar_file, token_file, verbose=False): - with open(grammar_file) as f: - self.grammar = f.read() - with open(token_file) as tok_file: - token_lines = tok_file.readlines() - self.tokens = dict(token.generate_tokens(token_lines)) - self.opmap = dict(token.generate_opmap(token_lines)) - # Manually add <> so it does not collide with != - self.opmap["<>"] = "NOTEQUAL" - self.verbose = verbose - self.filename = grammar_file - self.dfas, self.startsymbol = self.create_dfas() - self.first = {} # map from symbol name to set of tokens - self.calculate_first_sets() - - def create_dfas(self): - rule_to_dfas = collections.OrderedDict() - start_nonterminal = None - for nfa in GrammarParser(self.grammar).parse(): - if self.verbose: - print("Dump of NFA for", nfa.name) - nfa.dump() - dfa = DFA.from_nfa(nfa) - if self.verbose: - print("Dump of DFA for", dfa.name) - dfa.dump() - dfa.simplify() - rule_to_dfas[dfa.name] = dfa - - if start_nonterminal is None: - start_nonterminal = dfa.name - - return rule_to_dfas, start_nonterminal - - def make_grammar(self): - c = grammar.Grammar() - c.all_labels = set() - names = list(self.dfas.keys()) - names.remove(self.startsymbol) - names.insert(0, self.startsymbol) - for name in names: - i = 256 + len(c.symbol2number) - c.symbol2number[Label(name)] = i - c.number2symbol[i] = Label(name) - c.all_labels.add(name) - for name in names: - self.make_label(c, name) - dfa = self.dfas[name] - states = [] - for state in dfa: - arcs = [] - for label, next in sorted(state.arcs.items()): - c.all_labels.add(label) - arcs.append((self.make_label(c, label), dfa.states.index(next))) - if state.is_final: - arcs.append((0, dfa.states.index(state))) - states.append(arcs) - c.states.append(states) - c.dfas[c.symbol2number[name]] = (states, self.make_first_sets(c, name)) - c.start = c.symbol2number[self.startsymbol] - - if self.verbose: - print("") - print("Grammar summary") - print("===============") - - print("- {n_labels} labels".format(n_labels=len(c.labels))) - print("- {n_dfas} dfas".format(n_dfas=len(c.dfas))) - print("- {n_tokens} tokens".format(n_tokens=len(c.tokens))) - print("- {n_keywords} keywords".format(n_keywords=len(c.keywords))) - print( - "- Start symbol: {start_symbol}".format( - start_symbol=c.number2symbol[c.start] - ) - ) - return c - - def make_first_sets(self, c, name): - rawfirst = self.first[name] - first = set() - for label in sorted(rawfirst): - ilabel = self.make_label(c, label) - ##assert ilabel not in first # XXX failed on <> ... 
!= - first.add(ilabel) - return first - - def make_label(self, c, label): - label = Label(label) - ilabel = len(c.labels) - - if label.type == LabelType.NONTERMINAL: - if label in c.symbol2label: - return c.symbol2label[label] - else: - c.labels.append((c.symbol2number[label], None)) - c.symbol2label[label] = ilabel - return ilabel - elif label.type == LabelType.NAMED_TOKEN: - # A named token (NAME, NUMBER, STRING) - itoken = self.tokens.get(label, None) - assert isinstance(itoken, int), label - assert itoken in self.tokens.values(), label - if itoken in c.tokens: - return c.tokens[itoken] - else: - c.labels.append((itoken, None)) - c.tokens[itoken] = ilabel - return ilabel - elif label.type == LabelType.KEYWORD: - # A keyword - value = literal_eval(label) - if value in c.keywords: - return c.keywords[value] - else: - c.labels.append((self.tokens["NAME"], value)) - c.keywords[value] = ilabel - return ilabel - elif label.type == LabelType.OPERATOR: - # An operator (any non-numeric token) - value = literal_eval(label) - tok_name = self.opmap[value] # Fails if unknown token - itoken = self.tokens[tok_name] - if itoken in c.tokens: - return c.tokens[itoken] - else: - c.labels.append((itoken, None)) - c.tokens[itoken] = ilabel - return ilabel - else: - raise ValueError("Cannot categorize label {}".format(label)) - - def calculate_first_sets(self): - names = list(self.dfas.keys()) - for name in names: - if name not in self.first: - self.calculate_first_sets_for_rule(name) - - if self.verbose: - print("First set for {dfa_name}".format(dfa_name=name)) - for item in self.first[name]: - print(" - {terminal}".format(terminal=item)) - - def calculate_first_sets_for_rule(self, name): - dfa = self.dfas[name] - self.first[name] = None # dummy to detect left recursion - state = dfa.states[0] - totalset = set() - overlapcheck = {} - for label, next in state.arcs.items(): - if label in self.dfas: - if label in self.first: - fset = self.first[label] - if fset is None: - raise ValueError("recursion for rule %r" % name) - else: - self.calculate_first_sets_for_rule(label) - fset = self.first[label] - totalset.update(fset) - overlapcheck[label] = fset - else: - totalset.add(label) - overlapcheck[label] = {label} - inverse = {} - for label, itsfirst in overlapcheck.items(): - for symbol in itsfirst: - if symbol in inverse: - raise ValueError( - "rule %s is ambiguous; %s is in the" - " first sets of %s as well as %s" - % (name, symbol, label, inverse[symbol]) - ) - inverse[symbol] = label - self.first[name] = totalset diff --git a/Parser/pgen/token.py b/Parser/pgen/token.py deleted file mode 100644 index 2cff62c..0000000 --- a/Parser/pgen/token.py +++ /dev/null @@ -1,38 +0,0 @@ -import itertools - - -def generate_tokens(tokens): - numbers = itertools.count(0) - for line in tokens: - line = line.strip() - - if not line or line.startswith("#"): - continue - - name = line.split()[0] - yield (name, next(numbers)) - - yield ("N_TOKENS", next(numbers)) - yield ("NT_OFFSET", 256) - - -def generate_opmap(tokens): - for line in tokens: - line = line.strip() - - if not line or line.startswith("#"): - continue - - pieces = line.split() - - if len(pieces) != 2: - continue - - name, op = pieces - yield (op.strip("'"), name) - - # Yield independently <>. This is needed so it does not collide - # with the token generation in "generate_tokens" because if this - # symbol is included in Grammar/Tokens, it will collide with != - # as it has the same name (NOTEQUAL). 
- yield ("<>", "NOTEQUAL") diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c new file mode 100644 index 0000000..0b47295 --- /dev/null +++ b/Parser/pgenmain.c @@ -0,0 +1,174 @@ + +/* Parser generator main program */ + +/* This expects a filename containing the grammar as argv[1] (UNIX) + or asks the console for such a file name (THINK C). + It writes its output on two files in the current directory: + - "graminit.c" gets the grammar as a bunch of initialized data + - "graminit.h" gets the grammar's non-terminals as #defines. + Error messages and status info during the generation process are + written to stdout, or sometimes to stderr. */ + +/* XXX TO DO: + - check for duplicate definitions of names (instead of fatal err) +*/ + +#include "Python.h" +#include "pgenheaders.h" +#include "grammar.h" +#include "node.h" +#include "parsetok.h" +#include "pgen.h" + +int Py_DebugFlag; +int Py_VerboseFlag; +int Py_IgnoreEnvironmentFlag; + +/* Forward */ +grammar *getgrammar(char *filename); + +void +Py_Exit(int sts) +{ + exit(sts); +} + +int +main(int argc, char **argv) +{ + grammar *g; + FILE *fp; + char *filename, *graminit_h, *graminit_c; + + if (argc != 4) { + fprintf(stderr, + "usage: %s grammar graminit.h graminit.c\n", argv[0]); + Py_Exit(2); + } + filename = argv[1]; + graminit_h = argv[2]; + graminit_c = argv[3]; + g = getgrammar(filename); + fp = fopen(graminit_c, "w"); + if (fp == NULL) { + perror(graminit_c); + Py_Exit(1); + } + if (Py_DebugFlag) + printf("Writing %s ...\n", graminit_c); + printgrammar(g, fp); + fclose(fp); + fp = fopen(graminit_h, "w"); + if (fp == NULL) { + perror(graminit_h); + Py_Exit(1); + } + if (Py_DebugFlag) + printf("Writing %s ...\n", graminit_h); + printnonterminals(g, fp); + fclose(fp); + freegrammar(g); + Py_Exit(0); + return 0; /* Make gcc -Wall happy */ +} + +grammar * +getgrammar(char *filename) +{ + FILE *fp; + node *n; + grammar *g0, *g; + perrdetail err; + + fp = fopen(filename, "r"); + if (fp == NULL) { + perror(filename); + Py_Exit(1); + } + g0 = meta_grammar(); + n = PyParser_ParseFile(fp, filename, g0, g0->g_start, + (char *)NULL, (char *)NULL, &err); + fclose(fp); + if (n == NULL) { + fprintf(stderr, "Parsing error %d, line %d.\n", + err.error, err.lineno); + if (err.text != NULL) { + size_t i; + fprintf(stderr, "%s", err.text); + i = strlen(err.text); + if (i == 0 || err.text[i-1] != '\n') + fprintf(stderr, "\n"); + for (i = 0; i < err.offset; i++) { + if (err.text[i] == '\t') + putc('\t', stderr); + else + putc(' ', stderr); + } + fprintf(stderr, "^\n"); + PyObject_FREE(err.text); + } + Py_Exit(1); + } + g = pgen(n); + if (g == NULL) { + printf("Bad grammar.\n"); + Py_Exit(1); + } + return g; +} + +/* Can't happen in pgen */ +PyObject* +PyErr_Occurred() +{ + return 0; +} + +void +Py_FatalError(const char *msg) +{ + fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg); + Py_Exit(1); +} + +/* No-nonsense my_readline() for tokenizer.c */ + +char * +PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) +{ + size_t n = 1000; + char *p = (char *)PyMem_MALLOC(n); + char *q; + if (p == NULL) + return NULL; + fprintf(stderr, "%s", prompt); + q = fgets(p, n, sys_stdin); + if (q == NULL) { + *p = '\0'; + return p; + } + n = strlen(p); + if (n > 0 && p[n-1] != '\n') + p[n-1] = '\n'; + return (char *)PyMem_REALLOC(p, n+1); +} + +/* No-nonsense fgets */ +char * +Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) +{ + return fgets(buf, n, stream); +} + + +#include <stdarg.h> + +void +PySys_WriteStderr(const char *format, ...) 
+{ + va_list va; + + va_start(va, format); + vfprintf(stderr, format, va); + va_end(va); +} diff --git a/Parser/printgrammar.c b/Parser/printgrammar.c new file mode 100644 index 0000000..01f552f --- /dev/null +++ b/Parser/printgrammar.c @@ -0,0 +1,117 @@ + +/* Print a bunch of C initializers that represent a grammar */ + +#include "pgenheaders.h" +#include "grammar.h" + +/* Forward */ +static void printarcs(int, dfa *, FILE *); +static void printstates(grammar *, FILE *); +static void printdfas(grammar *, FILE *); +static void printlabels(grammar *, FILE *); + +void +printgrammar(grammar *g, FILE *fp) +{ + fprintf(fp, "/* Generated by Parser/pgen */\n\n"); + fprintf(fp, "#include \"pgenheaders.h\"\n"); + fprintf(fp, "#include \"grammar.h\"\n"); + fprintf(fp, "PyAPI_DATA(grammar) _PyParser_Grammar;\n"); + printdfas(g, fp); + printlabels(g, fp); + fprintf(fp, "grammar _PyParser_Grammar = {\n"); + fprintf(fp, " %d,\n", g->g_ndfas); + fprintf(fp, " dfas,\n"); + fprintf(fp, " {%d, labels},\n", g->g_ll.ll_nlabels); + fprintf(fp, " %d\n", g->g_start); + fprintf(fp, "};\n"); +} + +void +printnonterminals(grammar *g, FILE *fp) +{ + dfa *d; + int i; + + fprintf(fp, "/* Generated by Parser/pgen */\n\n"); + + d = g->g_dfa; + for (i = g->g_ndfas; --i >= 0; d++) + fprintf(fp, "#define %s %d\n", d->d_name, d->d_type); +} + +static void +printarcs(int i, dfa *d, FILE *fp) +{ + arc *a; + state *s; + int j, k; + + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) { + fprintf(fp, "static arc arcs_%d_%d[%d] = {\n", + i, j, s->s_narcs); + a = s->s_arc; + for (k = 0; k < s->s_narcs; k++, a++) + fprintf(fp, " {%d, %d},\n", a->a_lbl, a->a_arrow); + fprintf(fp, "};\n"); + } +} + +static void +printstates(grammar *g, FILE *fp) +{ + state *s; + dfa *d; + int i, j; + + d = g->g_dfa; + for (i = 0; i < g->g_ndfas; i++, d++) { + printarcs(i, d, fp); + fprintf(fp, "static state states_%d[%d] = {\n", + i, d->d_nstates); + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) + fprintf(fp, " {%d, arcs_%d_%d},\n", + s->s_narcs, i, j); + fprintf(fp, "};\n"); + } +} + +static void +printdfas(grammar *g, FILE *fp) +{ + dfa *d; + int i, j; + + printstates(g, fp); + fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas); + d = g->g_dfa; + for (i = 0; i < g->g_ndfas; i++, d++) { + fprintf(fp, " {%d, \"%s\", %d, %d, states_%d,\n", + d->d_type, d->d_name, d->d_initial, d->d_nstates, i); + fprintf(fp, " \""); + for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++) + fprintf(fp, "\\%03o", d->d_first[j] & 0xff); + fprintf(fp, "\"},\n"); + } + fprintf(fp, "};\n"); +} + +static void +printlabels(grammar *g, FILE *fp) +{ + label *l; + int i; + + fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels); + l = g->g_ll.ll_label; + for (i = g->g_ll.ll_nlabels; --i >= 0; l++) { + if (l->lb_str == NULL) + fprintf(fp, " {%d, 0},\n", l->lb_type); + else + fprintf(fp, " {%d, \"%s\"},\n", + l->lb_type, l->lb_str); + } + fprintf(fp, "};\n"); +} diff --git a/Parser/spark.py b/Parser/spark.py new file mode 100644 index 0000000..b064d62 --- /dev/null +++ b/Parser/spark.py @@ -0,0 +1,839 @@ +# Copyright (c) 1998-2002 John Aycock +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, 
subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +__version__ = 'SPARK-0.7 (pre-alpha-5)' + +import re +import string + +def _namelist(instance): + namelist, namedict, classlist = [], {}, [instance.__class__] + for c in classlist: + for b in c.__bases__: + classlist.append(b) + for name in c.__dict__.keys(): + if not namedict.has_key(name): + namelist.append(name) + namedict[name] = 1 + return namelist + +class GenericScanner: + def __init__(self, flags=0): + pattern = self.reflect() + self.re = re.compile(pattern, re.VERBOSE|flags) + + self.index2func = {} + for name, number in self.re.groupindex.items(): + self.index2func[number-1] = getattr(self, 't_' + name) + + def makeRE(self, name): + doc = getattr(self, name).__doc__ + rv = '(?P<%s>%s)' % (name[2:], doc) + return rv + + def reflect(self): + rv = [] + for name in _namelist(self): + if name[:2] == 't_' and name != 't_default': + rv.append(self.makeRE(name)) + + rv.append(self.makeRE('t_default')) + return string.join(rv, '|') + + def error(self, s, pos): + print "Lexical error at position %s" % pos + raise SystemExit + + def tokenize(self, s): + pos = 0 + n = len(s) + while pos < n: + m = self.re.match(s, pos) + if m is None: + self.error(s, pos) + + groups = m.groups() + for i in range(len(groups)): + if groups[i] and self.index2func.has_key(i): + self.index2func[i](groups[i]) + pos = m.end() + + def t_default(self, s): + r'( . | \n )+' + print "Specification error: unmatched input" + raise SystemExit + +# +# Extracted from GenericParser and made global so that [un]picking works. +# +class _State: + def __init__(self, stateno, items): + self.T, self.complete, self.items = [], [], items + self.stateno = stateno + +class GenericParser: + # + # An Earley parser, as per J. Earley, "An Efficient Context-Free + # Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley, + # "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis, + # Carnegie-Mellon University, August 1968. New formulation of + # the parser according to J. Aycock, "Practical Earley Parsing + # and the SPARK Toolkit", Ph.D. thesis, University of Victoria, + # 2001, and J. Aycock and R. N. Horspool, "Practical Earley + # Parsing", unpublished paper, 2001. + # + + def __init__(self, start): + self.rules = {} + self.rule2func = {} + self.rule2name = {} + self.collectRules() + self.augment(start) + self.ruleschanged = 1 + + _NULLABLE = '\e_' + _START = 'START' + _BOF = '|-' + + # + # When pickling, take the time to generate the full state machine; + # some information is then extraneous, too. Unfortunately we + # can't save the rule2func map. 
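The papers cited above describe the algorithm GenericParser implements: Earley items (rule, dot position, origin set) driven by scan, predict, and complete steps. A bare recognizer showing those three steps, with an invented grammar format rather than SPARK's docstring-based rules:

    # Bare Earley recognizer: an item is (lhs, rhs, dot, origin).
    def recognize(grammar, start, tokens):
        sets = [set() for _ in range(len(tokens) + 1)]
        sets[0] = {(start, rhs, 0, 0) for rhs in grammar[start]}
        for i in range(len(tokens) + 1):
            changed = True
            while changed:                       # predict/complete to a fixpoint
                changed = False
                for lhs, rhs, dot, origin in list(sets[i]):
                    if dot < len(rhs) and rhs[dot] in grammar:     # predict
                        new = {(rhs[dot], alt, 0, i) for alt in grammar[rhs[dot]]}
                    elif dot == len(rhs):                          # complete
                        new = {(l, r, d + 1, o) for l, r, d, o in sets[origin]
                               if d < len(r) and r[d] == lhs}
                    else:
                        continue
                    if not new <= sets[i]:
                        sets[i] |= new
                        changed = True
            if i < len(tokens):                                    # scan
                sets[i + 1] = {(l, r, d + 1, o) for l, r, d, o in sets[i]
                               if d < len(r) and r[d] == tokens[i]}
        return any(l == start and d == len(r) and o == 0
                   for l, r, d, o in sets[-1])

    grammar = {"E": [("E", "+", "n"), ("n",)]}
    assert recognize(grammar, "E", ["n", "+", "n"])

SPARK's version precomputes epsilon-kernel states (makeState below) so the inner loops walk a state machine instead of raw items.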
+ # + def __getstate__(self): + if self.ruleschanged: + # + # XXX - duplicated from parse() + # + self.computeNull() + self.newrules = {} + self.new2old = {} + self.makeNewRules() + self.ruleschanged = 0 + self.edges, self.cores = {}, {} + self.states = { 0: self.makeState0() } + self.makeState(0, self._BOF) + # + # XXX - should find a better way to do this.. + # + changes = 1 + while changes: + changes = 0 + for k, v in self.edges.items(): + if v is None: + state, sym = k + if self.states.has_key(state): + self.goto(state, sym) + changes = 1 + rv = self.__dict__.copy() + for s in self.states.values(): + del s.items + del rv['rule2func'] + del rv['nullable'] + del rv['cores'] + return rv + + def __setstate__(self, D): + self.rules = {} + self.rule2func = {} + self.rule2name = {} + self.collectRules() + start = D['rules'][self._START][0][1][1] # Blech. + self.augment(start) + D['rule2func'] = self.rule2func + D['makeSet'] = self.makeSet_fast + self.__dict__ = D + + # + # A hook for GenericASTBuilder and GenericASTMatcher. Mess + # thee not with this; nor shall thee toucheth the _preprocess + # argument to addRule. + # + def preprocess(self, rule, func): return rule, func + + def addRule(self, doc, func, _preprocess=1): + fn = func + rules = string.split(doc) + + index = [] + for i in range(len(rules)): + if rules[i] == '::=': + index.append(i-1) + index.append(len(rules)) + + for i in range(len(index)-1): + lhs = rules[index[i]] + rhs = rules[index[i]+2:index[i+1]] + rule = (lhs, tuple(rhs)) + + if _preprocess: + rule, fn = self.preprocess(rule, func) + + if self.rules.has_key(lhs): + self.rules[lhs].append(rule) + else: + self.rules[lhs] = [ rule ] + self.rule2func[rule] = fn + self.rule2name[rule] = func.__name__[2:] + self.ruleschanged = 1 + + def collectRules(self): + for name in _namelist(self): + if name[:2] == 'p_': + func = getattr(self, name) + doc = func.__doc__ + self.addRule(doc, func) + + def augment(self, start): + rule = '%s ::= %s %s' % (self._START, self._BOF, start) + self.addRule(rule, lambda args: args[1], 0) + + def computeNull(self): + self.nullable = {} + tbd = [] + + for rulelist in self.rules.values(): + lhs = rulelist[0][0] + self.nullable[lhs] = 0 + for rule in rulelist: + rhs = rule[1] + if len(rhs) == 0: + self.nullable[lhs] = 1 + continue + # + # We only need to consider rules which + # consist entirely of nonterminal symbols. + # This should be a savings on typical + # grammars. + # + for sym in rhs: + if not self.rules.has_key(sym): + break + else: + tbd.append(rule) + changes = 1 + while changes: + changes = 0 + for lhs, rhs in tbd: + if self.nullable[lhs]: + continue + for sym in rhs: + if not self.nullable[sym]: + break + else: + self.nullable[lhs] = 1 + changes = 1 + + def makeState0(self): + s0 = _State(0, []) + for rule in self.newrules[self._START]: + s0.items.append((rule, 0)) + return s0 + + def finalState(self, tokens): + # + # Yuck. 
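The __getstate__/__setstate__ pair above is the standard recipe for pickling an object that holds unpicklable members: copy __dict__, drop the offending entries (rule2func holds lambdas), and rebuild them on load. The pattern in isolation, with an illustrative class rather than SPARK's:

    import pickle

    class Machine:
        """Illustrative stand-in for GenericParser's pickle support."""
        def __init__(self):
            self.table = {"rules": 3}                 # picklable state
            self._rebuild()

        def _rebuild(self):
            # Lambdas (like SPARK's rule2func values) cannot be pickled.
            self.dispatch = {"size": lambda: len(self.table)}

        def __getstate__(self):
            state = self.__dict__.copy()
            del state["dispatch"]                     # drop the unpicklable part
            return state

        def __setstate__(self, state):
            self.__dict__.update(state)
            self._rebuild()                           # recreate it on load

    m = pickle.loads(pickle.dumps(Machine()))
    assert m.dispatch["size"]() == 1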
+ # + if len(self.newrules[self._START]) == 2 and len(tokens) == 0: + return 1 + start = self.rules[self._START][0][1][1] + return self.goto(1, start) + + def makeNewRules(self): + worklist = [] + for rulelist in self.rules.values(): + for rule in rulelist: + worklist.append((rule, 0, 1, rule)) + + for rule, i, candidate, oldrule in worklist: + lhs, rhs = rule + n = len(rhs) + while i < n: + sym = rhs[i] + if not self.rules.has_key(sym) or \ + not self.nullable[sym]: + candidate = 0 + i = i + 1 + continue + + newrhs = list(rhs) + newrhs[i] = self._NULLABLE+sym + newrule = (lhs, tuple(newrhs)) + worklist.append((newrule, i+1, + candidate, oldrule)) + candidate = 0 + i = i + 1 + else: + if candidate: + lhs = self._NULLABLE+lhs + rule = (lhs, rhs) + if self.newrules.has_key(lhs): + self.newrules[lhs].append(rule) + else: + self.newrules[lhs] = [ rule ] + self.new2old[rule] = oldrule + + def typestring(self, token): + return None + + def error(self, token): + print "Syntax error at or near `%s' token" % token + raise SystemExit + + def parse(self, tokens): + sets = [ [(1,0), (2,0)] ] + self.links = {} + + if self.ruleschanged: + self.computeNull() + self.newrules = {} + self.new2old = {} + self.makeNewRules() + self.ruleschanged = 0 + self.edges, self.cores = {}, {} + self.states = { 0: self.makeState0() } + self.makeState(0, self._BOF) + + for i in xrange(len(tokens)): + sets.append([]) + + if sets[i] == []: + break + self.makeSet(tokens[i], sets, i) + else: + sets.append([]) + self.makeSet(None, sets, len(tokens)) + + #_dump(tokens, sets, self.states) + + finalitem = (self.finalState(tokens), 0) + if finalitem not in sets[-2]: + if len(tokens) > 0: + self.error(tokens[i-1]) + else: + self.error(None) + + return self.buildTree(self._START, finalitem, + tokens, len(sets)-2) + + def isnullable(self, sym): + # + # For symbols in G_e only. If we weren't supporting 1.5, + # could just use sym.startswith(). + # + return self._NULLABLE == sym[0:len(self._NULLABLE)] + + def skip(self, (lhs, rhs), pos=0): + n = len(rhs) + while pos < n: + if not self.isnullable(rhs[pos]): + break + pos = pos + 1 + return pos + + def makeState(self, state, sym): + assert sym is not None + # + # Compute \epsilon-kernel state's core and see if + # it exists already. + # + kitems = [] + for rule, pos in self.states[state].items: + lhs, rhs = rule + if rhs[pos:pos+1] == (sym,): + kitems.append((rule, self.skip(rule, pos+1))) + core = kitems + + core.sort() + tcore = tuple(core) + if self.cores.has_key(tcore): + return self.cores[tcore] + # + # Nope, doesn't exist. Compute it and the associated + # \epsilon-nonkernel state together; we'll need it right away. + # + k = self.cores[tcore] = len(self.states) + K, NK = _State(k, kitems), _State(k+1, []) + self.states[k] = K + predicted = {} + + edges = self.edges + rules = self.newrules + for X in K, NK: + worklist = X.items + for item in worklist: + rule, pos = item + lhs, rhs = rule + if pos == len(rhs): + X.complete.append(rule) + continue + + nextSym = rhs[pos] + key = (X.stateno, nextSym) + if not rules.has_key(nextSym): + if not edges.has_key(key): + edges[key] = None + X.T.append(nextSym) + else: + edges[key] = None + if not predicted.has_key(nextSym): + predicted[nextSym] = 1 + for prule in rules[nextSym]: + ppos = self.skip(prule) + new = (prule, ppos) + NK.items.append(new) + # + # Problem: we know K needs generating, but we + # don't yet know about NK. Can't commit anything + # regarding NK to self.edges until we're sure. 
Should + # we delay committing on both K and NK to avoid this + # hacky code? This creates other problems.. + # + if X is K: + edges = {} + + if NK.items == []: + return k + + # + # Check for \epsilon-nonkernel's core. Unfortunately we + # need to know the entire set of predicted nonterminals + # to do this without accidentally duplicating states. + # + core = predicted.keys() + core.sort() + tcore = tuple(core) + if self.cores.has_key(tcore): + self.edges[(k, None)] = self.cores[tcore] + return k + + nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno + self.edges.update(edges) + self.states[nk] = NK + return k + + def goto(self, state, sym): + key = (state, sym) + if not self.edges.has_key(key): + # + # No transitions from state on sym. + # + return None + + rv = self.edges[key] + if rv is None: + # + # Target state isn't generated yet. Remedy this. + # + rv = self.makeState(state, sym) + self.edges[key] = rv + return rv + + def gotoT(self, state, t): + return [self.goto(state, t)] + + def gotoST(self, state, st): + rv = [] + for t in self.states[state].T: + if st == t: + rv.append(self.goto(state, t)) + return rv + + def add(self, set, item, i=None, predecessor=None, causal=None): + if predecessor is None: + if item not in set: + set.append(item) + else: + key = (item, i) + if item not in set: + self.links[key] = [] + set.append(item) + self.links[key].append((predecessor, causal)) + + def makeSet(self, token, sets, i): + cur, next = sets[i], sets[i+1] + + ttype = token is not None and self.typestring(token) or None + if ttype is not None: + fn, arg = self.gotoT, ttype + else: + fn, arg = self.gotoST, token + + for item in cur: + ptr = (item, i) + state, parent = item + add = fn(state, arg) + for k in add: + if k is not None: + self.add(next, (k, parent), i+1, ptr) + nk = self.goto(k, None) + if nk is not None: + self.add(next, (nk, i+1)) + + if parent == i: + continue + + for rule in self.states[state].complete: + lhs, rhs = rule + for pitem in sets[parent]: + pstate, pparent = pitem + k = self.goto(pstate, lhs) + if k is not None: + why = (item, i, rule) + pptr = (pitem, parent) + self.add(cur, (k, pparent), + i, pptr, why) + nk = self.goto(k, None) + if nk is not None: + self.add(cur, (nk, i)) + + def makeSet_fast(self, token, sets, i): + # + # Call *only* when the entire state machine has been built! + # It relies on self.edges being filled in completely, and + # then duplicates and inlines code to boost speed at the + # cost of extreme ugliness. 
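The comment above points at a classic CPython trade-off: in a hot loop, a method call per item costs more than duplicating its body inline, which is exactly what makeSet_fast does by hand-expanding self.add() and self.goto(). A toy measurement of the effect being chased (absolute numbers will vary by machine):

    import timeit

    def add(seq, item):                  # helper, as in the slow makeSet
        if item not in seq:
            seq.append(item)

    def outlined():
        seq = []
        for i in range(1000):
            add(seq, i % 10)

    def inlined():                       # same logic with the call removed
        seq = []
        for i in range(1000):
            item = i % 10
            if item not in seq:
                seq.append(item)

    print("call:  ", timeit.timeit(outlined, number=200))
    print("inline:", timeit.timeit(inlined, number=200))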
+ # + cur, next = sets[i], sets[i+1] + ttype = token is not None and self.typestring(token) or None + + for item in cur: + ptr = (item, i) + state, parent = item + if ttype is not None: + k = self.edges.get((state, ttype), None) + if k is not None: + #self.add(next, (k, parent), i+1, ptr) + #INLINED --v + new = (k, parent) + key = (new, i+1) + if new not in next: + self.links[key] = [] + next.append(new) + self.links[key].append((ptr, None)) + #INLINED --^ + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + #self.add(next, (nk, i+1)) + #INLINED --v + new = (nk, i+1) + if new not in next: + next.append(new) + #INLINED --^ + else: + add = self.gotoST(state, token) + for k in add: + if k is not None: + self.add(next, (k, parent), i+1, ptr) + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + self.add(next, (nk, i+1)) + + if parent == i: + continue + + for rule in self.states[state].complete: + lhs, rhs = rule + for pitem in sets[parent]: + pstate, pparent = pitem + #k = self.goto(pstate, lhs) + k = self.edges.get((pstate, lhs), None) + if k is not None: + why = (item, i, rule) + pptr = (pitem, parent) + #self.add(cur, (k, pparent), + # i, pptr, why) + #INLINED --v + new = (k, pparent) + key = (new, i) + if new not in cur: + self.links[key] = [] + cur.append(new) + self.links[key].append((pptr, why)) + #INLINED --^ + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + #self.add(cur, (nk, i)) + #INLINED --v + new = (nk, i) + if new not in cur: + cur.append(new) + #INLINED --^ + + def predecessor(self, key, causal): + for p, c in self.links[key]: + if c == causal: + return p + assert 0 + + def causal(self, key): + links = self.links[key] + if len(links) == 1: + return links[0][1] + choices = [] + rule2cause = {} + for p, c in links: + rule = c[2] + choices.append(rule) + rule2cause[rule] = c + return rule2cause[self.ambiguity(choices)] + + def deriveEpsilon(self, nt): + if len(self.newrules[nt]) > 1: + rule = self.ambiguity(self.newrules[nt]) + else: + rule = self.newrules[nt][0] + #print rule + + rhs = rule[1] + attr = [None] * len(rhs) + + for i in range(len(rhs)-1, -1, -1): + attr[i] = self.deriveEpsilon(rhs[i]) + return self.rule2func[self.new2old[rule]](attr) + + def buildTree(self, nt, item, tokens, k): + state, parent = item + + choices = [] + for rule in self.states[state].complete: + if rule[0] == nt: + choices.append(rule) + rule = choices[0] + if len(choices) > 1: + rule = self.ambiguity(choices) + #print rule + + rhs = rule[1] + attr = [None] * len(rhs) + + for i in range(len(rhs)-1, -1, -1): + sym = rhs[i] + if not self.newrules.has_key(sym): + if sym != self._BOF: + attr[i] = tokens[k-1] + key = (item, k) + item, k = self.predecessor(key, None) + #elif self.isnullable(sym): + elif self._NULLABLE == sym[0:len(self._NULLABLE)]: + attr[i] = self.deriveEpsilon(sym) + else: + key = (item, k) + why = self.causal(key) + attr[i] = self.buildTree(sym, why[0], + tokens, why[1]) + item, k = self.predecessor(key, why) + return self.rule2func[self.new2old[rule]](attr) + + def ambiguity(self, rules): + # + # XXX - problem here and in collectRules() if the same rule + # appears in >1 method. Also undefined results if rules + # causing the ambiguity appear in the same method. 
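ambiguity() just below sorts the candidate rules by right-hand-side length and hands the ordered list to resolve(), whose default picks the first, i.e. shortest, entry; since the tree is walked top-down, that behaves like preferring a shift. Reduced to its essence:

    # Default disambiguation: of the rules that could complete here, take
    # the one with the shortest right-hand side.
    def resolve_shortest(rules):                  # rules: [(lhs, rhs), ...]
        return min(rules, key=lambda rule: len(rule[1]))

    rules = [("expr", ("expr", "+", "expr")), ("expr", ("NAME",))]
    assert resolve_shortest(rules) == ("expr", ("NAME",))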
+ # + sortlist = [] + name2index = {} + for i in range(len(rules)): + lhs, rhs = rule = rules[i] + name = self.rule2name[self.new2old[rule]] + sortlist.append((len(rhs), name)) + name2index[name] = i + sortlist.sort() + list = map(lambda (a,b): b, sortlist) + return rules[name2index[self.resolve(list)]] + + def resolve(self, list): + # + # Resolve ambiguity in favor of the shortest RHS. + # Since we walk the tree from the top down, this + # should effectively resolve in favor of a "shift". + # + return list[0] + +# +# GenericASTBuilder automagically constructs a concrete/abstract syntax tree +# for a given input. The extra argument is a class (not an instance!) +# which supports the "__setslice__" and "__len__" methods. +# +# XXX - silently overrides any user code in methods. +# + +class GenericASTBuilder(GenericParser): + def __init__(self, AST, start): + GenericParser.__init__(self, start) + self.AST = AST + + def preprocess(self, rule, func): + rebind = lambda lhs, self=self: \ + lambda args, lhs=lhs, self=self: \ + self.buildASTNode(args, lhs) + lhs, rhs = rule + return rule, rebind(lhs) + + def buildASTNode(self, args, lhs): + children = [] + for arg in args: + if isinstance(arg, self.AST): + children.append(arg) + else: + children.append(self.terminal(arg)) + return self.nonterminal(lhs, children) + + def terminal(self, token): return token + + def nonterminal(self, type, args): + rv = self.AST(type) + rv[:len(args)] = args + return rv + +# +# GenericASTTraversal is a Visitor pattern according to Design Patterns. For +# each node it attempts to invoke the method n_<node type>, falling +# back onto the default() method if the n_* can't be found. The preorder +# traversal also looks for an exit hook named n_<node type>_exit (no default +# routine is called if it's not found). To prematurely halt traversal +# of a subtree, call the prune() method -- this only makes sense for a +# preorder traversal. Node type is determined via the typestring() method. +# + +class GenericASTTraversalPruningException: + pass + +class GenericASTTraversal: + def __init__(self, ast): + self.ast = ast + + def typestring(self, node): + return node.type + + def prune(self): + raise GenericASTTraversalPruningException + + def preorder(self, node=None): + if node is None: + node = self.ast + + try: + name = 'n_' + self.typestring(node) + if hasattr(self, name): + func = getattr(self, name) + func(node) + else: + self.default(node) + except GenericASTTraversalPruningException: + return + + for kid in node: + self.preorder(kid) + + name = name + '_exit' + if hasattr(self, name): + func = getattr(self, name) + func(node) + + def postorder(self, node=None): + if node is None: + node = self.ast + + for kid in node: + self.postorder(kid) + + name = 'n_' + self.typestring(node) + if hasattr(self, name): + func = getattr(self, name) + func(node) + else: + self.default(node) + + + def default(self, node): + pass + +# +# GenericASTMatcher. AST nodes must have "__getitem__" and "__cmp__" +# implemented. +# +# XXX - makes assumptions about how GenericParser walks the parse tree. 
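GenericASTTraversal above dispatches each node to an n_<node type> method and falls back to default(); exit hooks and prune() refine the preorder walk. A self-contained miniature of that dispatch (node and visitor classes invented for the sketch, without the exit/prune machinery):

    class Node:
        def __init__(self, type, kids=()):
            self.type, self.kids = type, kids

    class Visitor:
        def preorder(self, node):
            # n_<type> if defined, otherwise the default handler.
            getattr(self, "n_" + node.type, self.default)(node)
            for kid in node.kids:
                self.preorder(kid)
        def default(self, node):
            pass

    class NameCounter(Visitor):
        names = 0
        def n_name(self, node):          # called for every 'name' node
            self.names += 1

    v = NameCounter()
    v.preorder(Node("add", (Node("name"), Node("name"))))
    assert v.names == 2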
+# + +class GenericASTMatcher(GenericParser): + def __init__(self, start, ast): + GenericParser.__init__(self, start) + self.ast = ast + + def preprocess(self, rule, func): + rebind = lambda func, self=self: \ + lambda args, func=func, self=self: \ + self.foundMatch(args, func) + lhs, rhs = rule + rhslist = list(rhs) + rhslist.reverse() + + return (lhs, tuple(rhslist)), rebind(func) + + def foundMatch(self, args, func): + func(args[-1]) + return args[-1] + + def match_r(self, node): + self.input.insert(0, node) + children = 0 + + for child in node: + if children == 0: + self.input.insert(0, '(') + children = children + 1 + self.match_r(child) + + if children > 0: + self.input.insert(0, ')') + + def match(self, ast=None): + if ast is None: + ast = self.ast + self.input = [] + + self.match_r(ast) + self.parse(self.input) + + def resolve(self, list): + # + # Resolve ambiguity in favor of the longest RHS. + # + return list[-1] + +def _dump(tokens, sets, states): + for i in range(len(sets)): + print 'set', i + for item in sets[i]: + print '\t', item + for (lhs, rhs), pos in states[item[0]].items: + print '\t\t', lhs, '::=', + print string.join(rhs[:pos]), + print '.', + print string.join(rhs[pos:]) + if i < len(tokens): + print + print 'token', str(tokens[i]) + print diff --git a/Parser/token.c b/Parser/token.c deleted file mode 100644 index a489668..0000000 --- a/Parser/token.c +++ /dev/null @@ -1,243 +0,0 @@ -/* Auto-generated by Tools/scripts/generate_token.py */ - -#include "Python.h" -#include "token.h" - -/* Token names */ - -const char * const _PyParser_TokenNames[] = { - "ENDMARKER", - "NAME", - "NUMBER", - "STRING", - "NEWLINE", - "INDENT", - "DEDENT", - "LPAR", - "RPAR", - "LSQB", - "RSQB", - "COLON", - "COMMA", - "SEMI", - "PLUS", - "MINUS", - "STAR", - "SLASH", - "VBAR", - "AMPER", - "LESS", - "GREATER", - "EQUAL", - "DOT", - "PERCENT", - "LBRACE", - "RBRACE", - "EQEQUAL", - "NOTEQUAL", - "LESSEQUAL", - "GREATEREQUAL", - "TILDE", - "CIRCUMFLEX", - "LEFTSHIFT", - "RIGHTSHIFT", - "DOUBLESTAR", - "PLUSEQUAL", - "MINEQUAL", - "STAREQUAL", - "SLASHEQUAL", - "PERCENTEQUAL", - "AMPEREQUAL", - "VBAREQUAL", - "CIRCUMFLEXEQUAL", - "LEFTSHIFTEQUAL", - "RIGHTSHIFTEQUAL", - "DOUBLESTAREQUAL", - "DOUBLESLASH", - "DOUBLESLASHEQUAL", - "AT", - "ATEQUAL", - "RARROW", - "ELLIPSIS", - "COLONEQUAL", - "OP", - "AWAIT", - "ASYNC", - "TYPE_IGNORE", - "TYPE_COMMENT", - "<ERRORTOKEN>", - "<COMMENT>", - "<NL>", - "<ENCODING>", - "<N_TOKENS>", -}; - -/* Return the token corresponding to a single character */ - -int -PyToken_OneChar(int c1) -{ - switch (c1) { - case '%': return PERCENT; - case '&': return AMPER; - case '(': return LPAR; - case ')': return RPAR; - case '*': return STAR; - case '+': return PLUS; - case ',': return COMMA; - case '-': return MINUS; - case '.': return DOT; - case '/': return SLASH; - case ':': return COLON; - case ';': return SEMI; - case '<': return LESS; - case '=': return EQUAL; - case '>': return GREATER; - case '@': return AT; - case '[': return LSQB; - case ']': return RSQB; - case '^': return CIRCUMFLEX; - case '{': return LBRACE; - case '|': return VBAR; - case '}': return RBRACE; - case '~': return TILDE; - } - return OP; -} - -int -PyToken_TwoChars(int c1, int c2) -{ - switch (c1) { - case '!': - switch (c2) { - case '=': return NOTEQUAL; - } - break; - case '%': - switch (c2) { - case '=': return PERCENTEQUAL; - } - break; - case '&': - switch (c2) { - case '=': return AMPEREQUAL; - } - break; - case '*': - switch (c2) { - case '*': return DOUBLESTAR; - case '=': return 
STAREQUAL; - } - break; - case '+': - switch (c2) { - case '=': return PLUSEQUAL; - } - break; - case '-': - switch (c2) { - case '=': return MINEQUAL; - case '>': return RARROW; - } - break; - case '/': - switch (c2) { - case '/': return DOUBLESLASH; - case '=': return SLASHEQUAL; - } - break; - case ':': - switch (c2) { - case '=': return COLONEQUAL; - } - break; - case '<': - switch (c2) { - case '<': return LEFTSHIFT; - case '=': return LESSEQUAL; - case '>': return NOTEQUAL; - } - break; - case '=': - switch (c2) { - case '=': return EQEQUAL; - } - break; - case '>': - switch (c2) { - case '=': return GREATEREQUAL; - case '>': return RIGHTSHIFT; - } - break; - case '@': - switch (c2) { - case '=': return ATEQUAL; - } - break; - case '^': - switch (c2) { - case '=': return CIRCUMFLEXEQUAL; - } - break; - case '|': - switch (c2) { - case '=': return VBAREQUAL; - } - break; - } - return OP; -} - -int -PyToken_ThreeChars(int c1, int c2, int c3) -{ - switch (c1) { - case '*': - switch (c2) { - case '*': - switch (c3) { - case '=': return DOUBLESTAREQUAL; - } - break; - } - break; - case '.': - switch (c2) { - case '.': - switch (c3) { - case '.': return ELLIPSIS; - } - break; - } - break; - case '/': - switch (c2) { - case '/': - switch (c3) { - case '=': return DOUBLESLASHEQUAL; - } - break; - } - break; - case '<': - switch (c2) { - case '<': - switch (c3) { - case '=': return LEFTSHIFTEQUAL; - } - break; - } - break; - case '>': - switch (c2) { - case '>': - switch (c3) { - case '=': return RIGHTSHIFTEQUAL; - } - break; - } - break; - } - return OP; -} diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index f84093d..8966661 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2,6 +2,7 @@ /* Tokenizer implementation */ #include "Python.h" +#include "pgenheaders.h" #include <ctype.h> #include <assert.h> @@ -9,29 +10,16 @@ #include "tokenizer.h" #include "errcode.h" +#ifndef PGEN #include "unicodeobject.h" -#include "bytesobject.h" +#include "stringobject.h" #include "fileobject.h" #include "codecs.h" #include "abstract.h" +#include "pydebug.h" +#endif /* PGEN */ -/* Alternate tab spacing */ -#define ALTTABSIZE 1 - -#define is_potential_identifier_start(c) (\ - (c >= 'a' && c <= 'z')\ - || (c >= 'A' && c <= 'Z')\ - || c == '_'\ - || (c >= 128)) - -#define is_potential_identifier_char(c) (\ - (c >= 'a' && c <= 'z')\ - || (c >= 'A' && c <= 'Z')\ - || (c >= '0' && c <= '9')\ - || c == '_'\ - || (c >= 128)) - -extern char *PyOS_Readline(FILE *, FILE *, const char *); +extern char *PyOS_Readline(FILE *, FILE *, char *); /* Return malloc'ed string including trailing \n; empty malloc'ed string for EOF; NULL if interrupted */ @@ -44,10 +32,65 @@ static struct tok_state *tok_new(void); static int tok_nextc(struct tok_state *tok); static void tok_backup(struct tok_state *tok, int c); - -/* Spaces in this constant are treated as "zero or more spaces or tabs" when - tokenizing. 
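The PyToken_OneChar/TwoChars/ThreeChars ladders deleted above give the tokenizer longest-match operator recognition: try three characters, then two, then one, falling back to a generic OP. The same policy, table-driven, in a short Python sketch (operator table truncated):

    # Longest-match operator lookup in the spirit of PyToken_One/Two/ThreeChars.
    OPS = {"**=": "DOUBLESTAREQUAL", "**": "DOUBLESTAR", "*=": "STAREQUAL",
           "*": "STAR", "<=": "LESSEQUAL", "<": "LESS", "=": "EQUAL"}

    def match_op(text, pos):
        for width in (3, 2, 1):          # widest window first
            name = OPS.get(text[pos:pos + width])
            if name:
                return name, width
        return "OP", 1

    assert match_op("a **= b", 2) == ("DOUBLESTAREQUAL", 3)
    assert match_op("a * b", 2) == ("STAR", 1)

As the deleted file's header notes, the newer token.c was generated from Grammar/Tokens by Tools/scripts/generate_token.py rather than maintained by hand.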
*/ -static const char* type_comment_prefix = "# type: "; +/* Token names */ + +char *_PyParser_TokenNames[] = { + "ENDMARKER", + "NAME", + "NUMBER", + "STRING", + "NEWLINE", + "INDENT", + "DEDENT", + "LPAR", + "RPAR", + "LSQB", + "RSQB", + "COLON", + "COMMA", + "SEMI", + "PLUS", + "MINUS", + "STAR", + "SLASH", + "VBAR", + "AMPER", + "LESS", + "GREATER", + "EQUAL", + "DOT", + "PERCENT", + "BACKQUOTE", + "LBRACE", + "RBRACE", + "EQEQUAL", + "NOTEQUAL", + "LESSEQUAL", + "GREATEREQUAL", + "TILDE", + "CIRCUMFLEX", + "LEFTSHIFT", + "RIGHTSHIFT", + "DOUBLESTAR", + "PLUSEQUAL", + "MINEQUAL", + "STAREQUAL", + "SLASHEQUAL", + "PERCENTEQUAL", + "AMPEREQUAL", + "VBAREQUAL", + "CIRCUMFLEXEQUAL", + "LEFTSHIFTEQUAL", + "RIGHTSHIFTEQUAL", + "DOUBLESTAREQUAL", + "DOUBLESLASH", + "DOUBLESLASHEQUAL", + "AT", + /* This table must match the #defines in token.h! */ + "OP", + "<ERRORTOKEN>", + "<N_TOKENS>" +}; /* Create and initialize a new tok_state structure */ @@ -65,45 +108,61 @@ tok_new(void) tok->tabsize = TABSIZE; tok->indent = 0; tok->indstack[0] = 0; - tok->atbol = 1; tok->pendin = 0; tok->prompt = tok->nextprompt = NULL; tok->lineno = 0; tok->level = 0; + tok->filename = NULL; + tok->altwarning = 0; + tok->alterror = 0; + tok->alttabsize = 1; tok->altindstack[0] = 0; - tok->decoding_state = STATE_INIT; + tok->decoding_state = 0; tok->decoding_erred = 0; tok->read_coding_spec = 0; - tok->enc = NULL; tok->encoding = NULL; tok->cont_line = 0; - tok->filename = NULL; +#ifndef PGEN tok->decoding_readline = NULL; tok->decoding_buffer = NULL; - tok->type_comments = 0; - - tok->async_hacks = 0; - tok->async_def = 0; - tok->async_def_indent = 0; - tok->async_def_nl = 0; - +#endif return tok; } static char * -new_string(const char *s, Py_ssize_t len, struct tok_state *tok) +new_string(const char *s, Py_ssize_t len) { char* result = (char *)PyMem_MALLOC(len + 1); - if (!result) { - tok->done = E_NOMEM; - return NULL; + if (result != NULL) { + memcpy(result, s, len); + result[len] = '\0'; } - memcpy(result, s, len); - result[len] = '\0'; return result; } +#ifdef PGEN + +static char * +decoding_fgets(char *s, int size, struct tok_state *tok) +{ + return fgets(s, size, tok->fp); +} + +static int +decoding_feof(struct tok_state *tok) +{ + return feof(tok->fp); +} + +static char * +decode_str(const char *str, int exec_input, struct tok_state *tok) +{ + return new_string(str, strlen(str)); +} + +#else /* PGEN */ + static char * error_ret(struct tok_state *tok) /* XXX */ { @@ -116,8 +175,8 @@ error_ret(struct tok_state *tok) /* XXX */ } -static const char * -get_normal_name(const char *s) /* for utf-8 and latin-1 */ +static char * +get_normal_name(char *s) /* for utf-8 and latin-1 */ { char buf[13]; int i; @@ -147,18 +206,17 @@ get_normal_name(const char *s) /* for utf-8 and latin-1 */ /* Return the coding spec in S, or NULL if none is found. */ -static int -get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok) +static char * +get_coding_spec(const char *s, Py_ssize_t size) { Py_ssize_t i; - *spec = NULL; /* Coding spec must be in a comment, and that comment must be * the only statement on the source code line. 
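get_coding_spec above scans for a PEP 263 coding cookie by hand, under the constraint spelled out in its comment: the cookie's comment must be the only thing on its line, within the first two lines. In Python the whole check fits in a regular expression (the pattern is essentially the stdlib tokenize cookie regex; treat this as a sketch):

    import re

    CODING_RE = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')

    def find_coding_spec(source):
        for line in source.splitlines()[:2]:
            match = CODING_RE.match(line)
            if match:
                return match.group(1)
            if line.strip() and not line.lstrip().startswith("#"):
                return None          # real code before any cookie: give up
        return None

    assert find_coding_spec("# -*- coding: latin-1 -*-\nx = 1\n") == "latin-1"
    assert find_coding_spec("x = 1\n# coding: utf-8\n") is None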
*/ for (i = 0; i < size - 6; i++) { if (s[i] == '#') break; if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014') - return 1; + return NULL; } for (; i < size - 6; i++) { /* XXX inefficient search */ const char* t = s + i; @@ -177,23 +235,20 @@ get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *t t++; if (begin < t) { - char* r = new_string(begin, t - begin, tok); - const char* q; + char* r = new_string(begin, t - begin); + char* q; if (!r) - return 0; + return NULL; q = get_normal_name(r); if (r != q) { PyMem_FREE(r); - r = new_string(q, strlen(q), tok); - if (!r) - return 0; + r = new_string(q, strlen(q)); } - *spec = r; - break; + return r; } } } - return 1; + return NULL; } /* Check whether the line contains a coding spec. If it does, @@ -205,7 +260,7 @@ static int check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, int set_readline(struct tok_state *, const char *)) { - char *cs; + char * cs; int r = 1; if (tok->cont_line) { @@ -213,8 +268,7 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, tok->read_coding_spec = 1; return 1; } - if (!get_coding_spec(line, &cs, size, tok)) - return 0; + cs = get_coding_spec(line, size); if (!cs) { Py_ssize_t i; for (i = 0; i < size; i++) { @@ -227,31 +281,40 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, break; } } - return 1; - } - tok->read_coding_spec = 1; - if (tok->encoding == NULL) { - assert(tok->decoding_state == STATE_RAW); - if (strcmp(cs, "utf-8") == 0) { - tok->encoding = cs; - } else { - r = set_readline(tok, cs); - if (r) { + } else { + tok->read_coding_spec = 1; + if (tok->encoding == NULL) { + assert(tok->decoding_state == 1); /* raw */ + if (strcmp(cs, "utf-8") == 0 || + strcmp(cs, "iso-8859-1") == 0) { tok->encoding = cs; - tok->decoding_state = STATE_NORMAL; - } - else { - PyErr_Format(PyExc_SyntaxError, - "encoding problem: %s", cs); + } else { +#ifdef Py_USING_UNICODE + r = set_readline(tok, cs); + if (r) { + tok->encoding = cs; + tok->decoding_state = -1; + } + else { + PyErr_Format(PyExc_SyntaxError, + "encoding problem: %s", cs); + PyMem_FREE(cs); + } +#else + /* Without Unicode support, we cannot + process the coding spec. Since there + won't be any Unicode literals, that + won't matter. 
*/ PyMem_FREE(cs); +#endif } + } else { /* then, compare cs with BOM */ + r = (strcmp(tok->encoding, cs) == 0); + if (!r) + PyErr_Format(PyExc_SyntaxError, + "encoding problem: %s with BOM", cs); + PyMem_FREE(cs); } - } else { /* then, compare cs with BOM */ - r = (strcmp(tok->encoding, cs) == 0); - if (!r) - PyErr_Format(PyExc_SyntaxError, - "encoding problem: %s with BOM", cs); - PyMem_FREE(cs); } return r; } @@ -268,7 +331,7 @@ check_bom(int get_char(struct tok_state *), { int ch1, ch2, ch3; ch1 = get_char(tok); - tok->decoding_state = STATE_RAW; + tok->decoding_state = 1; if (ch1 == EOF) { return 1; } else if (ch1 == 0xEF) { @@ -297,7 +360,7 @@ check_bom(int get_char(struct tok_state *), } if (!set_readline(tok, "utf-16-be")) return 0; - tok->decoding_state = STATE_NORMAL; + tok->decoding_state = -1; } else if (ch1 == 0xFF) { ch2 = get_char(tok); if (ch2 != 0xFE) { @@ -307,7 +370,7 @@ check_bom(int get_char(struct tok_state *), } if (!set_readline(tok, "utf-16-le")) return 0; - tok->decoding_state = STATE_NORMAL; + tok->decoding_state = -1; #endif } else { unget_char(ch1, tok); @@ -315,10 +378,7 @@ check_bom(int get_char(struct tok_state *), } if (tok->encoding != NULL) PyMem_FREE(tok->encoding); - tok->encoding = new_string("utf-8", 5, tok); - if (!tok->encoding) - return 0; - /* No need to set_readline: input is already utf-8 */ + tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */ return 1; } @@ -329,7 +389,7 @@ check_bom(int get_char(struct tok_state *), 1) NULL: need to call tok->decoding_readline to get a new line 2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and stored the result in tok->decoding_buffer - 3) PyByteArrayObject *: previous call to fp_readl did not have enough room + 3) PyStringObject *: previous call to fp_readl did not have enough room (in the s buffer) to copy entire contents of the line read by tok->decoding_readline. tok->decoding_buffer has the overflow. In this case, fp_readl is called in a loop (with an expanded buffer) @@ -340,62 +400,58 @@ check_bom(int get_char(struct tok_state *), static char * fp_readl(char *s, int size, struct tok_state *tok) { - PyObject* bufobj; - const char *buf; - Py_ssize_t buflen; +#ifndef Py_USING_UNICODE + /* In a non-Unicode built, this should never be called. 
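check_bom above peeks at the first bytes and, when it finds a byte order mark, strips it and fixes the decoding state before any line is read (the UTF-16 pair sits behind conditional compilation in the C code). The same sniffing in a few lines of Python:

    import codecs

    # The marks the C code knows about, and the codec each one implies.
    BOMS = [
        (codecs.BOM_UTF8, "utf-8"),
        (codecs.BOM_UTF16_BE, "utf-16-be"),
        (codecs.BOM_UTF16_LE, "utf-16-le"),
    ]

    def sniff_bom(data):
        for bom, name in BOMS:
            if data.startswith(bom):
                return name, data[len(bom):]   # strip the mark, like check_bom
        return None, data

    assert sniff_bom(b"\xef\xbb\xbfx = 1\n") == ("utf-8", b"x = 1\n")
    assert sniff_bom(b"x = 1\n") == (None, b"x = 1\n")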
*/ + Py_FatalError("fp_readl should not be called in this build."); + return NULL; /* Keep compiler happy (not reachable) */ +#else + PyObject* utf8 = NULL; + PyObject* buf = tok->decoding_buffer; + char *str; + Py_ssize_t utf8len; /* Ask for one less byte so we can terminate it */ assert(size > 0); size--; - if (tok->decoding_buffer) { - bufobj = tok->decoding_buffer; - Py_INCREF(bufobj); - } - else - { - bufobj = _PyObject_CallNoArg(tok->decoding_readline); - if (bufobj == NULL) - goto error; - } - if (PyUnicode_CheckExact(bufobj)) - { - buf = PyUnicode_AsUTF8AndSize(bufobj, &buflen); - if (buf == NULL) { - goto error; + if (buf == NULL) { + buf = PyObject_CallObject(tok->decoding_readline, NULL); + if (buf == NULL) + return error_ret(tok); + if (!PyUnicode_Check(buf)) { + Py_DECREF(buf); + PyErr_SetString(PyExc_SyntaxError, + "codec did not return a unicode object"); + return error_ret(tok); } + } else { + tok->decoding_buffer = NULL; + if (PyString_CheckExact(buf)) + utf8 = buf; } - else - { - buf = PyByteArray_AsString(bufobj); - if (buf == NULL) { - goto error; - } - buflen = PyByteArray_GET_SIZE(bufobj); + if (utf8 == NULL) { + utf8 = PyUnicode_AsUTF8String(buf); + Py_DECREF(buf); + if (utf8 == NULL) + return error_ret(tok); } - - Py_XDECREF(tok->decoding_buffer); - if (buflen > size) { - /* Too many chars, the rest goes into tok->decoding_buffer */ - tok->decoding_buffer = PyByteArray_FromStringAndSize(buf+size, - buflen-size); - if (tok->decoding_buffer == NULL) - goto error; - buflen = size; + str = PyString_AsString(utf8); + utf8len = PyString_GET_SIZE(utf8); + if (utf8len > size) { + tok->decoding_buffer = PyString_FromStringAndSize(str+size, utf8len-size); + if (tok->decoding_buffer == NULL) { + Py_DECREF(utf8); + return error_ret(tok); + } + utf8len = size; } - else - tok->decoding_buffer = NULL; - - memcpy(s, buf, buflen); - s[buflen] = '\0'; - if (buflen == 0) /* EOF */ - s = NULL; - Py_DECREF(bufobj); + memcpy(s, str, utf8len); + s[utf8len] = '\0'; + Py_DECREF(utf8); + if (utf8len == 0) + return NULL; /* EOF */ return s; - -error: - Py_XDECREF(bufobj); - return error_ret(tok); +#endif } /* Set the readline function for TOK to a StreamReader's @@ -411,48 +467,24 @@ error: static int fp_setreadl(struct tok_state *tok, const char* enc) { - PyObject *readline, *io, *stream; - _Py_IDENTIFIER(open); - _Py_IDENTIFIER(readline); - int fd; - long pos; - - fd = fileno(tok->fp); - /* Due to buffering the file offset for fd can be different from the file - * position of tok->fp. If tok->fp was opened in text mode on Windows, - * its file position counts CRLF as one char and can't be directly mapped - * to the file offset for fd. Instead we step back one byte and read to - * the end of line.*/ - pos = ftell(tok->fp); - if (pos == -1 || - lseek(fd, (off_t)(pos > 0 ? pos - 1 : pos), SEEK_SET) == (off_t)-1) { - PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL); - return 0; - } + PyObject *reader, *stream, *readline; - io = PyImport_ImportModuleNoBlock("io"); - if (io == NULL) - return 0; - - stream = _PyObject_CallMethodId(io, &PyId_open, "isisOOO", - fd, "r", -1, enc, Py_None, Py_None, Py_False); - Py_DECREF(io); + /* XXX: constify filename argument. 
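fp_setreadl's job, in both versions of this diff, is to replace raw fgets with a decoding readline. In Python terms it amounts to wrapping the byte stream in a codec reader and keeping its readline (a sketch of the idea, not the C API):

    import codecs, io

    raw = io.BytesIO(b"x = 'caf\xe9'\n")                  # e-acute in latin-1
    readline = codecs.getreader("latin-1")(raw).readline  # decoding readline
    assert readline() == "x = 'caf\xe9'\n"                # now a decoded str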
*/ + stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL); if (stream == NULL) return 0; - readline = _PyObject_GetAttrId(stream, &PyId_readline); + reader = PyCodec_StreamReader(enc, stream, NULL); Py_DECREF(stream); - if (readline == NULL) + if (reader == NULL) return 0; - Py_XSETREF(tok->decoding_readline, readline); - if (pos > 0) { - PyObject *bufobj = _PyObject_CallNoArg(readline); - if (bufobj == NULL) - return 0; - Py_DECREF(bufobj); - } + readline = PyObject_GetAttrString(reader, "readline"); + Py_DECREF(reader); + if (readline == NULL) + return 0; + tok->decoding_readline = readline; return 1; } @@ -468,34 +500,6 @@ static void fp_ungetc(int c, struct tok_state *tok) { ungetc(c, tok->fp); } -/* Check whether the characters at s start a valid - UTF-8 sequence. Return the number of characters forming - the sequence if yes, 0 if not. */ -static int valid_utf8(const unsigned char* s) -{ - int expected = 0; - int length; - if (*s < 0x80) - /* single-byte code */ - return 1; - if (*s < 0xc0) - /* following byte */ - return 0; - if (*s < 0xE0) - expected = 1; - else if (*s < 0xF0) - expected = 2; - else if (*s < 0xF8) - expected = 3; - else - return 0; - length = expected + 1; - for (; expected; expected--) - if (s[expected] < 0x80 || s[expected] >= 0xC0) - return 0; - return length; -} - /* Read a line of input from TOK. Determine encoding if necessary. */ @@ -505,12 +509,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok) char *line = NULL; int badchar = 0; for (;;) { - if (tok->decoding_state == STATE_NORMAL) { + if (tok->decoding_state < 0) { /* We already have a codec associated with this input. */ line = fp_readl(s, size, tok); break; - } else if (tok->decoding_state == STATE_RAW) { + } else if (tok->decoding_state > 0) { /* We want a 'raw' read. */ line = Py_UniversalNewlineFgets(s, size, tok->fp, NULL); @@ -521,7 +525,7 @@ decoding_fgets(char *s, int size, struct tok_state *tok) reader functions from now on. */ if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) return error_ret(tok); - assert(tok->decoding_state != STATE_INIT); + assert(tok->decoding_state != 0); } } if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) { @@ -529,40 +533,43 @@ decoding_fgets(char *s, int size, struct tok_state *tok) return error_ret(tok); } } - /* The default encoding is UTF-8, so make sure we don't have any - non-UTF-8 sequences in it. */ +#ifndef PGEN + /* The default encoding is ASCII, so make sure we don't have any + non-ASCII bytes in it. */ if (line && !tok->encoding) { unsigned char *c; - int length; - for (c = (unsigned char *)line; *c; c += length) - if (!(length = valid_utf8(c))) { + for (c = (unsigned char *)line; *c; c++) + if (*c > 127) { badchar = *c; break; } } if (badchar) { + char buf[500]; /* Need to add 1 to the line number, since this line has not been counted, yet. 
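The valid_utf8 helper removed above classifies a sequence by its lead byte and then verifies the continuation bytes. A direct Python port of its logic (like the original, it checks structure only and does not reject overlong forms):

    def valid_utf8(data, pos=0):
        """Length of the UTF-8 sequence starting at pos, or 0 if invalid."""
        lead = data[pos]
        if lead < 0x80:
            return 1                      # single-byte (ASCII) code
        if lead < 0xC0:
            return 0                      # stray continuation byte
        if lead < 0xE0:
            expected = 1
        elif lead < 0xF0:
            expected = 2
        elif lead < 0xF8:
            expected = 3
        else:
            return 0
        tail = data[pos + 1:pos + 1 + expected]
        if len(tail) != expected or any(not 0x80 <= b < 0xC0 for b in tail):
            return 0                      # wrong count or bad continuation byte
        return expected + 1

    assert valid_utf8(b"a") == 1
    assert valid_utf8(b"\xc3\xa9") == 2   # two-byte sequence
    assert valid_utf8(b"\xff") == 0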
*/ - PyErr_Format(PyExc_SyntaxError, - "Non-UTF-8 code starting with '\\x%.2x' " - "in file %U on line %i, " - "but no encoding declared; " - "see http://python.org/dev/peps/pep-0263/ for details", - badchar, tok->filename, tok->lineno + 1); + sprintf(buf, + "Non-ASCII character '\\x%.2x' " + "in file %.200s on line %i, " + "but no encoding declared; " + "see http://python.org/dev/peps/pep-0263/ for details", + badchar, tok->filename, tok->lineno + 1); + PyErr_SetString(PyExc_SyntaxError, buf); return error_ret(tok); } +#endif return line; } static int decoding_feof(struct tok_state *tok) { - if (tok->decoding_state != STATE_NORMAL) { + if (tok->decoding_state >= 0) { return feof(tok->fp); } else { PyObject* buf = tok->decoding_buffer; if (buf == NULL) { - buf = _PyObject_CallNoArg(tok->decoding_readline); + buf = PyObject_CallObject(tok->decoding_readline, NULL); if (buf == NULL) { error_ret(tok); return 1; @@ -601,6 +608,7 @@ buf_setreadl(struct tok_state *tok, const char* enc) { /* Return a UTF-8 encoding Python string object from the C byte string STR, which is encoded with ENC. */ +#ifdef Py_USING_UNICODE static PyObject * translate_into_utf8(const char* str, const char* enc) { PyObject *utf8; @@ -611,12 +619,12 @@ translate_into_utf8(const char* str, const char* enc) { Py_DECREF(buf); return utf8; } +#endif static char * translate_newlines(const char *s, int exec_input, struct tok_state *tok) { - int skip_next_lf = 0; - size_t needed_length = strlen(s) + 2, final_length; + int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length; char *buf, *current; char c = '\0'; buf = PyMem_MALLOC(needed_length); @@ -680,12 +688,14 @@ decode_str(const char *input, int single, struct tok_state *tok) return error_ret(tok); str = tok->str; /* string after BOM if any */ assert(str); +#ifdef Py_USING_UNICODE if (tok->enc != NULL) { utf8 = translate_into_utf8(str, tok->enc); if (utf8 == NULL) return error_ret(tok); - str = PyBytes_AsString(utf8); + str = PyString_AsString(utf8); } +#endif for (s = str;; s++) { if (*s == '\0') break; else if (*s == '\n') { @@ -707,18 +717,22 @@ decode_str(const char *input, int single, struct tok_state *tok) return error_ret(tok); } } +#ifdef Py_USING_UNICODE if (tok->enc != NULL) { assert(utf8 == NULL); utf8 = translate_into_utf8(str, tok->enc); if (utf8 == NULL) return error_ret(tok); - str = PyBytes_AS_STRING(utf8); + str = PyString_AsString(utf8); } +#endif assert(tok->decoding_buffer == NULL); tok->decoding_buffer = utf8; /* CAUTION */ return str; } +#endif /* PGEN */ + /* Set up tokenizer for string */ struct tok_state * @@ -727,7 +741,7 @@ PyTokenizer_FromString(const char *str, int exec_input) struct tok_state *tok = tok_new(); if (tok == NULL) return NULL; - str = decode_str(str, exec_input, tok); + str = (char *)decode_str(str, exec_input, tok); if (str == NULL) { PyTokenizer_Free(tok); return NULL; @@ -738,38 +752,11 @@ PyTokenizer_FromString(const char *str, int exec_input) return tok; } -struct tok_state * -PyTokenizer_FromUTF8(const char *str, int exec_input) -{ - struct tok_state *tok = tok_new(); - if (tok == NULL) - return NULL; - tok->input = str = translate_newlines(str, exec_input, tok); - if (str == NULL) { - PyTokenizer_Free(tok); - return NULL; - } - tok->decoding_state = STATE_RAW; - tok->read_coding_spec = 1; - tok->enc = NULL; - tok->str = str; - tok->encoding = (char *)PyMem_MALLOC(6); - if (!tok->encoding) { - PyTokenizer_Free(tok); - return NULL; - } - strcpy(tok->encoding, "utf-8"); - - /* XXX: constify members. 
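translate_newlines, diffed above, canonicalizes line endings before tokenization so the rest of the tokenizer only ever sees \n. Its observable behavior, sketched (the C helper does this byte-wise with a skip_next_lf flag):

    def translate_newlines(s):
        # Collapse \r\n to \n and a lone \r to \n.
        return s.replace("\r\n", "\n").replace("\r", "\n")

    assert translate_newlines("a\r\nb\rc\n") == "a\nb\nc\n"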
*/ - tok->buf = tok->cur = tok->end = tok->inp = (char*)str; - return tok; -} /* Set up tokenizer for file */ struct tok_state * -PyTokenizer_FromFile(FILE *fp, const char* enc, - const char *ps1, const char *ps2) +PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2) { struct tok_state *tok = tok_new(); if (tok == NULL) @@ -783,17 +770,6 @@ PyTokenizer_FromFile(FILE *fp, const char* enc, tok->fp = fp; tok->prompt = ps1; tok->nextprompt = ps2; - if (enc != NULL) { - /* Must copy encoding declaration since it - gets copied into the parse tree. */ - tok->encoding = PyMem_MALLOC(strlen(enc)+1); - if (!tok->encoding) { - PyTokenizer_Free(tok); - return NULL; - } - strcpy(tok->encoding, enc); - tok->decoding_state = STATE_NORMAL; - } return tok; } @@ -805,9 +781,10 @@ PyTokenizer_Free(struct tok_state *tok) { if (tok->encoding != NULL) PyMem_FREE(tok->encoding); +#ifndef PGEN Py_XDECREF(tok->decoding_readline); Py_XDECREF(tok->decoding_buffer); - Py_XDECREF(tok->filename); +#endif if (tok->fp != NULL && tok->buf != NULL) PyMem_FREE(tok->buf); if (tok->input) @@ -815,10 +792,74 @@ PyTokenizer_Free(struct tok_state *tok) PyMem_FREE(tok); } +#if !defined(PGEN) && defined(Py_USING_UNICODE) +static int +tok_stdin_decode(struct tok_state *tok, char **inp) +{ + PyObject *enc, *sysstdin, *decoded, *utf8; + const char *encoding; + char *converted; + + if (PySys_GetFile((char *)"stdin", NULL) != stdin) + return 0; + sysstdin = PySys_GetObject("stdin"); + if (sysstdin == NULL || !PyFile_Check(sysstdin)) + return 0; + + enc = ((PyFileObject *)sysstdin)->f_encoding; + if (enc == NULL || !PyString_Check(enc)) + return 0; + Py_INCREF(enc); + + encoding = PyString_AsString(enc); + decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL); + if (decoded == NULL) + goto error_clear; + + utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL); + Py_DECREF(decoded); + if (utf8 == NULL) + goto error_clear; + + assert(PyString_Check(utf8)); + converted = new_string(PyString_AS_STRING(utf8), + PyString_GET_SIZE(utf8)); + Py_DECREF(utf8); + if (converted == NULL) + goto error_nomem; + + PyMem_FREE(*inp); + *inp = converted; + if (tok->encoding != NULL) + PyMem_FREE(tok->encoding); + tok->encoding = new_string(encoding, strlen(encoding)); + if (tok->encoding == NULL) + goto error_nomem; + + Py_DECREF(enc); + return 0; + +error_nomem: + Py_DECREF(enc); + tok->done = E_NOMEM; + return -1; + +error_clear: + Py_DECREF(enc); + if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + tok->done = E_ERROR; + return -1; + } + /* Fallback to iso-8859-1: for backward compatibility */ + PyErr_Clear(); + return 0; +} +#endif + /* Get next char, updating state; error code goes into tok->done */ static int -tok_nextc(struct tok_state *tok) +tok_nextc(register struct tok_state *tok) { for (;;) { if (tok->cur != tok->inp) { @@ -846,34 +887,6 @@ tok_nextc(struct tok_state *tok) } if (tok->prompt != NULL) { char *newtok = PyOS_Readline(stdin, stdout, tok->prompt); - if (newtok != NULL) { - char *translated = translate_newlines(newtok, 0, tok); - PyMem_FREE(newtok); - if (translated == NULL) - return EOF; - newtok = translated; - } - if (tok->encoding && newtok && *newtok) { - /* Recode to UTF-8 */ - Py_ssize_t buflen; - const char* buf; - PyObject *u = translate_into_utf8(newtok, tok->encoding); - PyMem_FREE(newtok); - if (!u) { - tok->done = E_DECODE; - return EOF; - } - buflen = PyBytes_GET_SIZE(u); - buf = PyBytes_AS_STRING(u); - newtok = PyMem_MALLOC(buflen+1); - if (newtok == NULL) { - Py_DECREF(u); - tok->done = E_NOMEM; - 
return EOF; - } - strcpy(newtok, buf); - Py_DECREF(u); - } if (tok->nextprompt != NULL) tok->prompt = tok->nextprompt; if (newtok == NULL) @@ -882,6 +895,10 @@ tok_nextc(struct tok_state *tok) PyMem_FREE(newtok); tok->done = E_EOF; } +#if !defined(PGEN) && defined(Py_USING_UNICODE) + else if (tok_stdin_decode(tok, &newtok) != 0) + PyMem_FREE(newtok); +#endif else if (tok->start != NULL) { size_t start = tok->start - tok->buf; size_t oldlen = tok->cur - tok->buf; @@ -956,7 +973,6 @@ tok_nextc(struct tok_state *tok) while (!done) { Py_ssize_t curstart = tok->start == NULL ? -1 : tok->start - tok->buf; - Py_ssize_t cur_multi_line_start = tok->multi_line_start - tok->buf; Py_ssize_t curvalid = tok->inp - tok->buf; Py_ssize_t newsize = curvalid + BUFSIZ; char *newbuf = tok->buf; @@ -969,7 +985,6 @@ tok_nextc(struct tok_state *tok) } tok->buf = newbuf; tok->cur = tok->buf + cur; - tok->multi_line_start = tok->buf + cur_multi_line_start; tok->line_start = tok->cur; tok->inp = tok->buf + curvalid; tok->end = tok->buf + newsize; @@ -985,8 +1000,7 @@ tok_nextc(struct tok_state *tok) return EOF; /* Last line does not end in \n, fake one */ - if (tok->inp[-1] != '\n') - strcpy(tok->inp, "\n"); + strcpy(tok->inp, "\n"); } tok->inp = strchr(tok->inp, '\0'); done = tok->inp[-1] == '\n'; @@ -1018,7 +1032,7 @@ tok_nextc(struct tok_state *tok) /* Back-up one character */ static void -tok_backup(struct tok_state *tok, int c) +tok_backup(register struct tok_state *tok, register int c) { if (c != EOF) { if (--tok->cur < tok->buf) @@ -1029,88 +1043,185 @@ tok_backup(struct tok_state *tok, int c) } -static int -syntaxerror(struct tok_state *tok, const char *format, ...) -{ - va_list vargs; -#ifdef HAVE_STDARG_PROTOTYPES - va_start(vargs, format); -#else - va_start(vargs); -#endif - PyErr_FormatV(PyExc_SyntaxError, format, vargs); - va_end(vargs); - PyErr_SyntaxLocationObject(tok->filename, - tok->lineno, - (int)(tok->cur - tok->line_start)); - tok->done = E_ERROR; - return ERRORTOKEN; -} +/* Return the token corresponding to a single character */ -static int -indenterror(struct tok_state *tok) +int +PyToken_OneChar(int c) { - tok->done = E_TABSPACE; - tok->cur = tok->inp; - return ERRORTOKEN; + switch (c) { + case '(': return LPAR; + case ')': return RPAR; + case '[': return LSQB; + case ']': return RSQB; + case ':': return COLON; + case ',': return COMMA; + case ';': return SEMI; + case '+': return PLUS; + case '-': return MINUS; + case '*': return STAR; + case '/': return SLASH; + case '|': return VBAR; + case '&': return AMPER; + case '<': return LESS; + case '>': return GREATER; + case '=': return EQUAL; + case '.': return DOT; + case '%': return PERCENT; + case '`': return BACKQUOTE; + case '{': return LBRACE; + case '}': return RBRACE; + case '^': return CIRCUMFLEX; + case '~': return TILDE; + case '@': return AT; + default: return OP; + } } -/* Verify that the identifier follows PEP 3131. - All identifier strings are guaranteed to be "ready" unicode objects. 
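verify_identifier, removed in this diff, decodes the candidate token and asks PyUnicode_IsIdentifier whether it satisfies the PEP 3131 rules. The same predicate is exposed in Python as str.isidentifier():

    # str.isidentifier() applies the PEP 3131 check the C code relies on.
    assert "über".isidentifier()
    assert not "1abc".isidentifier()
    assert not "a-b".isidentifier()

    # Keywords still pass the identifier test; a tokenizer must filter
    # them in a separate step.
    import keyword
    assert "def".isidentifier() and keyword.iskeyword("def")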
- */ -static int -verify_identifier(struct tok_state *tok) + +int +PyToken_TwoChars(int c1, int c2) { - PyObject *s; - int result; - if (tok->decoding_erred) - return 0; - s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL); - if (s == NULL) { - if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { - PyErr_Clear(); - tok->done = E_IDENTIFIER; - } else { - tok->done = E_ERROR; + switch (c1) { + case '=': + switch (c2) { + case '=': return EQEQUAL; } - return 0; + break; + case '!': + switch (c2) { + case '=': return NOTEQUAL; + } + break; + case '<': + switch (c2) { + case '>': return NOTEQUAL; + case '=': return LESSEQUAL; + case '<': return LEFTSHIFT; + } + break; + case '>': + switch (c2) { + case '=': return GREATEREQUAL; + case '>': return RIGHTSHIFT; + } + break; + case '+': + switch (c2) { + case '=': return PLUSEQUAL; + } + break; + case '-': + switch (c2) { + case '=': return MINEQUAL; + } + break; + case '*': + switch (c2) { + case '*': return DOUBLESTAR; + case '=': return STAREQUAL; + } + break; + case '/': + switch (c2) { + case '/': return DOUBLESLASH; + case '=': return SLASHEQUAL; + } + break; + case '|': + switch (c2) { + case '=': return VBAREQUAL; + } + break; + case '%': + switch (c2) { + case '=': return PERCENTEQUAL; + } + break; + case '&': + switch (c2) { + case '=': return AMPEREQUAL; + } + break; + case '^': + switch (c2) { + case '=': return CIRCUMFLEXEQUAL; + } + break; } - result = PyUnicode_IsIdentifier(s); - Py_DECREF(s); - if (result == 0) - tok->done = E_IDENTIFIER; - return result; + return OP; } -static int -tok_decimal_tail(struct tok_state *tok) +int +PyToken_ThreeChars(int c1, int c2, int c3) { - int c; - - while (1) { - do { - c = tok_nextc(tok); - } while (isdigit(c)); - if (c != '_') { + switch (c1) { + case '<': + switch (c2) { + case '<': + switch (c3) { + case '=': + return LEFTSHIFTEQUAL; + } break; } - c = tok_nextc(tok); - if (!isdigit(c)) { - tok_backup(tok, c); - syntaxerror(tok, "invalid decimal literal"); - return 0; + break; + case '>': + switch (c2) { + case '>': + switch (c3) { + case '=': + return RIGHTSHIFTEQUAL; + } + break; + } + break; + case '*': + switch (c2) { + case '*': + switch (c3) { + case '=': + return DOUBLESTAREQUAL; + } + break; + } + break; + case '/': + switch (c2) { + case '/': + switch (c3) { + case '=': + return DOUBLESLASHEQUAL; + } + break; } + break; + } + return OP; +} + +static int +indenterror(struct tok_state *tok) +{ + if (tok->alterror) { + tok->done = E_TABSPACE; + tok->cur = tok->inp; + return 1; + } + if (tok->altwarning) { + PySys_WriteStderr("%s: inconsistent use of tabs and spaces " + "in indentation\n", tok->filename); + tok->altwarning = 0; } - return c; + return 0; } /* Get next token, after space stripping etc. 
*/ static int -tok_get(struct tok_state *tok, char **p_start, char **p_end) +tok_get(register struct tok_state *tok, char **p_start, char **p_end) { - int c; - int blankline, nonascii; + register int c; + int blankline; *p_start = *p_end = NULL; nextline: @@ -1119,24 +1230,22 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) /* Get indentation level */ if (tok->atbol) { - int col = 0; - int altcol = 0; + register int col = 0; + register int altcol = 0; tok->atbol = 0; for (;;) { c = tok_nextc(tok); - if (c == ' ') { + if (c == ' ') col++, altcol++; - } else if (c == '\t') { - col = (col / tok->tabsize + 1) * tok->tabsize; - altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE; + col = (col/tok->tabsize + 1) * tok->tabsize; + altcol = (altcol/tok->alttabsize + 1) + * tok->alttabsize; } - else if (c == '\014') {/* Control-L (formfeed) */ + else if (c == '\014') /* Control-L (formfeed) */ col = altcol = 0; /* For Emacs users */ - } - else { + else break; - } } tok_backup(tok, c); if (c == '#' || c == '\n') { @@ -1145,18 +1254,10 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) not passed to the parser as NEWLINE tokens, except *totally* empty lines in interactive mode, which signal the end of a command group. */ - if (col == 0 && c == '\n' && tok->prompt != NULL) { + if (col == 0 && c == '\n' && tok->prompt != NULL) blankline = 0; /* Let it through */ - } - else if (tok->prompt != NULL && tok->lineno == 1) { - /* In interactive mode, if the first line contains - only spaces and/or a comment, let it through. */ - blankline = 0; - col = altcol = 0; - } - else { + else blankline = 1; /* Ignore completely */ - } /* We can't jump back right here since we still may need to skip to the end of a comment */ } @@ -1164,7 +1265,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) if (col == tok->indstack[tok->indent]) { /* No change */ if (altcol != tok->altindstack[tok->indent]) { - return indenterror(tok); + if (indenterror(tok)) + return ERRORTOKEN; } } else if (col > tok->indstack[tok->indent]) { @@ -1175,7 +1277,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) return ERRORTOKEN; } if (altcol <= tok->altindstack[tok->indent]) { - return indenterror(tok); + if (indenterror(tok)) + return ERRORTOKEN; } tok->pendin++; tok->indstack[++tok->indent] = col; @@ -1194,7 +1297,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) return ERRORTOKEN; } if (altcol != tok->altindstack[tok->indent]) { - return indenterror(tok); + if (indenterror(tok)) + return ERRORTOKEN; } } } @@ -1214,31 +1318,6 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) } } - /* Peek ahead at the next character */ - c = tok_nextc(tok); - tok_backup(tok, c); - /* Check if we are closing an async function */ - if (tok->async_def - && !blankline - /* Due to some implementation artifacts of type comments, - * a TYPE_COMMENT at the start of a function won't set an - * indentation level and it will produce a NEWLINE after it. - * To avoid spuriously ending an async function due to this, - * wait until we have some non-newline char in front of us. */ - && c != '\n' - && tok->level == 0 - /* There was a NEWLINE after ASYNC DEF, - so we're past the signature. 
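The indentation code above measures each line twice: col with the configured tab size and altcol with the alternate tab size (ALTTABSIZE, or tok->alttabsize in the older version). If two lines compare differently under the two rulers, the mix of tabs and spaces is ambiguous and indenterror() fires. A compact model:

    def widths(prefix, tabsize=8, alttabsize=1):
        col = altcol = 0
        for ch in prefix:
            if ch == ' ':
                col += 1; altcol += 1
            elif ch == '\t':
                col = (col // tabsize + 1) * tabsize
                altcol = (altcol // alttabsize + 1) * alttabsize
        return col, altcol

    # One tab vs. eight spaces: equal under tabsize=8, different under the
    # alternate ruler -- exactly the inconsistency indenterror() reports.
    assert widths("\t") == (8, 1)
    assert widths(" " * 8) == (8, 8)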
*/ - && tok->async_def_nl - /* Current indentation level is less than where - the async function was defined */ - && tok->async_def_indent >= tok->indent) - { - tok->async_def = 0; - tok->async_def_indent = 0; - tok->async_def_nl = 0; - } - again: tok->start = NULL; /* Skip spaces */ @@ -1249,63 +1328,40 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) /* Set start of current token */ tok->start = tok->cur - 1; - /* Skip comment, unless it's a type comment */ + /* Skip comment, while looking for tab-setting magic */ if (c == '#') { - const char *prefix, *p, *type_start; - - while (c != EOF && c != '\n') { - c = tok_nextc(tok); - } - - if (tok->type_comments) { - p = tok->start; - prefix = type_comment_prefix; - while (*prefix && p < tok->cur) { - if (*prefix == ' ') { - while (*p == ' ' || *p == '\t') { - p++; - } - } else if (*prefix == *p) { - p++; - } else { - break; - } - - prefix++; - } - - /* This is a type comment if we matched all of type_comment_prefix. */ - if (!*prefix) { - int is_type_ignore = 1; - const char *ignore_end = p + 6; - tok_backup(tok, c); /* don't eat the newline or EOF */ - - type_start = p; - - /* A TYPE_IGNORE is "type: ignore" followed by the end of the token - * or anything ASCII and non-alphanumeric. */ - is_type_ignore = ( - tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0 - && !(tok->cur > ignore_end - && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0])))); - - if (is_type_ignore) { - *p_start = (char *) ignore_end; - *p_end = tok->cur; - - /* If this type ignore is the only thing on the line, consume the newline also. */ - if (blankline) { - tok_nextc(tok); - tok->atbol = 1; - } - return TYPE_IGNORE; - } else { - *p_start = (char *) type_start; /* after type_comment_prefix */ - *p_end = tok->cur; - return TYPE_COMMENT; + static char *tabforms[] = { + "tab-width:", /* Emacs */ + ":tabstop=", /* vim, full form */ + ":ts=", /* vim, abbreviated form */ + "set tabsize=", /* will vi never die? */ + /* more templates can be added here to support other editors */ + }; + char cbuf[80]; + char *tp, **cp; + tp = cbuf; + do { + *tp++ = c = tok_nextc(tok); + } while (c != EOF && c != '\n' && + (size_t)(tp - cbuf + 1) < sizeof(cbuf)); + *tp = '\0'; + for (cp = tabforms; + cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]); + cp++) { + if ((tp = strstr(cbuf, *cp))) { + int newsize = atoi(tp + strlen(*cp)); + + if (newsize >= 1 && newsize <= 40) { + tok->tabsize = newsize; + if (Py_VerboseFlag) + PySys_WriteStderr( + "Tab size set to %d\n", + newsize); } } } + while (c != EOF && c != '\n') + c = tok_nextc(tok); } /* Check for EOF and errors now */ @@ -1314,108 +1370,49 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) } /* Identifier (most frequent token!) */ - nonascii = 0; - if (is_potential_identifier_start(c)) { - /* Process the various legal combinations of b"", r"", u"", and f"". */ - int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0; - while (1) { - if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B')) - saw_b = 1; - /* Since this is a backwards compatibility support literal we don't - want to support it in arbitrary order like byte literals. 
*/ - else if (!(saw_b || saw_u || saw_r || saw_f) - && (c == 'u'|| c == 'U')) { - saw_u = 1; - } - /* ur"" and ru"" are not supported */ - else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) { - saw_r = 1; - } - else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) { - saw_f = 1; - } - else { - break; - } + if (Py_ISALPHA(c) || c == '_') { + /* Process r"", u"" and ur"" */ + switch (c) { + case 'b': + case 'B': c = tok_nextc(tok); - if (c == '"' || c == '\'') { + if (c == 'r' || c == 'R') + c = tok_nextc(tok); + if (c == '"' || c == '\'') goto letter_quote; - } + break; + case 'r': + case 'R': + c = tok_nextc(tok); + if (c == '"' || c == '\'') + goto letter_quote; + break; + case 'u': + case 'U': + c = tok_nextc(tok); + if (c == 'r' || c == 'R') + c = tok_nextc(tok); + if (c == '"' || c == '\'') + goto letter_quote; + break; } - while (is_potential_identifier_char(c)) { - if (c >= 128) { - nonascii = 1; - } + while (c != EOF && (Py_ISALNUM(c) || c == '_')) { c = tok_nextc(tok); } tok_backup(tok, c); - if (nonascii && !verify_identifier(tok)) { - return ERRORTOKEN; - } *p_start = tok->start; *p_end = tok->cur; - - /* async/await parsing block. */ - if (tok->cur - tok->start == 5 && tok->start[0] == 'a') { - /* May be an 'async' or 'await' token. For Python 3.7 or - later we recognize them unconditionally. For Python - 3.5 or 3.6 we recognize 'async' in front of 'def', and - either one inside of 'async def'. (Technically we - shouldn't recognize these at all for 3.4 or earlier, - but there's no *valid* Python 3.4 code that would be - rejected, and async functions will be rejected in a - later phase.) */ - if (!tok->async_hacks || tok->async_def) { - /* Always recognize the keywords. */ - if (memcmp(tok->start, "async", 5) == 0) { - return ASYNC; - } - if (memcmp(tok->start, "await", 5) == 0) { - return AWAIT; - } - } - else if (memcmp(tok->start, "async", 5) == 0) { - /* The current token is 'async'. - Look ahead one token to see if that is 'def'. */ - - struct tok_state ahead_tok; - char *ahead_tok_start = NULL, *ahead_tok_end = NULL; - int ahead_tok_kind; - - memcpy(&ahead_tok, tok, sizeof(ahead_tok)); - ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start, - &ahead_tok_end); - - if (ahead_tok_kind == NAME - && ahead_tok.cur - ahead_tok.start == 3 - && memcmp(ahead_tok.start, "def", 3) == 0) - { - /* The next token is going to be 'def', so instead of - returning a plain NAME token, return ASYNC. */ - tok->async_def_indent = tok->indent; - tok->async_def = 1; - return ASYNC; - } - } - } - return NAME; } /* Newline */ if (c == '\n') { tok->atbol = 1; - if (blankline || tok->level > 0) { + if (blankline || tok->level > 0) goto nextline; - } *p_start = tok->start; *p_end = tok->cur - 1; /* Leave '\n' out of the string */ tok->cont_line = 0; - if (tok->async_def) { - /* We're somewhere inside an 'async def' function, and - we've encountered a NEWLINE after its signature. 
*/ - tok->async_def_nl = 1; - } return NEWLINE; } @@ -1424,24 +1421,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) c = tok_nextc(tok); if (isdigit(c)) { goto fraction; - } else if (c == '.') { - c = tok_nextc(tok); - if (c == '.') { - *p_start = tok->start; - *p_end = tok->cur; - return ELLIPSIS; - } - else { - tok_backup(tok, c); - } - tok_backup(tok, '.'); } else { tok_backup(tok, c); + *p_start = tok->start; + *p_end = tok->cur; + return DOT; } - *p_start = tok->start; - *p_end = tok->cur; - return DOT; } /* Number */ @@ -1449,136 +1435,94 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) if (c == '0') { /* Hex, octal or binary -- maybe. */ c = tok_nextc(tok); + if (c == '.') + goto fraction; +#ifndef WITHOUT_COMPLEX + if (c == 'j' || c == 'J') + goto imaginary; +#endif if (c == 'x' || c == 'X') { + /* Hex */ c = tok_nextc(tok); + if (!isxdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } do { - if (c == '_') { - c = tok_nextc(tok); - } - if (!isxdigit(c)) { - tok_backup(tok, c); - return syntaxerror(tok, "invalid hexadecimal literal"); - } - do { - c = tok_nextc(tok); - } while (isxdigit(c)); - } while (c == '_'); + c = tok_nextc(tok); + } while (isxdigit(c)); } else if (c == 'o' || c == 'O') { /* Octal */ c = tok_nextc(tok); - do { - if (c == '_') { - c = tok_nextc(tok); - } - if (c < '0' || c >= '8') { - tok_backup(tok, c); - if (isdigit(c)) { - return syntaxerror(tok, - "invalid digit '%c' in octal literal", c); - } - else { - return syntaxerror(tok, "invalid octal literal"); - } - } - do { - c = tok_nextc(tok); - } while ('0' <= c && c < '8'); - } while (c == '_'); - if (isdigit(c)) { - return syntaxerror(tok, - "invalid digit '%c' in octal literal", c); + if (c < '0' || c >= '8') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; } + do { + c = tok_nextc(tok); + } while ('0' <= c && c < '8'); } else if (c == 'b' || c == 'B') { /* Binary */ c = tok_nextc(tok); - do { - if (c == '_') { - c = tok_nextc(tok); - } - if (c != '0' && c != '1') { - tok_backup(tok, c); - if (isdigit(c)) { - return syntaxerror(tok, - "invalid digit '%c' in binary literal", c); - } - else { - return syntaxerror(tok, "invalid binary literal"); - } - } - do { - c = tok_nextc(tok); - } while (c == '0' || c == '1'); - } while (c == '_'); - if (isdigit(c)) { - return syntaxerror(tok, - "invalid digit '%c' in binary literal", c); + if (c != '0' && c != '1') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; } + do { + c = tok_nextc(tok); + } while (c == '0' || c == '1'); } else { - int nonzero = 0; - /* maybe old-style octal; c is first char of it */ - /* in any case, allow '0' as a literal */ - while (1) { - if (c == '_') { - c = tok_nextc(tok); - if (!isdigit(c)) { - tok_backup(tok, c); - return syntaxerror(tok, "invalid decimal literal"); - } - } - if (c != '0') { - break; - } + int found_decimal = 0; + /* Octal; c is first char of it */ + /* There's no 'isoctdigit' macro, sigh */ + while ('0' <= c && c < '8') { c = tok_nextc(tok); } if (isdigit(c)) { - nonzero = 1; - c = tok_decimal_tail(tok); - if (c == 0) { - return ERRORTOKEN; - } + found_decimal = 1; + do { + c = tok_nextc(tok); + } while (isdigit(c)); } - if (c == '.') { - c = tok_nextc(tok); + if (c == '.') goto fraction; - } - else if (c == 'e' || c == 'E') { + else if (c == 'e' || c == 'E') goto exponent; - } - else if (c == 'j' || c == 'J') { +#ifndef WITHOUT_COMPLEX + else if (c == 'j' || c == 'J') goto imaginary; - } - else if (nonzero) { - /* 
Old-style octal: now disallowed. */ +#endif + else if (found_decimal) { + tok->done = E_TOKEN; tok_backup(tok, c); - return syntaxerror(tok, - "leading zeros in decimal integer " - "literals are not permitted; " - "use an 0o prefix for octal integers"); + return ERRORTOKEN; } } + if (c == 'l' || c == 'L') + c = tok_nextc(tok); } else { /* Decimal */ - c = tok_decimal_tail(tok); - if (c == 0) { - return ERRORTOKEN; - } - { + do { + c = tok_nextc(tok); + } while (isdigit(c)); + if (c == 'l' || c == 'L') + c = tok_nextc(tok); + else { /* Accept floating point numbers. */ if (c == '.') { - c = tok_nextc(tok); fraction: /* Fraction */ - if (isdigit(c)) { - c = tok_decimal_tail(tok); - if (c == 0) { - return ERRORTOKEN; - } - } + do { + c = tok_nextc(tok); + } while (isdigit(c)); } if (c == 'e' || c == 'E') { int e; @@ -1589,8 +1533,9 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) if (c == '+' || c == '-') { c = tok_nextc(tok); if (!isdigit(c)) { + tok->done = E_TOKEN; tok_backup(tok, c); - return syntaxerror(tok, "invalid decimal literal"); + return ERRORTOKEN; } } else if (!isdigit(c)) { tok_backup(tok, c); @@ -1599,16 +1544,16 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) *p_end = tok->cur; return NUMBER; } - c = tok_decimal_tail(tok); - if (c == 0) { - return ERRORTOKEN; - } + do { + c = tok_nextc(tok); + } while (isdigit(c)); } - if (c == 'j' || c == 'J') { +#ifndef WITHOUT_COMPLEX + if (c == 'j' || c == 'J') /* Imaginary part */ imaginary: c = tok_nextc(tok); - } +#endif } } tok_backup(tok, c); @@ -1620,61 +1565,55 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) letter_quote: /* String */ if (c == '\'' || c == '"') { + Py_ssize_t quote2 = tok->cur - tok->start + 1; int quote = c; - int quote_size = 1; /* 1 or 3 */ - int end_quote_size = 0; - - /* Nodes of type STRING, especially multi line strings - must be handled differently in order to get both - the starting line number and the column offset right. - (cf. issue 16806) */ - tok->first_lineno = tok->lineno; - tok->multi_line_start = tok->line_start; - - /* Find the quote size and start of string */ - c = tok_nextc(tok); - if (c == quote) { - c = tok_nextc(tok); - if (c == quote) { - quote_size = 3; - } - else { - end_quote_size = 1; /* empty string found */ - } - } - if (c != quote) { - tok_backup(tok, c); - } - - /* Get rest of string */ - while (end_quote_size != quote_size) { + int triple = 0; + int tripcount = 0; + for (;;) { c = tok_nextc(tok); - if (c == EOF) { - if (quote_size == 3) { - tok->done = E_EOFS; - } - else { + if (c == '\n') { + if (!triple) { tok->done = E_EOLS; + tok_backup(tok, c); + return ERRORTOKEN; } - tok->cur = tok->inp; - return ERRORTOKEN; + tripcount = 0; + tok->cont_line = 1; /* multiline string. 
*/ } - if (quote_size == 1 && c == '\n') { - tok->done = E_EOLS; + else if (c == EOF) { + if (triple) + tok->done = E_EOFS; + else + tok->done = E_EOLS; tok->cur = tok->inp; return ERRORTOKEN; } - if (c == quote) { - end_quote_size += 1; + else if (c == quote) { + tripcount++; + if (tok->cur - tok->start == quote2) { + c = tok_nextc(tok); + if (c == quote) { + triple = 1; + tripcount = 0; + continue; + } + tok_backup(tok, c); + } + if (!triple || tripcount == 3) + break; } - else { - end_quote_size = 0; - if (c == '\\') { - tok_nextc(tok); /* skip escaped char */ + else if (c == '\\') { + tripcount = 0; + c = tok_nextc(tok); + if (c == EOF) { + tok->done = E_EOLS; + tok->cur = tok->inp; + return ERRORTOKEN; } } + else + tripcount = 0; } - *p_start = tok->start; *p_end = tok->cur; return STRING; @@ -1688,14 +1627,6 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) tok->cur = tok->inp; return ERRORTOKEN; } - c = tok_nextc(tok); - if (c == EOF) { - tok->done = E_EOF; - tok->cur = tok->inp; - return ERRORTOKEN; - } else { - tok_backup(tok, c); - } tok->cont_line = 1; goto again; /* Read next line */ } @@ -1704,13 +1635,24 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) { int c2 = tok_nextc(tok); int token = PyToken_TwoChars(c, c2); +#ifndef PGEN + if (Py_Py3kWarningFlag && token == NOTEQUAL && c == '<') { + if (PyErr_WarnExplicit(PyExc_DeprecationWarning, + "<> not supported in 3.x; use !=", + tok->filename, tok->lineno, + NULL, NULL)) { + tok->done = E_ERROR; + tok->cur = tok->inp; + return ERRORTOKEN; + } + } +#endif if (token != OP) { int c3 = tok_nextc(tok); int token3 = PyToken_ThreeChars(c, c2, c3); if (token3 != OP) { token = token3; - } - else { + } else { tok_backup(tok, c3); } *p_start = tok->start; @@ -1725,38 +1667,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) case '(': case '[': case '{': - if (tok->level >= MAXLEVEL) { - return syntaxerror(tok, "too many nested parentheses"); - } - tok->parenstack[tok->level] = c; - tok->parenlinenostack[tok->level] = tok->lineno; tok->level++; break; case ')': case ']': case '}': - if (!tok->level) { - return syntaxerror(tok, "unmatched '%c'", c); - } tok->level--; - int opening = tok->parenstack[tok->level]; - if (!((opening == '(' && c == ')') || - (opening == '[' && c == ']') || - (opening == '{' && c == '}'))) - { - if (tok->parenlinenostack[tok->level] != tok->lineno) { - return syntaxerror(tok, - "closing parenthesis '%c' does not match " - "opening parenthesis '%c' on line %d", - c, opening, tok->parenlinenostack[tok->level]); - } - else { - return syntaxerror(tok, - "closing parenthesis '%c' does not match " - "opening parenthesis '%c'", - c, opening); - } - } break; } @@ -1770,6 +1686,11 @@ int PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) { int result = tok_get(tok, p_start, p_end); + if (tok->fp && ferror(tok->fp)) { + clearerr(tok->fp); + result = ERRORTOKEN; + tok->done = E_IO; + } if (tok->decoding_erred) { result = ERRORTOKEN; tok->done = E_DECODE; @@ -1777,67 +1698,67 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) return result; } -/* Get the encoding of a Python file. Check for the coding cookie and check if - the file starts with a BOM. - - PyTokenizer_FindEncodingFilename() returns NULL when it can't find the - encoding in the first or second line of the file (in which case the encoding - should be assumed to be UTF-8). - - The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed - by the caller. 
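*/

With the ferror() check added to PyTokenizer_Get() above, stdio read failures now surface through the same channel as decoding failures: the caller receives ERRORTOKEN and finds the specific cause in tok->done (E_IO here, E_DECODE for codec errors). A minimal driver sketch, using only the entry points declared in tokenizer.h below; dump_tokens() is a hypothetical helper, and _PyParser_TokenNames comes from token.h:

    #include <stdio.h>
    #include "tokenizer.h"
    #include "errcode.h"

    static int
    dump_tokens(const char *src)
    {
        char *a = NULL, *b = NULL;
        int type;
        struct tok_state *tok = PyTokenizer_FromString(src, 0);

        if (tok == NULL)
            return -1;
        for (;;) {
            type = PyTokenizer_Get(tok, &a, &b);
            if (type == ERRORTOKEN) {
                int err = tok->done;    /* E_IO, E_DECODE, E_TOKEN, ... */
                PyTokenizer_Free(tok);
                return err;
            }
            if (a != NULL && b != NULL)   /* [a, b) points into tok's buffer */
                printf("%-12s %.*s\n", _PyParser_TokenNames[type],
                       (int)(b - a), a);
            if (type == ENDMARKER)
                break;
        }
        PyTokenizer_Free(tok);
        return 0;
    }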
+/* This function is only called from parsetok. However, it cannot live
+   there, as it must be empty for PGEN, and we can check for PGEN only
+   in this file. */
 
-char *
-PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
+#if defined(PGEN) || !defined(Py_USING_UNICODE)
+char*
+PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
 {
-    struct tok_state *tok;
-    FILE *fp;
-    char *p_start =NULL , *p_end =NULL , *encoding = NULL;
-
-    fd = _Py_dup(fd);
-    if (fd < 0) {
-        return NULL;
-    }
-
-    fp = fdopen(fd, "r");
-    if (fp == NULL) {
-        return NULL;
-    }
-    tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
-    if (tok == NULL) {
-        fclose(fp);
-        return NULL;
-    }
-    if (filename != NULL) {
-        Py_INCREF(filename);
-        tok->filename = filename;
-    }
-    else {
-        tok->filename = PyUnicode_FromString("<string>");
-        if (tok->filename == NULL) {
-            fclose(fp);
-            PyTokenizer_Free(tok);
-            return encoding;
-        }
-    }
-    while (tok->lineno < 2 && tok->done == E_OK) {
-        PyTokenizer_Get(tok, &p_start, &p_end);
+    return NULL;
+}
+#else
+#ifdef Py_USING_UNICODE
+static PyObject *
+dec_utf8(const char *enc, const char *text, size_t len) {
+    PyObject *ret = NULL;
+    PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
+    if (unicode_text) {
+        ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
+        Py_DECREF(unicode_text);
     }
-    fclose(fp);
-    if (tok->encoding) {
-        encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1);
-        if (encoding)
-            strcpy(encoding, tok->encoding);
+    if (!ret) {
+        PyErr_Clear();
     }
-    PyTokenizer_Free(tok);
-    return encoding;
+    return ret;
 }
-
 char *
-PyTokenizer_FindEncoding(int fd)
+PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
 {
-    return PyTokenizer_FindEncodingFilename(fd, NULL);
+    char *text = NULL;
+    if (tok->encoding) {
+        /* convert source to original encoding */
+        PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
+        if (lineobj != NULL) {
+            int linelen = PyString_Size(lineobj);
+            const char *line = PyString_AsString(lineobj);
+            text = PyObject_MALLOC(linelen + 1);
+            if (text != NULL && line != NULL) {
+                if (linelen)
+                    strncpy(text, line, linelen);
+                text[linelen] = '\0';
+            }
+            Py_DECREF(lineobj);
+
+            /* adjust error offset */
+            if (*offset > 1) {
+                PyObject *offsetobj = dec_utf8(tok->encoding,
+                                               tok->buf, *offset-1);
+                if (offsetobj) {
+                    *offset = PyString_Size(offsetobj) + 1;
+                    Py_DECREF(offsetobj);
+                }
+            }
+
+        }
+    }
+    return text;
+}
+#endif /* defined(Py_USING_UNICODE) */
+#endif
+
 #ifdef Py_DEBUG
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 92669bf..f15e252 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -11,13 +11,6 @@ extern "C" {
 #include "token.h" /* For token types */
 
 #define MAXINDENT 100 /* Max indentation level */
-#define MAXLEVEL 200 /* Max parentheses level */
-
-enum decoding_state {
-    STATE_INIT,
-    STATE_RAW,
-    STATE_NORMAL /* have a codec associated with input */
-};
 
 /* Tokenizer state */
 struct tok_state {
@@ -36,51 +29,40 @@ struct tok_state {
     int indstack[MAXINDENT];    /* Stack of indents */
     int atbol;                  /* Nonzero if at begin of new line */
     int pendin;                 /* Pending indents (if > 0) or dedents (if < 0) */
-    const char *prompt, *nextprompt; /* For interactive prompting */
+    char *prompt, *nextprompt;  /* For interactive prompting */
     int lineno;                 /* Current line number */
-    int first_lineno;           /* First line of a single line or multi line string
-                                   expression (cf. issue 16806) */
     int level;                  /* () [] {} Parentheses nesting level */
                                 /* Used to allow free continuations inside them */
-    char parenstack[MAXLEVEL];
-    int parenlinenostack[MAXLEVEL];
-    PyObject *filename;
     /* Stuff for checking on different tab sizes */
+    const char *filename;       /* For error messages */
+    int altwarning;             /* Issue warning if alternate tabs don't match */
+    int alterror;               /* Issue error if alternate tabs don't match */
+    int alttabsize;             /* Alternate tab spacing */
     int altindstack[MAXINDENT]; /* Stack of alternate indents */
 
     /* Stuff for PEP 0263 */
-    enum decoding_state decoding_state;
+    int decoding_state;         /* -1:decoding, 0:init, 1:raw */
     int decoding_erred;         /* whether erred in decoding */
     int read_coding_spec;       /* whether 'coding:...' has been read */
-    char *encoding;             /* Source encoding. */
+    char *encoding;
     int cont_line;              /* whether we are in a continuation line. */
     const char* line_start;     /* pointer to start of current line */
-    const char* multi_line_start; /* pointer to start of first line of
-                                     a single line or multi line string
-                                     expression (cf. issue 16806) */
-    PyObject *decoding_readline; /* open(...).readline */
+#ifndef PGEN
+    PyObject *decoding_readline; /* codecs.open(...).readline */
     PyObject *decoding_buffer;
-    const char* enc;            /* Encoding for the current str. */
+#endif
+    const char* enc;
     const char* str;
     const char* input;          /* Tokenizer's newline translated copy of the string. */
-
-    int type_comments;          /* Whether to look for type comments */
-
-    /* async/await related fields (still needed depending on feature_version) */
-    int async_hacks;            /* =1 if async/await aren't always keywords */
-    int async_def;              /* =1 if tokens are inside an 'async def' body. */
-    int async_def_indent;       /* Indentation level of the outermost 'async def'. */
-    int async_def_nl;           /* =1 if the outermost 'async def' had at least one
-                                   NEWLINE token after it. */
 };
 
 extern struct tok_state *PyTokenizer_FromString(const char *, int);
-extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
-extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*,
-                                              const char *, const char *);
+extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
 extern void PyTokenizer_Free(struct tok_state *);
 extern int PyTokenizer_Get(struct tok_state *, char **, char **);
-
-#define tok_dump _Py_tok_dump
+#if defined(PGEN) || defined(Py_USING_UNICODE)
+extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
+                                          int len, int *offset);
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/Parser/tokenizer_pgen.c b/Parser/tokenizer_pgen.c
new file mode 100644
index 0000000..9cb8492
--- /dev/null
+++ b/Parser/tokenizer_pgen.c
@@ -0,0 +1,2 @@
+#define PGEN
+#include "tokenizer.c"
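The two-line tokenizer_pgen.c above is the whole mechanism by which the parser generator gets its own tokenizer: tokenizer.c is compiled a second time with PGEN defined, selecting the stripped-down paths guarded by the PGEN conditionals above (#ifndef PGEN, #if defined(PGEN) || ...) so that pgen needs no codec machinery and no PyObject support. The pgen tool links tokenizer_pgen.o while the interpreter links tokenizer.o, so the duplicated symbols never meet in one binary. A generic sketch of the idiom, with hypothetical file names:

    /* feature.c -- one source file, two build flavours */
    #ifdef PGEN
    int feature_enabled(void) { return 0; }   /* minimal build for pgen */
    #else
    int feature_enabled(void) { return 1; }   /* full interpreter build */
    #endif

    /* feature_pgen.c -- second translation unit, forcing the PGEN flavour */
    #define PGEN
    #include "feature.c"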