9 files changed, 131 insertions, 98 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index 789e07b..9407b2f 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -17,7 +17,7 @@ module Python version "$Revision$"
 			 expr? starargs,
 			 expr? kwargs,
 			 stmt* body,
-			 expr *decorator_list)
+			 expr* decorator_list)
 	      | Return(expr? value)
 
 	      | Delete(expr* targets)
@@ -36,7 +36,7 @@ module Python version "$Revision$"
 	      | Assert(expr test, expr? msg)
 
 	      | Import(alias* names)
-	      | ImportFrom(identifier module, alias* names, int? level)
+	      | ImportFrom(identifier? module, alias* names, int? level)
 
 	      | Global(identifier* names)
 	      | Nonlocal(identifier* names)
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index 6df11f5..d6555d6 100755
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -376,6 +376,7 @@ class Obj2ModVisitor(PickleVisitor):
         self.emit(format % error, 1, reflow=False)
         if add_label:
             self.emit("failed:", 1)
+            self.emit("Py_XDECREF(tmp);", 1)
         self.emit("return 1;", 1)
         self.emit("}", 0)
         self.emit("", 0)
@@ -720,7 +721,7 @@ static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int
         }
         PyTuple_SET_ITEM(fnames, i, field);
     }
-    result = PyObject_CallFunction((PyObject*)&PyType_Type, "U(O){sOss}",
+    result = PyObject_CallFunction((PyObject*)&PyType_Type, "s(O){sOss}",
                     type, base, "_fields", fnames, "__module__", "_ast");
     Py_DECREF(fnames);
     return (PyTypeObject*)result;
@@ -730,8 +731,9 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
 {
     int i, result;
     PyObject *s, *l = PyTuple_New(num_fields);
-    if (!l) return 0;
-    for(i = 0; i < num_fields; i++) {
+    if (!l)
+        return 0;
+    for (i = 0; i < num_fields; i++) {
         s = PyUnicode_FromString(attrs[i]);
         if (!s) {
             Py_DECREF(l);
diff --git a/Parser/grammar.mak b/Parser/grammar.mak
deleted file mode 100644
index 55f028f..0000000
--- a/Parser/grammar.mak
+++ /dev/null
@@ -1,45 +0,0 @@
-# This manages to rebuild graminit.{h, c} under MSVC 6 (Windows), via
-#
-#     nmake /f grammar.mak
-#
-# You may also need to copy python23.dll into this directory, or get
-# it on your search path.
-#
-# The intermediate files can be nuked afterwards:
-#
-#     nmake /f grammar.mak clean
-#
-# I don't understand the maze of preprocessor #define's on Windows, and
-# as a result this requires linking with python23.lib, so it's of no use
-# for bootstrapping (the cause appears to be a useless-- in this
-# particular case --pragma in PC\pyconfig.h, which demands that
-# python23.lib get linked in).
-
-LIBS= ..\PCbuild\python25.lib
-
-CFLAGS= /I ..\Include /I ..\PC /D MS_NO_COREDLL /D PGEN /MD
-
-GRAMMAR_H= ..\Include\graminit.h
-GRAMMAR_C= ..\Python\graminit.c
-GRAMMAR_INPUT= ..\Grammar\Grammar
-
-PGEN= pgen.exe
-
-POBJS= acceler.obj grammar1.obj listnode.obj node.obj parser.obj \
-       parsetok.obj tokenizer.obj bitset.obj metagrammar.obj
-
-PARSER_OBJS= $(POBJS) myreadline.obj
-
-PGOBJS= firstsets.obj grammar.obj pgen.obj printgrammar.obj pgenmain.obj
-
-PGENOBJS= $(POBJS) $(PGOBJS)
-
-$(GRAMMAR_H) $(GRAMMAR_C): $(PGEN) $(GRAMMAR_INPUT)
-		$(PGEN) $(GRAMMAR_INPUT) $(GRAMMAR_H) $(GRAMMAR_C)
-
-$(PGEN):	$(PGENOBJS)
-		$(CC) $(PGENOBJS) $(LIBS) /Fe$(PGEN)
-
-clean:
-        del *.obj
-        del $(PGEN)
diff --git a/Parser/myreadline.c b/Parser/myreadline.c
index 7166fc1..50802c3 100644
--- a/Parser/myreadline.c
+++ b/Parser/myreadline.c
@@ -87,7 +87,7 @@ my_fgets(char *buf, int len, FILE *fp)
 #endif
             if (s < 0)
                     return 1;
-            /* try again */
+	    /* try again */
             continue;
         }
 #endif
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 16cf5cb..7636a54 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -46,13 +46,14 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
                           perrdetail *err_ret, int *flags)
 {
     struct tok_state *tok;
+    int exec_input = start == file_input;
 
     initerr(err_ret, filename);
 
     if (*flags & PyPARSE_IGNORE_COOKIE)
-        tok = PyTokenizer_FromUTF8(s);
+        tok = PyTokenizer_FromUTF8(s, exec_input);
     else
-        tok = PyTokenizer_FromString(s);
+        tok = PyTokenizer_FromString(s, exec_input);
     if (tok == NULL) {
         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
         return NULL;
@@ -240,16 +241,24 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
             }
         }
     } else if (tok->encoding != NULL) {
+        /* A node's 'n_str' uses PyObject_*, while 'tok->encoding' was
+         * allocated using PyMem_*, so the string is copied, not handed over.
+         */
         node* r = PyNode_New(encoding_decl);
-        if (!r) {
+        if (r)
+            r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
+        if (!r || !r->n_str) {
             err_ret->error = E_NOMEM;
+            if (r)
+                PyObject_FREE(r);
             n = NULL;
             goto done;
         }
-        r->n_str = tok->encoding;
+        strcpy(r->n_str, tok->encoding);
+        PyMem_FREE(tok->encoding);
+        tok->encoding = NULL;
         r->n_nchildren = 1;
         r->n_child = n;
-        tok->encoding = NULL;
         n = r;
     }
 
diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c
index 88fa7f1..4b7b55a 100644
--- a/Parser/pgenmain.c
+++ b/Parser/pgenmain.c
@@ -13,6 +13,8 @@
    - check for duplicate definitions of names (instead of fatal err)
 */
 
+#define PGEN
+
 #include "Python.h"
 #include "pgenheaders.h"
 #include "grammar.h"
diff --git a/Parser/printgrammar.c b/Parser/printgrammar.c
index 01f552f..dd7e6ae 100644
--- a/Parser/printgrammar.c
+++ b/Parser/printgrammar.c
@@ -1,6 +1,8 @@
 
 /* Print a bunch of C initializers that represent a grammar */
 
+#define PGEN
+
 #include "pgenheaders.h"
 #include "grammar.h"
 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index d985131..3f6be2f 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -119,6 +119,7 @@ tok_new(void)
     tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
     tok->done = E_OK;
     tok->fp = NULL;
+    tok->input = NULL;
     tok->tabsize = TABSIZE;
     tok->indent = 0;
     tok->indstack[0] = 0;
@@ -145,6 +146,17 @@ tok_new(void)
     return tok;
 }
 
+static char *
+new_string(const char *s, Py_ssize_t len)
+{
+    char *copy = (char *)PyMem_MALLOC(len + 1);  /* +1 for the NUL */
+    if (copy == NULL)
+        return NULL;
+    memcpy(copy, s, len);
+    copy[len] = '\0';
+    return copy;
+}
+
 #ifdef PGEN
 
 static char *
@@ -159,10 +171,10 @@ decoding_feof(struct tok_state *tok)
     return feof(tok->fp);
 }
 
-static const char *
-decode_str(const char *str, struct tok_state *tok)
+static char *
+decode_str(const char *str, int exec_input, struct tok_state *tok)
 {
-    return str;
+    return new_string(str, strlen(str));
 }
 
 #else /* PGEN */
@@ -177,16 +189,6 @@ error_ret(struct tok_state *tok) /* XXX */
     return NULL;                /* as if it were EOF */
 }
 
-static char *
-new_string(const char *s, Py_ssize_t len)
-{
-    char* result = (char *)PyMem_MALLOC(len + 1);
-    if (result != NULL) {
-        memcpy(result, s, len);
-        result[len] = '\0';
-    }
-    return result;
-}
 
 static char *
 get_normal_name(char *s)        /* for utf-8 and latin-1 */
@@ -243,7 +245,7 @@ get_coding_spec(const char *s, Py_ssize_t size)
             } while (t[0] == '\x20' || t[0] == '\t');
 
             begin = t;
-            while (isalnum(Py_CHARMASK(t[0])) ||
+            while (Py_ISALNUM(t[0]) ||
                    t[0] == '-' || t[0] == '_' || t[0] == '.')
                 t++;
 
@@ -460,17 +462,20 @@ static int
 fp_setreadl(struct tok_state *tok, const char* enc)
 {
     PyObject *readline = NULL, *stream = NULL, *io = NULL;
+    int fd;
 
     io = PyImport_ImportModuleNoBlock("io");
     if (io == NULL)
         goto cleanup;
 
-    if (tok->filename)
-        stream = PyObject_CallMethod(io, "open", "ssis",
-                                     tok->filename, "r", -1, enc);
-    else
-        stream = PyObject_CallMethod(io, "open", "isisOOO",
-                        fileno(tok->fp), "r", -1, enc, Py_None, Py_None, Py_False);
+    fd = fileno(tok->fp);
+    if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
+        PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL);
+        goto cleanup;
+    }
+
+    stream = PyObject_CallMethod(io, "open", "isisOOO",
+                    fd, "r", -1, enc, Py_None, Py_None, Py_False);
     if (stream == NULL)
         goto cleanup;
 
@@ -540,6 +545,7 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
 {
     char *line = NULL;
     int badchar = 0;
+    PyObject *filename;
     for (;;) {
         if (tok->decoding_state == STATE_NORMAL) {
             /* We already have a codec associated with
@@ -578,16 +584,18 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
             }
     }
     if (badchar) {
-        char buf[500];
         /* Need to add 1 to the line number, since this line
            has not been counted, yet.  */
-        sprintf(buf,
-            "Non-UTF-8 code starting with '\\x%.2x' "
-            "in file %.200s on line %i, "
-            "but no encoding declared; "
-            "see http://python.org/dev/peps/pep-0263/ for details",
-            badchar, tok->filename, tok->lineno + 1);
-        PyErr_SetString(PyExc_SyntaxError, buf);
+        filename = PyUnicode_DecodeFSDefault(tok->filename);
+        if (filename != NULL) {
+            PyErr_Format(PyExc_SyntaxError,
+                    "Non-UTF-8 code starting with '\\x%.2x' "
+                    "in file %U on line %i, "
+                    "but no encoding declared; "
+                    "see http://python.org/dev/peps/pep-0263/ for details",
+                    badchar, filename, tok->lineno + 1);
+            Py_DECREF(filename);
+        }
         return error_ret(tok);
     }
 #endif
@@ -652,17 +660,62 @@ translate_into_utf8(const char* str, const char* enc) {
     return utf8;
 }
 
+
+/* Return a PyMem copy of s with "\r\n" and "\r" translated to "\n" (plus a
+   final "\n" for exec input); on failure set tok->done and return NULL. */
+static char *
+translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
+    size_t needed_length = strlen(s) + 2, final_length;
+    int skip_next_lf = 0;
+    char *buf, *current, *shrunk, c = '\0';
+    buf = PyMem_MALLOC(needed_length);
+    if (buf == NULL) {
+        tok->done = E_NOMEM;
+        return NULL;
+    }
+    for (current = buf; *s; s++, current++) {
+        c = *s;
+        if (skip_next_lf) {
+            skip_next_lf = 0;
+            if (c == '\n') {
+                c = *++s;
+                if (!c)
+                    break;
+            }
+        }
+        if (c == '\r') {
+            skip_next_lf = 1;
+            c = '\n';
+        }
+        *current = c;
+    }
+    if (exec_input && c != '\n')    /* exec input must end in a newline */
+        *current++ = '\n';
+    *current = '\0';
+    final_length = current - buf + 1;
+    if (final_length < needed_length) {
+        shrunk = PyMem_REALLOC(buf, final_length);
+        if (shrunk != NULL)     /* a failed shrink leaves buf valid */
+            buf = shrunk;
+    }
+    return buf;
+}
+
 /* Decode a byte string STR for use as the buffer of TOK.
    Look for encoding declarations inside STR, and record them
    inside TOK.  */
 
 static const char *
-decode_str(const char *str, struct tok_state *tok)
+decode_str(const char *input, int single, struct tok_state *tok)
 {
     PyObject* utf8 = NULL;
+    const char *str;
     const char *s;
     const char *newl[2] = {NULL, NULL};
     int lineno = 0;
+    tok->input = str = translate_newlines(input, single, tok);
+    if (str == NULL)
+        return NULL;
     tok->enc = NULL;
     tok->str = str;
     if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
@@ -713,12 +766,12 @@ decode_str(const char *str, struct tok_state *tok)
 /* Set up tokenizer for string */
 
 struct tok_state *
-PyTokenizer_FromString(const char *str)
+PyTokenizer_FromString(const char *str, int exec_input)
 {
     struct tok_state *tok = tok_new();
     if (tok == NULL)
         return NULL;
-    str = (char *)decode_str(str, tok);
+    str = (char *)decode_str(str, exec_input, tok);
     if (str == NULL) {
         PyTokenizer_Free(tok);
         return NULL;
@@ -730,11 +783,18 @@ PyTokenizer_FromString(const char *str)
 }
 
 struct tok_state *
-PyTokenizer_FromUTF8(const char *str)
+PyTokenizer_FromUTF8(const char *str, int exec_input)
 {
     struct tok_state *tok = tok_new();
     if (tok == NULL)
         return NULL;
+#ifndef PGEN
+    tok->input = str = translate_newlines(str, exec_input, tok);
+#endif
+    if (str == NULL) {
+        PyTokenizer_Free(tok);
+        return NULL;
+    }
     tok->decoding_state = STATE_RAW;
     tok->read_coding_spec = 1;
     tok->enc = NULL;
@@ -751,7 +811,6 @@ PyTokenizer_FromUTF8(const char *str)
     return tok;
 }
 
-
 /* Set up tokenizer for file */
 
 struct tok_state *
@@ -797,6 +856,8 @@ PyTokenizer_Free(struct tok_state *tok)
 #endif
     if (tok->fp != NULL && tok->buf != NULL)
         PyMem_FREE(tok->buf);
+    if (tok->input)
+        PyMem_FREE((char *)tok->input);
     PyMem_FREE(tok);
 }
 
@@ -832,6 +893,13 @@ tok_nextc(register struct tok_state *tok)
         if (tok->prompt != NULL) {
             char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
 #ifndef PGEN
+            if (newtok != NULL) {
+                char *translated = translate_newlines(newtok, 0, tok);
+                PyMem_FREE(newtok);
+                if (translated == NULL)
+                    return EOF;
+                newtok = translated;
+            }
             if (tok->encoding && newtok && *newtok) {
                 /* Recode to UTF-8 */
                 Py_ssize_t buflen;
@@ -1407,10 +1475,8 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
             c = tok_nextc(tok);
             if (c == '.')
                 goto fraction;
-#ifndef WITHOUT_COMPLEX
             if (c == 'j' || c == 'J')
                 goto imaginary;
-#endif
             if (c == 'x' || c == 'X') {
 
                 /* Hex */
@@ -1462,10 +1528,8 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
                     goto fraction;
                 else if (c == 'e' || c == 'E')
                     goto exponent;
-#ifndef WITHOUT_COMPLEX
                 else if (c == 'j' || c == 'J')
                     goto imaginary;
-#endif
                 else if (nonzero) {
                     tok->done = E_TOKEN;
                     tok_backup(tok, c);
@@ -1502,12 +1566,10 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
                         c = tok_nextc(tok);
                     } while (isdigit(c));
                 }
-#ifndef WITHOUT_COMPLEX
                 if (c == 'j' || c == 'J')
                     /* Imaginary part */
         imaginary:
                     c = tok_nextc(tok);
-#endif
             }
         }
         tok_backup(tok, c);
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 6c1742f..2be3bf2 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -53,15 +53,16 @@ struct tok_state {
     int cont_line;          /* whether we are in a continuation line. */
     const char* line_start;     /* pointer to start of current line */
 #ifndef PGEN
-    PyObject *decoding_readline; /* codecs.open(...).readline */
+    PyObject *decoding_readline; /* open(...).readline */
     PyObject *decoding_buffer;
 #endif
     const char* enc;        /* Encoding for the current str. */
     const char* str;
+    const char* input; /* Tokenizer's newline translated copy of the string. */
 };
 
-extern struct tok_state *PyTokenizer_FromString(const char *);
-extern struct tok_state *PyTokenizer_FromUTF8(const char *);
+extern struct tok_state *PyTokenizer_FromString(const char *, int);
+extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
 extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
                                               char *, char *);
 extern void PyTokenizer_Free(struct tok_state *);