8 files changed, 29 insertions, 13 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index bcb5ecb..4397d89 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -46,7 +46,8 @@ module Python version "$Revision$"
 	      | Pass | Break | Continue
 
 	      -- XXX Jython will be different
-	      attributes (int lineno)
+	      -- col_offset is the byte offset from the start of the line in the UTF-8 string the parser uses
+	      attributes (int lineno, int col_offset)
 
 	      -- BoolOp() can use left & right?
 	expr = BoolOp(boolop op, expr* values)
@@ -76,7 +77,8 @@ module Python version "$Revision$"
 	     | List(expr* elts, expr_context ctx) 
 	     | Tuple(expr *elts, expr_context ctx)
 
-	      attributes (int lineno)
+	      -- col_offset is the byte offset from the start of the line in the UTF-8 string the parser uses
+	      attributes (int lineno, int col_offset)
 
 	expr_context = Load | Store | Del | AugLoad | AugStore | Param
 
diff --git a/Parser/asdl.py b/Parser/asdl.py
index 3a69308..bd892b6 100644
--- a/Parser/asdl.py
+++ b/Parser/asdl.py
@@ -156,6 +156,8 @@ class ASDLParser(spark.GenericParser, object):
         if id.value != "attributes":
             raise ASDLSyntaxError(id.lineno,
                                   msg="expected attributes, found %s" % id)
+        if attributes:
+            attributes.reverse()
         return Sum(sum, attributes)
 
     def p_product(self, (_0, fields, _1)):
diff --git a/Parser/node.c b/Parser/node.c
index 75900ce..7ed6c0e 100644
--- a/Parser/node.c
+++ b/Parser/node.c
@@ -76,7 +76,7 @@ fancy_roundup(int n)
 
 
 int
-PyNode_AddChild(register node *n1, int type, char *str, int lineno)
+PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offset)
 {
 	const int nch = n1->n_nchildren;
 	int current_capacity;
@@ -103,6 +103,7 @@ PyNode_AddChild(register node *n1, int type, char *str, int lineno)
 	n->n_type = type;
 	n->n_str = str;
 	n->n_lineno = lineno;
+	n->n_col_offset = col_offset;
 	n->n_nchildren = 0;
 	n->n_child = NULL;
 	return 0;
diff --git a/Parser/parser.c b/Parser/parser.c
index 4a5307c..ada6be2 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -105,11 +105,11 @@ PyParser_Delete(parser_state *ps)
 /* PARSER STACK OPERATIONS */
 
 static int
-shift(register stack *s, int type, char *str, int newstate, int lineno)
+shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
 {
 	int err;
 	assert(!s_empty(s));
-	err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno);
+	err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
 	if (err)
 		return err;
 	s->s_top->s_state = newstate;
@@ -117,13 +117,13 @@ shift(register stack *s, int type, char *str, int newstate, int lineno)
 }
 
 static int
-push(register stack *s, int type, dfa *d, int newstate, int lineno)
+push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
 {
 	int err;
 	register node *n;
 	n = s->s_top->s_parent;
 	assert(!s_empty(s));
-	err = PyNode_AddChild(n, type, (char *)NULL, lineno);
+	err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
 	if (err)
 		return err;
 	s->s_top->s_state = newstate;
@@ -213,7 +213,7 @@ future_hack(parser_state *ps)
 
 int
 PyParser_AddToken(register parser_state *ps, register int type, char *str,
-	          int lineno, int *expected_ret)
+	          int lineno, int col_offset, int *expected_ret)
 {
 	register int ilabel;
 	int err;
@@ -245,7 +245,7 @@ PyParser_AddToken(register parser_state *ps, register int type, char *str,
 					dfa *d1 = PyGrammar_FindDFA(
 						ps->p_grammar, nt);
 					if ((err = push(&ps->p_stack, nt, d1,
-						arrow, lineno)) > 0) {
+						arrow, lineno, col_offset)) > 0) {
 						D(printf(" MemError: push\n"));
 						return err;
 					}
@@ -255,7 +255,7 @@ PyParser_AddToken(register parser_state *ps, register int type, char *str,
 				
 				/* Shift the token */
 				if ((err = shift(&ps->p_stack, type, str,
-						x, lineno)) > 0) {
+						x, lineno, col_offset)) > 0) {
 					D(printf(" MemError: shift.\n"));
 					return err;
 				}
diff --git a/Parser/parser.h b/Parser/parser.h
index f5d2d0d..bdca3e9 100644
--- a/Parser/parser.h
+++ b/Parser/parser.h
@@ -32,7 +32,7 @@ typedef struct {
 
 parser_state *PyParser_New(grammar *g, int start);
 void PyParser_Delete(parser_state *ps);
-int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno,
+int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno, int col_offset,
                       int *expected_ret);
 void PyGrammar_AddAccelerators(grammar *g);
 
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 8b1f70c..77a2cac 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -130,6 +130,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
 		int type;
 		size_t len;
 		char *str;
+		int col_offset;
 
 		type = PyTokenizer_Get(tok, &a, &b);
 		if (type == ERRORTOKEN) {
@@ -185,9 +186,13 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
 			 len == 4 && str[0] == 'w' && strcmp(str, "with") == 0)
 			handling_with = 1;
 #endif
-
+		if (a >= tok->line_start)
+			col_offset = (int)(a - tok->line_start);
+		else
+			col_offset = -1;
+
 		if ((err_ret->error =
-		     PyParser_AddToken(ps, (int)type, str, tok->lineno,
+		     PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset,
 				       &(err_ret->expected))) != E_OK) {
 			if (err_ret->error != E_DONE)
 				PyObject_FREE(str);
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 036bed8..4174e9c 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -764,6 +764,7 @@ tok_nextc(register struct tok_state *tok)
 			}
 			if (tok->start == NULL)
 				tok->buf = tok->cur;
+			tok->line_start = tok->cur;
 			tok->lineno++;
 			tok->inp = end;
 			return Py_CHARMASK(*tok->cur++);
@@ -798,6 +799,7 @@ tok_nextc(register struct tok_state *tok)
 				}
 				tok->buf = buf;
 				tok->cur = tok->buf + oldlen;
+				tok->line_start = tok->cur;
 				strcpy(tok->buf + oldlen, new);
 				PyMem_FREE(new);
 				tok->inp = tok->buf + newlen;
@@ -809,7 +811,8 @@ tok_nextc(register struct tok_state *tok)
 				if (tok->buf != NULL)
 					PyMem_DEL(tok->buf);
 				tok->buf = new;
 				tok->cur = tok->buf;
+				tok->line_start = tok->buf;
 				tok->inp = strchr(tok->buf, '\0');
 				tok->end = tok->inp + 1;
 			}
@@ -877,6 +880,7 @@ tok_nextc(register struct tok_state *tok)
 				done = tok->inp[-1] == '\n';
 			}
 			tok->cur = tok->buf + cur;
+			tok->line_start = tok->cur;
 			/* replace "\r\n" with "\n" */
 			/* For Mac we leave the \r, giving a syntax error */
 			pt = tok->inp - 2;
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 0b360617..5e7ebf7 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -45,6 +45,7 @@ struct tok_state {
 	int read_coding_spec;	/* whether 'coding:...' has been read  */
 	char *encoding;
 	int cont_line;          /* whether we are in a continuation line. */
+	const char *line_start;	/* pointer to start of current line */
 #ifndef PGEN
 	PyObject *decoding_readline; /* codecs.open(...).readline */
 	PyObject *decoding_buffer;