summaryrefslogtreecommitdiffstats
path: root/Parser
diff options
context:
space:
mode:
Diffstat (limited to 'Parser')
-rw-r--r--Parser/Python.asdl6
-rw-r--r--Parser/asdl.py2
-rw-r--r--Parser/node.c3
-rw-r--r--Parser/parser.c14
-rw-r--r--Parser/parser.h2
-rw-r--r--Parser/parsetok.c9
-rw-r--r--Parser/tokenizer.c5
-rw-r--r--Parser/tokenizer.h1
8 files changed, 29 insertions, 13 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index bcb5ecb..4397d89 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -46,7 +46,8 @@ module Python version "$Revision$"
| Pass | Break | Continue
-- XXX Jython will be different
- attributes (int lineno)
+ -- col_offset is the byte offset in the utf8 string the parser uses
+ attributes (int lineno, int col_offset)
-- BoolOp() can use left & right?
expr = BoolOp(boolop op, expr* values)
@@ -76,7 +77,8 @@ module Python version "$Revision$"
| List(expr* elts, expr_context ctx)
| Tuple(expr *elts, expr_context ctx)
- attributes (int lineno)
+ -- col_offset is the byte offset in the utf8 string the parser uses
+ attributes (int lineno, int col_offset)
expr_context = Load | Store | Del | AugLoad | AugStore | Param
diff --git a/Parser/asdl.py b/Parser/asdl.py
index 3a69308..bd892b6 100644
--- a/Parser/asdl.py
+++ b/Parser/asdl.py
@@ -156,6 +156,8 @@ class ASDLParser(spark.GenericParser, object):
if id.value != "attributes":
raise ASDLSyntaxError(id.lineno,
msg="expected attributes, found %s" % id)
+ if attributes:
+ attributes.reverse()
return Sum(sum, attributes)
def p_product(self, (_0, fields, _1)):
diff --git a/Parser/node.c b/Parser/node.c
index 75900ce..7ed6c0e 100644
--- a/Parser/node.c
+++ b/Parser/node.c
@@ -76,7 +76,7 @@ fancy_roundup(int n)
int
-PyNode_AddChild(register node *n1, int type, char *str, int lineno)
+PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offset)
{
const int nch = n1->n_nchildren;
int current_capacity;
@@ -103,6 +103,7 @@ PyNode_AddChild(register node *n1, int type, char *str, int lineno)
n->n_type = type;
n->n_str = str;
n->n_lineno = lineno;
+ n->n_col_offset = col_offset;
n->n_nchildren = 0;
n->n_child = NULL;
return 0;
diff --git a/Parser/parser.c b/Parser/parser.c
index 4a5307c..ada6be2 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -105,11 +105,11 @@ PyParser_Delete(parser_state *ps)
/* PARSER STACK OPERATIONS */
static int
-shift(register stack *s, int type, char *str, int newstate, int lineno)
+shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
{
int err;
assert(!s_empty(s));
- err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno);
+ err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
if (err)
return err;
s->s_top->s_state = newstate;
@@ -117,13 +117,13 @@ shift(register stack *s, int type, char *str, int newstate, int lineno)
}
static int
-push(register stack *s, int type, dfa *d, int newstate, int lineno)
+push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
{
int err;
register node *n;
n = s->s_top->s_parent;
assert(!s_empty(s));
- err = PyNode_AddChild(n, type, (char *)NULL, lineno);
+ err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
if (err)
return err;
s->s_top->s_state = newstate;
@@ -213,7 +213,7 @@ future_hack(parser_state *ps)
int
PyParser_AddToken(register parser_state *ps, register int type, char *str,
- int lineno, int *expected_ret)
+ int lineno, int col_offset, int *expected_ret)
{
register int ilabel;
int err;
@@ -245,7 +245,7 @@ PyParser_AddToken(register parser_state *ps, register int type, char *str,
dfa *d1 = PyGrammar_FindDFA(
ps->p_grammar, nt);
if ((err = push(&ps->p_stack, nt, d1,
- arrow, lineno)) > 0) {
+ arrow, lineno, col_offset)) > 0) {
D(printf(" MemError: push\n"));
return err;
}
@@ -255,7 +255,7 @@ PyParser_AddToken(register parser_state *ps, register int type, char *str,
/* Shift the token */
if ((err = shift(&ps->p_stack, type, str,
- x, lineno)) > 0) {
+ x, lineno, col_offset)) > 0) {
D(printf(" MemError: shift.\n"));
return err;
}
diff --git a/Parser/parser.h b/Parser/parser.h
index f5d2d0d..bdca3e9 100644
--- a/Parser/parser.h
+++ b/Parser/parser.h
@@ -32,7 +32,7 @@ typedef struct {
parser_state *PyParser_New(grammar *g, int start);
void PyParser_Delete(parser_state *ps);
-int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno,
+int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno, int col_offset,
int *expected_ret);
void PyGrammar_AddAccelerators(grammar *g);
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 8b1f70c..77a2cac 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -130,6 +130,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
int type;
size_t len;
char *str;
+ int col_offset;
type = PyTokenizer_Get(tok, &a, &b);
if (type == ERRORTOKEN) {
@@ -185,9 +186,13 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
len == 4 && str[0] == 'w' && strcmp(str, "with") == 0)
handling_with = 1;
#endif
-
+ if (a >= tok->line_start)
+ col_offset = a - tok->line_start;
+ else
+ col_offset = -1;
+
if ((err_ret->error =
- PyParser_AddToken(ps, (int)type, str, tok->lineno,
+ PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset,
&(err_ret->expected))) != E_OK) {
if (err_ret->error != E_DONE)
PyObject_FREE(str);
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 036bed8..4174e9c 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -764,6 +764,7 @@ tok_nextc(register struct tok_state *tok)
}
if (tok->start == NULL)
tok->buf = tok->cur;
+ tok->line_start = tok->cur;
tok->lineno++;
tok->inp = end;
return Py_CHARMASK(*tok->cur++);
@@ -798,6 +799,7 @@ tok_nextc(register struct tok_state *tok)
}
tok->buf = buf;
tok->cur = tok->buf + oldlen;
+ tok->line_start = tok->cur;
strcpy(tok->buf + oldlen, new);
PyMem_FREE(new);
tok->inp = tok->buf + newlen;
@@ -809,7 +811,9 @@ tok_nextc(register struct tok_state *tok)
if (tok->buf != NULL)
PyMem_DEL(tok->buf);
tok->buf = new;
+ tok->line_start = tok->buf;
tok->cur = tok->buf;
+ tok->line_start = tok->buf;
tok->inp = strchr(tok->buf, '\0');
tok->end = tok->inp + 1;
}
@@ -877,6 +881,7 @@ tok_nextc(register struct tok_state *tok)
done = tok->inp[-1] == '\n';
}
tok->cur = tok->buf + cur;
+ tok->line_start = tok->cur;
/* replace "\r\n" with "\n" */
/* For Mac we leave the \r, giving a syntax error */
pt = tok->inp - 2;
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 0b360617..5e7ebf7 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -45,6 +45,7 @@ struct tok_state {
int read_coding_spec; /* whether 'coding:...' has been read */
char *encoding;
int cont_line; /* whether we are in a continuation line. */
+ const char* line_start; /* pointer to start of current line */
#ifndef PGEN
PyObject *decoding_readline; /* codecs.open(...).readline */
PyObject *decoding_buffer;