diff options
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/Python.asdl | 6 | ||||
-rw-r--r-- | Parser/asdl.py | 2 | ||||
-rw-r--r-- | Parser/node.c | 3 | ||||
-rw-r--r-- | Parser/parser.c | 14 | ||||
-rw-r--r-- | Parser/parser.h | 2 | ||||
-rw-r--r-- | Parser/parsetok.c | 9 | ||||
-rw-r--r-- | Parser/tokenizer.c | 5 | ||||
-rw-r--r-- | Parser/tokenizer.h | 1 |
8 files changed, 29 insertions, 13 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl index bcb5ecb..4397d89 100644 --- a/Parser/Python.asdl +++ b/Parser/Python.asdl @@ -46,7 +46,8 @@ module Python version "$Revision$" | Pass | Break | Continue -- XXX Jython will be different - attributes (int lineno) + -- col_offset is the byte offset in the utf8 string the parser uses + attributes (int lineno, int col_offset) -- BoolOp() can use left & right? expr = BoolOp(boolop op, expr* values) @@ -76,7 +77,8 @@ module Python version "$Revision$" | List(expr* elts, expr_context ctx) | Tuple(expr *elts, expr_context ctx) - attributes (int lineno) + -- col_offset is the byte offset in the utf8 string the parser uses + attributes (int lineno, int col_offset) expr_context = Load | Store | Del | AugLoad | AugStore | Param diff --git a/Parser/asdl.py b/Parser/asdl.py index 3a69308..bd892b6 100644 --- a/Parser/asdl.py +++ b/Parser/asdl.py @@ -156,6 +156,8 @@ class ASDLParser(spark.GenericParser, object): if id.value != "attributes": raise ASDLSyntaxError(id.lineno, msg="expected attributes, found %s" % id) + if attributes: + attributes.reverse() return Sum(sum, attributes) def p_product(self, (_0, fields, _1)): diff --git a/Parser/node.c b/Parser/node.c index 75900ce..7ed6c0e 100644 --- a/Parser/node.c +++ b/Parser/node.c @@ -76,7 +76,7 @@ fancy_roundup(int n) int -PyNode_AddChild(register node *n1, int type, char *str, int lineno) +PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offset) { const int nch = n1->n_nchildren; int current_capacity; @@ -103,6 +103,7 @@ PyNode_AddChild(register node *n1, int type, char *str, int lineno) n->n_type = type; n->n_str = str; n->n_lineno = lineno; + n->n_col_offset = col_offset; n->n_nchildren = 0; n->n_child = NULL; return 0; diff --git a/Parser/parser.c b/Parser/parser.c index 4a5307c..ada6be2 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -105,11 +105,11 @@ PyParser_Delete(parser_state *ps) /* PARSER STACK OPERATIONS */ static int -shift(register stack *s, int type, char *str, int newstate, int lineno) +shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset) { int err; assert(!s_empty(s)); - err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno); + err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset); if (err) return err; s->s_top->s_state = newstate; @@ -117,13 +117,13 @@ shift(register stack *s, int type, char *str, int newstate, int lineno) } static int -push(register stack *s, int type, dfa *d, int newstate, int lineno) +push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset) { int err; register node *n; n = s->s_top->s_parent; assert(!s_empty(s)); - err = PyNode_AddChild(n, type, (char *)NULL, lineno); + err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset); if (err) return err; s->s_top->s_state = newstate; @@ -213,7 +213,7 @@ future_hack(parser_state *ps) int PyParser_AddToken(register parser_state *ps, register int type, char *str, - int lineno, int *expected_ret) + int lineno, int col_offset, int *expected_ret) { register int ilabel; int err; @@ -245,7 +245,7 @@ PyParser_AddToken(register parser_state *ps, register int type, char *str, dfa *d1 = PyGrammar_FindDFA( ps->p_grammar, nt); if ((err = push(&ps->p_stack, nt, d1, - arrow, lineno)) > 0) { + arrow, lineno, col_offset)) > 0) { D(printf(" MemError: push\n")); return err; } @@ -255,7 +255,7 @@ PyParser_AddToken(register parser_state *ps, register int type, char *str, /* Shift the token */ if ((err = shift(&ps->p_stack, type, str, - x, lineno)) > 0) { + x, lineno, col_offset)) > 0) { D(printf(" MemError: shift.\n")); return err; } diff --git a/Parser/parser.h b/Parser/parser.h index f5d2d0d..bdca3e9 100644 --- a/Parser/parser.h +++ b/Parser/parser.h @@ -32,7 +32,7 @@ typedef struct { parser_state *PyParser_New(grammar *g, int start); void PyParser_Delete(parser_state *ps); -int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno, +int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno, int col_offset, int *expected_ret); void PyGrammar_AddAccelerators(grammar *g); diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 8b1f70c..77a2cac 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -130,6 +130,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, int type; size_t len; char *str; + int col_offset; type = PyTokenizer_Get(tok, &a, &b); if (type == ERRORTOKEN) { @@ -185,9 +186,13 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, len == 4 && str[0] == 'w' && strcmp(str, "with") == 0) handling_with = 1; #endif - + if (a >= tok->line_start) + col_offset = a - tok->line_start; + else + col_offset = -1; + if ((err_ret->error = - PyParser_AddToken(ps, (int)type, str, tok->lineno, + PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset, &(err_ret->expected))) != E_OK) { if (err_ret->error != E_DONE) PyObject_FREE(str); diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 036bed8..4174e9c 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -764,6 +764,7 @@ tok_nextc(register struct tok_state *tok) } if (tok->start == NULL) tok->buf = tok->cur; + tok->line_start = tok->cur; tok->lineno++; tok->inp = end; return Py_CHARMASK(*tok->cur++); @@ -798,6 +799,7 @@ tok_nextc(register struct tok_state *tok) } tok->buf = buf; tok->cur = tok->buf + oldlen; + tok->line_start = tok->cur; strcpy(tok->buf + oldlen, new); PyMem_FREE(new); tok->inp = tok->buf + newlen; @@ -809,7 +811,9 @@ tok_nextc(register struct tok_state *tok) if (tok->buf != NULL) PyMem_DEL(tok->buf); tok->buf = new; + tok->line_start = tok->buf; tok->cur = tok->buf; + tok->line_start = tok->buf; tok->inp = strchr(tok->buf, '\0'); tok->end = tok->inp + 1; } @@ -877,6 +881,7 @@ tok_nextc(register struct tok_state *tok) done = tok->inp[-1] == '\n'; } tok->cur = tok->buf + cur; + tok->line_start = tok->cur; /* replace "\r\n" with "\n" */ /* For Mac we leave the \r, giving a syntax error */ pt = tok->inp - 2; diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 0b360617..5e7ebf7 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -45,6 +45,7 @@ struct tok_state { int read_coding_spec; /* whether 'coding:...' has been read */ char *encoding; int cont_line; /* whether we are in a continuation line. */ + const char* line_start; /* pointer to start of current line */ #ifndef PGEN PyObject *decoding_readline; /* codecs.open(...).readline */ PyObject *decoding_buffer; |