diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-04-04 22:39:01 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-04-04 22:39:01 (GMT) |
commit | 7f2fee36401f7b987a368fe043637b3ae7116600 (patch) | |
tree | d21cec17c440b8255cf9bc4c79269d6bfb3253f0 /Parser | |
parent | 9bdb43e43f9f5d80699e297c2f73c106179b33d5 (diff) | |
download | cpython-7f2fee36401f7b987a368fe043637b3ae7116600.zip cpython-7f2fee36401f7b987a368fe043637b3ae7116600.tar.gz cpython-7f2fee36401f7b987a368fe043637b3ae7116600.tar.bz2 |
Issue #10785: Store the filename as Unicode in the Python parser.
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/parsetok.c | 32 | ||||
-rw-r--r-- | Parser/parsetok_pgen.c | 2 | ||||
-rw-r--r-- | Parser/tokenizer.c | 35 | ||||
-rw-r--r-- | Parser/tokenizer.h | 8 |
4 files changed, 56 insertions, 21 deletions
```diff
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 2251cac..eef650a 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -13,7 +13,7 @@
 
 /* Forward */
 static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
-static void initerr(perrdetail *err_ret, const char* filename);
+static int initerr(perrdetail *err_ret, const char* filename);
 
 /* Parse input coming from a string. Return error code, print some errors. */
 node *
@@ -48,7 +48,8 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
     struct tok_state *tok;
     int exec_input = start == file_input;
 
-    initerr(err_ret, filename);
+    if (initerr(err_ret, filename) < 0)
+        return NULL;
 
     if (*flags & PyPARSE_IGNORE_COOKIE)
         tok = PyTokenizer_FromUTF8(s, exec_input);
@@ -59,7 +60,10 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
         return NULL;
     }
 
-    tok->filename = filename ? filename : "<string>";
+#ifndef PGEN
+    Py_INCREF(err_ret->filename);
+    tok->filename = err_ret->filename;
+#endif
     return parsetok(tok, g, start, err_ret, flags);
 }
 
@@ -90,13 +94,17 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
 {
     struct tok_state *tok;
 
-    initerr(err_ret, filename);
+    if (initerr(err_ret, filename) < 0)
+        return NULL;
 
     if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) {
         err_ret->error = E_NOMEM;
         return NULL;
     }
-    tok->filename = filename;
+#ifndef PGEN
+    Py_INCREF(err_ret->filename);
+    tok->filename = err_ret->filename;
+#endif
     return parsetok(tok, g, start, err_ret, flags);
 }
 
@@ -267,14 +275,24 @@ done:
     return n;
 }
 
-static void
+static int
 initerr(perrdetail *err_ret, const char *filename)
 {
     err_ret->error = E_OK;
-    err_ret->filename = filename;
     err_ret->lineno = 0;
     err_ret->offset = 0;
     err_ret->text = NULL;
     err_ret->token = -1;
     err_ret->expected = -1;
+#ifndef PGEN
+    if (filename)
+        err_ret->filename = PyUnicode_DecodeFSDefault(filename);
+    else
+        err_ret->filename = PyUnicode_FromString("<string>");
+    if (err_ret->filename == NULL) {
+        err_ret->error = E_ERROR;
+        return -1;
+    }
+#endif
+    return 0;
 }
diff --git a/Parser/parsetok_pgen.c b/Parser/parsetok_pgen.c
new file mode 100644
index 0000000..97b9288
--- /dev/null
+++ b/Parser/parsetok_pgen.c
@@ -0,0 +1,2 @@
+#define PGEN
+#include "parsetok.c"
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 3f6be2f..5edd958 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -128,7 +128,6 @@ tok_new(void)
     tok->prompt = tok->nextprompt = NULL;
     tok->lineno = 0;
     tok->level = 0;
-    tok->filename = NULL;
     tok->altwarning = 1;
     tok->alterror = 1;
     tok->alttabsize = 1;
@@ -140,6 +139,7 @@ tok_new(void)
     tok->encoding = NULL;
     tok->cont_line = 0;
 #ifndef PGEN
+    tok->filename = NULL;
     tok->decoding_readline = NULL;
     tok->decoding_buffer = NULL;
 #endif
@@ -545,7 +545,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
 {
     char *line = NULL;
     int badchar = 0;
-    PyObject *filename;
     for (;;) {
         if (tok->decoding_state == STATE_NORMAL) {
             /* We already have a codec associated with
@@ -586,16 +585,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
     if (badchar) {
         /* Need to add 1 to the line number, since this line
            has not been counted, yet. */
-        filename = PyUnicode_DecodeFSDefault(tok->filename);
-        if (filename != NULL) {
-            PyErr_Format(PyExc_SyntaxError,
-                    "Non-UTF-8 code starting with '\\x%.2x' "
-                    "in file %U on line %i, "
-                    "but no encoding declared; "
-                    "see http://python.org/dev/peps/pep-0263/ for details",
-                    badchar, filename, tok->lineno + 1);
-            Py_DECREF(filename);
-        }
+        PyErr_Format(PyExc_SyntaxError,
+                "Non-UTF-8 code starting with '\\x%.2x' "
+                "in file %U on line %i, "
+                "but no encoding declared; "
+                "see http://python.org/dev/peps/pep-0263/ for details",
+                badchar, tok->filename, tok->lineno + 1);
         return error_ret(tok);
     }
 #endif
@@ -853,6 +848,7 @@ PyTokenizer_Free(struct tok_state *tok)
 #ifndef PGEN
     Py_XDECREF(tok->decoding_readline);
     Py_XDECREF(tok->decoding_buffer);
+    Py_XDECREF(tok->filename);
 #endif
     if (tok->fp != NULL && tok->buf != NULL)
         PyMem_FREE(tok->buf);
@@ -1247,8 +1243,13 @@ indenterror(struct tok_state *tok)
         return 1;
     }
     if (tok->altwarning) {
-        PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
+#ifdef PGEN
+        PySys_WriteStderr("inconsistent use of tabs and spaces "
+                          "in indentation\n");
+#else
+        PySys_FormatStderr("%U: inconsistent use of tabs and spaces "
                           "in indentation\n", tok->filename);
+#endif
         tok->altwarning = 0;
     }
     return 0;
@@ -1718,6 +1719,11 @@ PyTokenizer_FindEncoding(int fd)
         fclose(fp);
         return NULL;
     }
+#ifndef PGEN
+    tok->filename = PyUnicode_FromString("<string>");
+    if (tok->filename == NULL)
+        goto error;
+#endif
     while (tok->lineno < 2 && tok->done == E_OK) {
         PyTokenizer_Get(tok, &p_start, &p_end);
     }
@@ -1727,6 +1733,9 @@ PyTokenizer_FindEncoding(int fd)
         if (encoding)
             strcpy(encoding, tok->encoding);
     }
+#ifndef PGEN
+error:
+#endif
     PyTokenizer_Free(tok);
     return encoding;
 }
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 2be3bf2..3a0d3cb 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -40,7 +40,13 @@ struct tok_state {
     int level; /* () [] {} Parentheses nesting level */
     /* Used to allow free continuations inside them */
     /* Stuff for checking on different tab sizes */
-    const char *filename; /* encoded to the filesystem encoding */
+#ifndef PGEN
+    /* pgen doesn't have access to Python codecs, it cannot decode the input
+       filename. The bytes filename might be kept, but it is only used by
+       indenterror() and it is not really needed: pgen only compiles one file
+       (Grammar/Grammar). */
+    PyObject *filename;
+#endif
     int altwarning; /* Issue warning if alternate tabs don't match */
     int alterror; /* Issue error if alternate tabs don't match */
     int alttabsize; /* Alternate tab spacing */
```