diff options
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/Python.asdl | 4 | ||||
-rwxr-xr-x | Parser/asdl_c.py | 8 | ||||
-rw-r--r-- | Parser/grammar.mak | 45 | ||||
-rw-r--r-- | Parser/myreadline.c | 2 | ||||
-rw-r--r-- | Parser/parsetok.c | 19 | ||||
-rw-r--r-- | Parser/pgenmain.c | 2 | ||||
-rw-r--r-- | Parser/printgrammar.c | 2 | ||||
-rw-r--r-- | Parser/tokenizer.c | 140 | ||||
-rw-r--r-- | Parser/tokenizer.h | 7 |
9 files changed, 131 insertions, 98 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl index 789e07b..9407b2f 100644 --- a/Parser/Python.asdl +++ b/Parser/Python.asdl @@ -17,7 +17,7 @@ module Python version "$Revision$" expr? starargs, expr? kwargs, stmt* body, - expr *decorator_list) + expr* decorator_list) | Return(expr? value) | Delete(expr* targets) @@ -36,7 +36,7 @@ module Python version "$Revision$" | Assert(expr test, expr? msg) | Import(alias* names) - | ImportFrom(identifier module, alias* names, int? level) + | ImportFrom(identifier? module, alias* names, int? level) | Global(identifier* names) | Nonlocal(identifier* names) diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index 6df11f5..d6555d6 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -376,6 +376,7 @@ class Obj2ModVisitor(PickleVisitor): self.emit(format % error, 1, reflow=False) if add_label: self.emit("failed:", 1) + self.emit("Py_XDECREF(tmp);", 1) self.emit("return 1;", 1) self.emit("}", 0) self.emit("", 0) @@ -720,7 +721,7 @@ static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int } PyTuple_SET_ITEM(fnames, i, field); } - result = PyObject_CallFunction((PyObject*)&PyType_Type, "U(O){sOss}", + result = PyObject_CallFunction((PyObject*)&PyType_Type, "s(O){sOss}", type, base, "_fields", fnames, "__module__", "_ast"); Py_DECREF(fnames); return (PyTypeObject*)result; @@ -730,8 +731,9 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields) { int i, result; PyObject *s, *l = PyTuple_New(num_fields); - if (!l) return 0; - for(i = 0; i < num_fields; i++) { + if (!l) + return 0; + for (i = 0; i < num_fields; i++) { s = PyUnicode_FromString(attrs[i]); if (!s) { Py_DECREF(l); diff --git a/Parser/grammar.mak b/Parser/grammar.mak deleted file mode 100644 index 55f028f..0000000 --- a/Parser/grammar.mak +++ /dev/null @@ -1,45 +0,0 @@ -# This manages to rebuild graminit.{h, c} under MSVC 6 (Windows), via -# -# nmake /f grammar.mak -# -# You may also need to copy python23.dll into this directory, or get -# it on your search path. -# -# The intermediate files can be nuked afterwards: -# -# nmake /f grammar.mak clean -# -# I don't understand the maze of preprocessor #define's on Windows, and -# as a result this requires linking with python23.lib, so it's of no use -# for bootstrapping (the cause appears to be a useless-- in this -# particular case --pragma in PC\pyconfig.h, which demands that -# python23.lib get linked in). - -LIBS= ..\PCbuild\python25.lib - -CFLAGS= /I ..\Include /I ..\PC /D MS_NO_COREDLL /D PGEN /MD - -GRAMMAR_H= ..\Include\graminit.h -GRAMMAR_C= ..\Python\graminit.c -GRAMMAR_INPUT= ..\Grammar\Grammar - -PGEN= pgen.exe - -POBJS= acceler.obj grammar1.obj listnode.obj node.obj parser.obj \ - parsetok.obj tokenizer.obj bitset.obj metagrammar.obj - -PARSER_OBJS= $(POBJS) myreadline.obj - -PGOBJS= firstsets.obj grammar.obj pgen.obj printgrammar.obj pgenmain.obj - -PGENOBJS= $(POBJS) $(PGOBJS) - -$(GRAMMAR_H) $(GRAMMAR_C): $(PGEN) $(GRAMMAR_INPUT) - $(PGEN) $(GRAMMAR_INPUT) $(GRAMMAR_H) $(GRAMMAR_C) - -$(PGEN): $(PGENOBJS) - $(CC) $(PGENOBJS) $(LIBS) /Fe$(PGEN) - -clean: - del *.obj - del $(PGEN) diff --git a/Parser/myreadline.c b/Parser/myreadline.c index 7166fc1..50802c3 100644 --- a/Parser/myreadline.c +++ b/Parser/myreadline.c @@ -87,7 +87,7 @@ my_fgets(char *buf, int len, FILE *fp) #endif if (s < 0) return 1; - /* try again */ + /* try again */ continue; } #endif diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 16cf5cb..7636a54 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -46,13 +46,14 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, perrdetail *err_ret, int *flags) { struct tok_state *tok; + int exec_input = start == file_input; initerr(err_ret, filename); if (*flags & PyPARSE_IGNORE_COOKIE) - tok = PyTokenizer_FromUTF8(s); + tok = PyTokenizer_FromUTF8(s, exec_input); else - tok = PyTokenizer_FromString(s); + tok = PyTokenizer_FromString(s, exec_input); if (tok == NULL) { err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; return NULL; @@ -240,16 +241,24 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, } } } else if (tok->encoding != NULL) { + /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was + * allocated using PyMem_ + */ node* r = PyNode_New(encoding_decl); - if (!r) { + if (r) + r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1); + if (!r || !r->n_str) { err_ret->error = E_NOMEM; + if (r) + PyObject_FREE(r); n = NULL; goto done; } - r->n_str = tok->encoding; + strcpy(r->n_str, tok->encoding); + PyMem_FREE(tok->encoding); + tok->encoding = NULL; r->n_nchildren = 1; r->n_child = n; - tok->encoding = NULL; n = r; } diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c index 88fa7f1..4b7b55a 100644 --- a/Parser/pgenmain.c +++ b/Parser/pgenmain.c @@ -13,6 +13,8 @@ - check for duplicate definitions of names (instead of fatal err) */ +#define PGEN + #include "Python.h" #include "pgenheaders.h" #include "grammar.h" diff --git a/Parser/printgrammar.c b/Parser/printgrammar.c index 01f552f..dd7e6ae 100644 --- a/Parser/printgrammar.c +++ b/Parser/printgrammar.c @@ -1,6 +1,8 @@ /* Print a bunch of C initializers that represent a grammar */ +#define PGEN + #include "pgenheaders.h" #include "grammar.h" diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index d985131..3f6be2f 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -119,6 +119,7 @@ tok_new(void) tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; tok->done = E_OK; tok->fp = NULL; + tok->input = NULL; tok->tabsize = TABSIZE; tok->indent = 0; tok->indstack[0] = 0; @@ -145,6 +146,17 @@ tok_new(void) return tok; } +static char * +new_string(const char *s, Py_ssize_t len) +{ + char* result = (char *)PyMem_MALLOC(len + 1); + if (result != NULL) { + memcpy(result, s, len); + result[len] = '\0'; + } + return result; +} + #ifdef PGEN static char * @@ -159,10 +171,10 @@ decoding_feof(struct tok_state *tok) return feof(tok->fp); } -static const char * -decode_str(const char *str, struct tok_state *tok) +static char * +decode_str(const char *str, int exec_input, struct tok_state *tok) { - return str; + return new_string(str, strlen(str)); } #else /* PGEN */ @@ -177,16 +189,6 @@ error_ret(struct tok_state *tok) /* XXX */ return NULL; /* as if it were EOF */ } -static char * -new_string(const char *s, Py_ssize_t len) -{ - char* result = (char *)PyMem_MALLOC(len + 1); - if (result != NULL) { - memcpy(result, s, len); - result[len] = '\0'; - } - return result; -} static char * get_normal_name(char *s) /* for utf-8 and latin-1 */ @@ -243,7 +245,7 @@ get_coding_spec(const char *s, Py_ssize_t size) } while (t[0] == '\x20' || t[0] == '\t'); begin = t; - while (isalnum(Py_CHARMASK(t[0])) || + while (Py_ISALNUM(t[0]) || t[0] == '-' || t[0] == '_' || t[0] == '.') t++; @@ -460,17 +462,20 @@ static int fp_setreadl(struct tok_state *tok, const char* enc) { PyObject *readline = NULL, *stream = NULL, *io = NULL; + int fd; io = PyImport_ImportModuleNoBlock("io"); if (io == NULL) goto cleanup; - if (tok->filename) - stream = PyObject_CallMethod(io, "open", "ssis", - tok->filename, "r", -1, enc); - else - stream = PyObject_CallMethod(io, "open", "isisOOO", - fileno(tok->fp), "r", -1, enc, Py_None, Py_None, Py_False); + fd = fileno(tok->fp); + if (lseek(fd, 0, SEEK_SET) == (off_t)-1) { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL); + goto cleanup; + } + + stream = PyObject_CallMethod(io, "open", "isisOOO", + fd, "r", -1, enc, Py_None, Py_None, Py_False); if (stream == NULL) goto cleanup; @@ -540,6 +545,7 @@ decoding_fgets(char *s, int size, struct tok_state *tok) { char *line = NULL; int badchar = 0; + PyObject *filename; for (;;) { if (tok->decoding_state == STATE_NORMAL) { /* We already have a codec associated with @@ -578,16 +584,18 @@ decoding_fgets(char *s, int size, struct tok_state *tok) } } if (badchar) { - char buf[500]; /* Need to add 1 to the line number, since this line has not been counted, yet. */ - sprintf(buf, - "Non-UTF-8 code starting with '\\x%.2x' " - "in file %.200s on line %i, " - "but no encoding declared; " - "see http://python.org/dev/peps/pep-0263/ for details", - badchar, tok->filename, tok->lineno + 1); - PyErr_SetString(PyExc_SyntaxError, buf); + filename = PyUnicode_DecodeFSDefault(tok->filename); + if (filename != NULL) { + PyErr_Format(PyExc_SyntaxError, + "Non-UTF-8 code starting with '\\x%.2x' " + "in file %U on line %i, " + "but no encoding declared; " + "see http://python.org/dev/peps/pep-0263/ for details", + badchar, filename, tok->lineno + 1); + Py_DECREF(filename); + } return error_ret(tok); } #endif @@ -652,17 +660,62 @@ translate_into_utf8(const char* str, const char* enc) { return utf8; } + +static char * +translate_newlines(const char *s, int exec_input, struct tok_state *tok) { + int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length; + char *buf, *current; + char c = '\0'; + buf = PyMem_MALLOC(needed_length); + if (buf == NULL) { + tok->done = E_NOMEM; + return NULL; + } + for (current = buf; *s; s++, current++) { + c = *s; + if (skip_next_lf) { + skip_next_lf = 0; + if (c == '\n') { + c = *++s; + if (!c) + break; + } + } + if (c == '\r') { + skip_next_lf = 1; + c = '\n'; + } + *current = c; + } + /* If this is exec input, add a newline to the end of the string if + there isn't one already. */ + if (exec_input && c != '\n') { + *current = '\n'; + current++; + } + *current = '\0'; + final_length = current - buf + 1; + if (final_length < needed_length && final_length) + /* should never fail */ + buf = PyMem_REALLOC(buf, final_length); + return buf; +} + /* Decode a byte string STR for use as the buffer of TOK. Look for encoding declarations inside STR, and record them inside TOK. */ static const char * -decode_str(const char *str, struct tok_state *tok) +decode_str(const char *input, int single, struct tok_state *tok) { PyObject* utf8 = NULL; + const char *str; const char *s; const char *newl[2] = {NULL, NULL}; int lineno = 0; + tok->input = str = translate_newlines(input, single, tok); + if (str == NULL) + return NULL; tok->enc = NULL; tok->str = str; if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) @@ -713,12 +766,12 @@ decode_str(const char *str, struct tok_state *tok) /* Set up tokenizer for string */ struct tok_state * -PyTokenizer_FromString(const char *str) +PyTokenizer_FromString(const char *str, int exec_input) { struct tok_state *tok = tok_new(); if (tok == NULL) return NULL; - str = (char *)decode_str(str, tok); + str = (char *)decode_str(str, exec_input, tok); if (str == NULL) { PyTokenizer_Free(tok); return NULL; @@ -730,11 +783,18 @@ PyTokenizer_FromString(const char *str) } struct tok_state * -PyTokenizer_FromUTF8(const char *str) +PyTokenizer_FromUTF8(const char *str, int exec_input) { struct tok_state *tok = tok_new(); if (tok == NULL) return NULL; +#ifndef PGEN + tok->input = str = translate_newlines(str, exec_input, tok); +#endif + if (str == NULL) { + PyTokenizer_Free(tok); + return NULL; + } tok->decoding_state = STATE_RAW; tok->read_coding_spec = 1; tok->enc = NULL; @@ -751,7 +811,6 @@ PyTokenizer_FromUTF8(const char *str) return tok; } - /* Set up tokenizer for file */ struct tok_state * @@ -797,6 +856,8 @@ PyTokenizer_Free(struct tok_state *tok) #endif if (tok->fp != NULL && tok->buf != NULL) PyMem_FREE(tok->buf); + if (tok->input) + PyMem_FREE((char *)tok->input); PyMem_FREE(tok); } @@ -832,6 +893,13 @@ tok_nextc(register struct tok_state *tok) if (tok->prompt != NULL) { char *newtok = PyOS_Readline(stdin, stdout, tok->prompt); #ifndef PGEN + if (newtok != NULL) { + char *translated = translate_newlines(newtok, 0, tok); + PyMem_FREE(newtok); + if (translated == NULL) + return EOF; + newtok = translated; + } if (tok->encoding && newtok && *newtok) { /* Recode to UTF-8 */ Py_ssize_t buflen; @@ -1407,10 +1475,8 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) c = tok_nextc(tok); if (c == '.') goto fraction; -#ifndef WITHOUT_COMPLEX if (c == 'j' || c == 'J') goto imaginary; -#endif if (c == 'x' || c == 'X') { /* Hex */ @@ -1462,10 +1528,8 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) goto fraction; else if (c == 'e' || c == 'E') goto exponent; -#ifndef WITHOUT_COMPLEX else if (c == 'j' || c == 'J') goto imaginary; -#endif else if (nonzero) { tok->done = E_TOKEN; tok_backup(tok, c); @@ -1502,12 +1566,10 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) c = tok_nextc(tok); } while (isdigit(c)); } -#ifndef WITHOUT_COMPLEX if (c == 'j' || c == 'J') /* Imaginary part */ imaginary: c = tok_nextc(tok); -#endif } } tok_backup(tok, c); diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 6c1742f..2be3bf2 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -53,15 +53,16 @@ struct tok_state { int cont_line; /* whether we are in a continuation line. */ const char* line_start; /* pointer to start of current line */ #ifndef PGEN - PyObject *decoding_readline; /* codecs.open(...).readline */ + PyObject *decoding_readline; /* open(...).readline */ PyObject *decoding_buffer; #endif const char* enc; /* Encoding for the current str. */ const char* str; + const char* input; /* Tokenizer's newline translated copy of the string. */ }; -extern struct tok_state *PyTokenizer_FromString(const char *); -extern struct tok_state *PyTokenizer_FromUTF8(const char *); +extern struct tok_state *PyTokenizer_FromString(const char *, int); +extern struct tok_state *PyTokenizer_FromUTF8(const char *, int); extern struct tok_state *PyTokenizer_FromFile(FILE *, char*, char *, char *); extern void PyTokenizer_Free(struct tok_state *); |