diff options
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/Python.asdl | 2 | ||||
-rw-r--r-- | Parser/asdl.py | 23 | ||||
-rwxr-xr-x | Parser/asdl_c.py | 21 | ||||
-rw-r--r-- | Parser/parsetok.c | 35 | ||||
-rw-r--r-- | Parser/parsetok_pgen.c | 2 | ||||
-rw-r--r-- | Parser/pgenmain.c | 2 | ||||
-rw-r--r-- | Parser/tokenizer.c | 64 | ||||
-rw-r--r-- | Parser/tokenizer.h | 9 |
8 files changed, 104 insertions, 54 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl index 9407b2f..8e2e1ac 100644 --- a/Parser/Python.asdl +++ b/Parser/Python.asdl @@ -1,6 +1,6 @@ -- ASDL's four builtin types are identifier, int, string, object -module Python version "$Revision$" +module Python { mod = Module(stmt* body) | Interactive(stmt* body) diff --git a/Parser/asdl.py b/Parser/asdl.py index 7b4e2dc..c63dfa7 100644 --- a/Parser/asdl.py +++ b/Parser/asdl.py @@ -114,28 +114,20 @@ class ASDLParser(spark.GenericParser, object): raise ASDLSyntaxError(tok.lineno, tok) def p_module_0(self, info): - " module ::= Id Id version { } " - module, name, version, _0, _1 = info + " module ::= Id Id { } " + module, name, _0, _1 = info if module.value != "module": raise ASDLSyntaxError(module.lineno, msg="expected 'module', found %s" % module) - return Module(name, None, version) + return Module(name, None) def p_module(self, info): - " module ::= Id Id version { definitions } " - module, name, version, _0, definitions, _1 = info + " module ::= Id Id { definitions } " + module, name, _0, definitions, _1 = info if module.value != "module": raise ASDLSyntaxError(module.lineno, msg="expected 'module', found %s" % module) - return Module(name, definitions, version) - - def p_version(self, info): - "version ::= Id String" - version, V = info - if version.value != "version": - raise ASDLSyntaxError(version.lineno, - msg="expected 'version', found %" % version) - return V + return Module(name, definitions) def p_definition_0(self, definition): " definitions ::= definition " @@ -246,10 +238,9 @@ class AST(object): pass # a marker class class Module(AST): - def __init__(self, name, dfns, version): + def __init__(self, name, dfns): self.name = name self.dfns = dfns - self.version = version self.types = {} # maps type name to value (from dfns) for type in dfns: self.types[type.name.value] = type.value diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index d6555d6..cdce5a3 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -5,6 +5,7 @@ # handle fields that have a type but no name import os, sys +import subprocess import asdl @@ -882,9 +883,6 @@ static int add_ast_fields(void) self.emit("if (!%s_singleton) return 0;" % cons.name, 1) -def parse_version(mod): - return mod.version.value[12:-3] - class ASTModuleVisitor(PickleVisitor): def visitModule(self, mod): @@ -904,7 +902,7 @@ class ASTModuleVisitor(PickleVisitor): self.emit("return NULL;", 2) # Value of version: "$Revision$" self.emit('if (PyModule_AddStringConstant(m, "__version__", "%s") < 0)' - % parse_version(mod), 1) + % (mod.version,), 1) self.emit("return NULL;", 2) for dfn in mod.dfns: self.visit(dfn) @@ -1137,6 +1135,18 @@ c_file_msg = """ """ + +def get_file_revision(f): + """Fish out the last change to a file in hg.""" + args = ["hg", "log", "--template", "{node|short}", "--limit", "1", f] + p = subprocess.Popen(args, stdout=subprocess.PIPE) + out = p.communicate()[0] + if p.returncode: + print >> sys.stderr, "error return code from hg" + sys.exit(1) + return out + + def main(srcfile): argv0 = sys.argv[0] components = argv0.split(os.sep) @@ -1145,6 +1155,7 @@ def main(srcfile): mod = asdl.parse(srcfile) if not asdl.check(mod): sys.exit(1) + mod.version = get_file_revision(srcfile) if INC_DIR: p = "%s/%s-ast.h" % (INC_DIR, mod.name) f = open(p, "w") @@ -1164,7 +1175,7 @@ def main(srcfile): p = os.path.join(SRC_DIR, str(mod.name) + "-ast.c") f = open(p, "w") f.write(auto_gen_msg) - f.write(c_file_msg % parse_version(mod)) + f.write(c_file_msg % (mod.version,)) f.write('#include "Python.h"\n') f.write('#include "%s-ast.h"\n' % mod.name) f.write('\n') diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 7636a54..eef650a 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -13,7 +13,7 @@ /* Forward */ static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); -static void initerr(perrdetail *err_ret, const char* filename); +static int initerr(perrdetail *err_ret, const char* filename); /* Parse input coming from a string. Return error code, print some errors. */ node * @@ -48,7 +48,8 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, struct tok_state *tok; int exec_input = start == file_input; - initerr(err_ret, filename); + if (initerr(err_ret, filename) < 0) + return NULL; if (*flags & PyPARSE_IGNORE_COOKIE) tok = PyTokenizer_FromUTF8(s, exec_input); @@ -59,7 +60,10 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, return NULL; } - tok->filename = filename ? filename : "<string>"; +#ifndef PGEN + Py_INCREF(err_ret->filename); + tok->filename = err_ret->filename; +#endif return parsetok(tok, g, start, err_ret, flags); } @@ -90,13 +94,17 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, { struct tok_state *tok; - initerr(err_ret, filename); + if (initerr(err_ret, filename) < 0) + return NULL; if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) { err_ret->error = E_NOMEM; return NULL; } - tok->filename = filename; +#ifndef PGEN + Py_INCREF(err_ret->filename); + tok->filename = err_ret->filename; +#endif return parsetok(tok, g, start, err_ret, flags); } @@ -127,7 +135,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, { parser_state *ps; node *n; - int started = 0, handling_import = 0, handling_with = 0; + int started = 0; if ((ps = PyParser_New(g, start)) == NULL) { fprintf(stderr, "no mem for new parser\n"); @@ -154,7 +162,6 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, } if (type == ENDMARKER && started) { type = NEWLINE; /* Add an extra newline */ - handling_with = handling_import = 0; started = 0; /* Add the right number of dedent tokens, except if a certain flag is given -- @@ -268,14 +275,24 @@ done: return n; } -static void +static int initerr(perrdetail *err_ret, const char *filename) { err_ret->error = E_OK; - err_ret->filename = filename; err_ret->lineno = 0; err_ret->offset = 0; err_ret->text = NULL; err_ret->token = -1; err_ret->expected = -1; +#ifndef PGEN + if (filename) + err_ret->filename = PyUnicode_DecodeFSDefault(filename); + else + err_ret->filename = PyUnicode_FromString("<string>"); + if (err_ret->filename == NULL) { + err_ret->error = E_ERROR; + return -1; + } +#endif + return 0; } diff --git a/Parser/parsetok_pgen.c b/Parser/parsetok_pgen.c new file mode 100644 index 0000000..97b9288 --- /dev/null +++ b/Parser/parsetok_pgen.c @@ -0,0 +1,2 @@ +#define PGEN +#include "parsetok.c" diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c index 4b7b55a..52b8380 100644 --- a/Parser/pgenmain.c +++ b/Parser/pgenmain.c @@ -29,6 +29,8 @@ int Py_IgnoreEnvironmentFlag; /* Forward */ grammar *getgrammar(char *filename); +void Py_Exit(int) _Py_NO_RETURN; + void Py_Exit(int sts) { diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 3f6be2f..f4d7e3f 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -128,7 +128,6 @@ tok_new(void) tok->prompt = tok->nextprompt = NULL; tok->lineno = 0; tok->level = 0; - tok->filename = NULL; tok->altwarning = 1; tok->alterror = 1; tok->alttabsize = 1; @@ -140,6 +139,7 @@ tok_new(void) tok->encoding = NULL; tok->cont_line = 0; #ifndef PGEN + tok->filename = NULL; tok->decoding_readline = NULL; tok->decoding_buffer = NULL; #endif @@ -545,7 +545,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok) { char *line = NULL; int badchar = 0; - PyObject *filename; for (;;) { if (tok->decoding_state == STATE_NORMAL) { /* We already have a codec associated with @@ -586,16 +585,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok) if (badchar) { /* Need to add 1 to the line number, since this line has not been counted, yet. */ - filename = PyUnicode_DecodeFSDefault(tok->filename); - if (filename != NULL) { - PyErr_Format(PyExc_SyntaxError, - "Non-UTF-8 code starting with '\\x%.2x' " - "in file %U on line %i, " - "but no encoding declared; " - "see http://python.org/dev/peps/pep-0263/ for details", - badchar, filename, tok->lineno + 1); - Py_DECREF(filename); - } + PyErr_Format(PyExc_SyntaxError, + "Non-UTF-8 code starting with '\\x%.2x' " + "in file %U on line %i, " + "but no encoding declared; " + "see http://python.org/dev/peps/pep-0263/ for details", + badchar, tok->filename, tok->lineno + 1); return error_ret(tok); } #endif @@ -853,6 +848,7 @@ PyTokenizer_Free(struct tok_state *tok) #ifndef PGEN Py_XDECREF(tok->decoding_readline); Py_XDECREF(tok->decoding_buffer); + Py_XDECREF(tok->filename); #endif if (tok->fp != NULL && tok->buf != NULL) PyMem_FREE(tok->buf); @@ -1247,8 +1243,13 @@ indenterror(struct tok_state *tok) return 1; } if (tok->altwarning) { - PySys_WriteStderr("%s: inconsistent use of tabs and spaces " +#ifdef PGEN + PySys_WriteStderr("inconsistent use of tabs and spaces " + "in indentation\n"); +#else + PySys_FormatStderr("%U: inconsistent use of tabs and spaces " "in indentation\n", tok->filename); +#endif tok->altwarning = 0; } return 0; @@ -1689,17 +1690,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) return result; } -/* Get -*- encoding -*- from a Python file. +/* Get the encoding of a Python file. Check for the coding cookie and check if + the file starts with a BOM. - PyTokenizer_FindEncoding returns NULL when it can't find the encoding in - the first or second line of the file (in which case the encoding - should be assumed to be PyUnicode_GetDefaultEncoding()). + PyTokenizer_FindEncodingFilename() returns NULL when it can't find the + encoding in the first or second line of the file (in which case the encoding + should be assumed to be UTF-8). + + The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed + by the caller. */ - The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed - by the caller. -*/ char * -PyTokenizer_FindEncoding(int fd) +PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) { struct tok_state *tok; FILE *fp; @@ -1718,6 +1720,20 @@ PyTokenizer_FindEncoding(int fd) fclose(fp); return NULL; } +#ifndef PGEN + if (filename != NULL) { + Py_INCREF(filename); + tok->filename = filename; + } + else { + tok->filename = PyUnicode_FromString("<string>"); + if (tok->filename == NULL) { + fclose(fp); + PyTokenizer_Free(tok); + return encoding; + } + } +#endif while (tok->lineno < 2 && tok->done == E_OK) { PyTokenizer_Get(tok, &p_start, &p_end); } @@ -1731,6 +1747,12 @@ PyTokenizer_FindEncoding(int fd) return encoding; } +char * +PyTokenizer_FindEncoding(int fd) +{ + return PyTokenizer_FindEncodingFilename(fd, NULL); +} + #ifdef Py_DEBUG void diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 2be3bf2..ed1f3aa 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -40,7 +40,13 @@ struct tok_state { int level; /* () [] {} Parentheses nesting level */ /* Used to allow free continuations inside them */ /* Stuff for checking on different tab sizes */ - const char *filename; /* encoded to the filesystem encoding */ +#ifndef PGEN + /* pgen doesn't have access to Python codecs, it cannot decode the input + filename. The bytes filename might be kept, but it is only used by + indenterror() and it is not really needed: pgen only compiles one file + (Grammar/Grammar). */ + PyObject *filename; +#endif int altwarning; /* Issue warning if alternate tabs don't match */ int alterror; /* Issue error if alternate tabs don't match */ int alttabsize; /* Alternate tab spacing */ @@ -69,7 +75,6 @@ extern void PyTokenizer_Free(struct tok_state *); extern int PyTokenizer_Get(struct tok_state *, char **, char **); extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset); -extern char * PyTokenizer_FindEncoding(int); #ifdef __cplusplus } |