diff options
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/Python.asdl | 14 | ||||
-rw-r--r-- | Parser/asdl.py | 25 | ||||
-rwxr-xr-x | Parser/asdl_c.py | 28 | ||||
-rw-r--r-- | Parser/intrcheck.c | 174 | ||||
-rw-r--r-- | Parser/parsetok.c | 37 | ||||
-rw-r--r-- | Parser/parsetok_pgen.c | 2 | ||||
-rw-r--r-- | Parser/pgenmain.c | 2 | ||||
-rw-r--r-- | Parser/tokenizer.c | 67 | ||||
-rw-r--r-- | Parser/tokenizer.h | 9 |
9 files changed, 107 insertions, 251 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl index 9407b2f..6955199 100644 --- a/Parser/Python.asdl +++ b/Parser/Python.asdl @@ -1,6 +1,6 @@ --- ASDL's four builtin types are identifier, int, string, object +-- ASDL's five builtin types are identifier, int, string, bytes, object -module Python version "$Revision$" +module Python { mod = Module(stmt* body) | Interactive(stmt* body) @@ -28,11 +28,10 @@ module Python version "$Revision$" | For(expr target, expr iter, stmt* body, stmt* orelse) | While(expr test, stmt* body, stmt* orelse) | If(expr test, stmt* body, stmt* orelse) - | With(expr context_expr, expr? optional_vars, stmt* body) + | With(withitem* items, stmt* body) | Raise(expr? exc, expr? cause) - | TryExcept(stmt* body, excepthandler* handlers, stmt* orelse) - | TryFinally(stmt* body, stmt* finalbody) + | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody) | Assert(expr test, expr? msg) | Import(alias* names) @@ -68,7 +67,7 @@ module Python version "$Revision$" expr? starargs, expr? kwargs) | Num(object n) -- a number as a PyObject. | Str(string s) -- need to specify raw, unicode, etc? - | Bytes(string s) + | Bytes(bytes s) | Ellipsis -- other literals? bools? @@ -100,7 +99,6 @@ module Python version "$Revision$" comprehension = (expr target, expr iter, expr* ifs) - -- not sure what to call the first argument for raise and except excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body) attributes (int lineno, int col_offset) @@ -115,5 +113,7 @@ module Python version "$Revision$" -- import name with optional 'as' alias. alias = (identifier name, identifier? asname) + + withitem = (expr context_expr, expr? optional_vars) } diff --git a/Parser/asdl.py b/Parser/asdl.py index 7b4e2dc..01a8b5e 100644 --- a/Parser/asdl.py +++ b/Parser/asdl.py @@ -114,28 +114,20 @@ class ASDLParser(spark.GenericParser, object): raise ASDLSyntaxError(tok.lineno, tok) def p_module_0(self, info): - " module ::= Id Id version { } " - module, name, version, _0, _1 = info + " module ::= Id Id { } " + module, name, _0, _1 = info if module.value != "module": raise ASDLSyntaxError(module.lineno, msg="expected 'module', found %s" % module) - return Module(name, None, version) + return Module(name, None) def p_module(self, info): - " module ::= Id Id version { definitions } " - module, name, version, _0, definitions, _1 = info + " module ::= Id Id { definitions } " + module, name, _0, definitions, _1 = info if module.value != "module": raise ASDLSyntaxError(module.lineno, msg="expected 'module', found %s" % module) - return Module(name, definitions, version) - - def p_version(self, info): - "version ::= Id String" - version, V = info - if version.value != "version": - raise ASDLSyntaxError(version.lineno, - msg="expected 'version', found %" % version) - return V + return Module(name, definitions) def p_definition_0(self, definition): " definitions ::= definition " @@ -236,7 +228,7 @@ class ASDLParser(spark.GenericParser, object): " field ::= Id ? " return Field(type[0], opt=True) -builtin_types = ("identifier", "string", "int", "bool", "object") +builtin_types = ("identifier", "string", "bytes", "int", "object") # below is a collection of classes to capture the AST of an AST :-) # not sure if any of the methods are useful yet, but I'm adding them @@ -246,10 +238,9 @@ class AST(object): pass # a marker class class Module(AST): - def __init__(self, name, dfns, version): + def __init__(self, name, dfns): self.name = name self.dfns = dfns - self.version = version self.types = {} # maps type name to value (from dfns) for type in dfns: self.types[type.name.value] = type.value diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index 8a7f8ae..0b95aaa 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -5,6 +5,7 @@ # handle fields that have a type but no name import os, sys +import subprocess import asdl @@ -775,6 +776,7 @@ static PyObject* ast2obj_object(void *o) } #define ast2obj_identifier ast2obj_object #define ast2obj_string ast2obj_object +#define ast2obj_bytes ast2obj_object static PyObject* ast2obj_int(long b) { @@ -812,6 +814,15 @@ static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena) return obj2ast_object(obj, out, arena); } +static int obj2ast_bytes(PyObject* obj, PyObject** out, PyArena* arena) +{ + if (!PyBytes_CheckExact(obj)) { + PyErr_SetString(PyExc_TypeError, "AST bytes must be of type bytes"); + return 1; + } + return obj2ast_object(obj, out, arena); +} + static int obj2ast_int(PyObject* obj, int* out, PyArena* arena) { int i; @@ -914,10 +925,6 @@ class ASTModuleVisitor(PickleVisitor): self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return NULL;', 1) self.emit('if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)', 1) self.emit("return NULL;", 2) - # Value of version: "$Revision$" - self.emit('if (PyModule_AddStringConstant(m, "__version__", "%s") < 0)' - % mod.version, 1) - self.emit("return NULL;", 2) for dfn in mod.dfns: self.visit(dfn) self.emit("return m;", 1) @@ -1138,24 +1145,12 @@ class ChainOfVisitors: common_msg = "/* File automatically generated by %s. */\n\n" -c_file_msg = """ -/* - __version__ %s. - - This module must be committed separately after each AST grammar change; - The __version__ number is set to the revision number of the commit - containing the grammar change. -*/ - -""" - def main(srcfile): argv0 = sys.argv[0] components = argv0.split(os.sep) argv0 = os.sep.join(components[-2:]) auto_gen_msg = common_msg % argv0 mod = asdl.parse(srcfile) - mod.version = "82163" if not asdl.check(mod): sys.exit(1) if INC_DIR: @@ -1177,7 +1172,6 @@ def main(srcfile): p = os.path.join(SRC_DIR, str(mod.name) + "-ast.c") f = open(p, "w") f.write(auto_gen_msg) - f.write(c_file_msg % mod.version) f.write('#include "Python.h"\n') f.write('#include "%s-ast.h"\n' % mod.name) f.write('\n') diff --git a/Parser/intrcheck.c b/Parser/intrcheck.c deleted file mode 100644 index 4439864..0000000 --- a/Parser/intrcheck.c +++ /dev/null @@ -1,174 +0,0 @@ - -/* Check for interrupts */ - -#include "Python.h" -#include "pythread.h" - -#ifdef QUICKWIN - -#include <io.h> - -void -PyOS_InitInterrupts(void) -{ -} - -void -PyOS_FiniInterrupts(void) -{ -} - -int -PyOS_InterruptOccurred(void) -{ - _wyield(); -} - -#define OK - -#endif /* QUICKWIN */ - -#if defined(_M_IX86) && !defined(__QNX__) -#include <io.h> -#endif - -#if defined(MSDOS) && !defined(QUICKWIN) - -#ifdef __GNUC__ - -/* This is for DJGPP's GO32 extender. I don't know how to trap - * control-C (There's no API for ctrl-C, and I don't want to mess with - * the interrupt vectors.) However, this DOES catch control-break. - * --Amrit - */ - -#include <go32.h> - -void -PyOS_InitInterrupts(void) -{ - _go32_want_ctrl_break(1 /* TRUE */); -} - -void -PyOS_FiniInterrupts(void) -{ -} - -int -PyOS_InterruptOccurred(void) -{ - return _go32_was_ctrl_break_hit(); -} - -#else /* !__GNUC__ */ - -/* This might work for MS-DOS (untested though): */ - -void -PyOS_InitInterrupts(void) -{ -} - -void -PyOS_FiniInterrupts(void) -{ -} - -int -PyOS_InterruptOccurred(void) -{ - int interrupted = 0; - while (kbhit()) { - if (getch() == '\003') - interrupted = 1; - } - return interrupted; -} - -#endif /* __GNUC__ */ - -#define OK - -#endif /* MSDOS && !QUICKWIN */ - - -#ifndef OK - -/* Default version -- for real operating systems and for Standard C */ - -#include <stdio.h> -#include <string.h> -#include <signal.h> - -static int interrupted; - -void -PyErr_SetInterrupt(void) -{ - interrupted = 1; -} - -extern int PyErr_CheckSignals(void); - -static int -checksignals_witharg(void * arg) -{ - return PyErr_CheckSignals(); -} - -static void -intcatcher(int sig) -{ - extern void Py_Exit(int); - static char message[] = -"python: to interrupt a truly hanging Python program, interrupt once more.\n"; - switch (interrupted++) { - case 0: - break; - case 1: - write(2, message, strlen(message)); - break; - case 2: - interrupted = 0; - Py_Exit(1); - break; - } - PyOS_setsig(SIGINT, intcatcher); - Py_AddPendingCall(checksignals_witharg, NULL); -} - -static void (*old_siginthandler)(int) = SIG_DFL; - -void -PyOS_InitInterrupts(void) -{ - if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN) - PyOS_setsig(SIGINT, intcatcher); -} - -void -PyOS_FiniInterrupts(void) -{ - PyOS_setsig(SIGINT, old_siginthandler); -} - -int -PyOS_InterruptOccurred(void) -{ - if (!interrupted) - return 0; - interrupted = 0; - return 1; -} - -#endif /* !OK */ - -void -PyOS_AfterFork(void) -{ -#ifdef WITH_THREAD - PyEval_ReInitThreads(); - PyThread_ReInitTLS(); -#endif -} diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 7636a54..431a87c 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -13,7 +13,7 @@ /* Forward */ static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); -static void initerr(perrdetail *err_ret, const char* filename); +static int initerr(perrdetail *err_ret, const char* filename); /* Parse input coming from a string. Return error code, print some errors. */ node * @@ -48,7 +48,8 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, struct tok_state *tok; int exec_input = start == file_input; - initerr(err_ret, filename); + if (initerr(err_ret, filename) < 0) + return NULL; if (*flags & PyPARSE_IGNORE_COOKIE) tok = PyTokenizer_FromUTF8(s, exec_input); @@ -59,7 +60,10 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, return NULL; } - tok->filename = filename ? filename : "<string>"; +#ifndef PGEN + Py_INCREF(err_ret->filename); + tok->filename = err_ret->filename; +#endif return parsetok(tok, g, start, err_ret, flags); } @@ -90,13 +94,17 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, { struct tok_state *tok; - initerr(err_ret, filename); + if (initerr(err_ret, filename) < 0) + return NULL; if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) { err_ret->error = E_NOMEM; return NULL; } - tok->filename = filename; +#ifndef PGEN + Py_INCREF(err_ret->filename); + tok->filename = err_ret->filename; +#endif return parsetok(tok, g, start, err_ret, flags); } @@ -127,7 +135,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, { parser_state *ps; node *n; - int started = 0, handling_import = 0, handling_with = 0; + int started = 0; if ((ps = PyParser_New(g, start)) == NULL) { fprintf(stderr, "no mem for new parser\n"); @@ -154,7 +162,6 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, } if (type == ENDMARKER && started) { type = NEWLINE; /* Add an extra newline */ - handling_with = handling_import = 0; started = 0; /* Add the right number of dedent tokens, except if a certain flag is given -- @@ -225,7 +232,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, PyParser_Delete(ps); if (n == NULL) { - if (tok->lineno <= 1 && tok->done == E_EOF) + if (tok->done == E_EOF) err_ret->error = E_EOF; err_ret->lineno = tok->lineno; if (tok->buf != NULL) { @@ -268,14 +275,24 @@ done: return n; } -static void +static int initerr(perrdetail *err_ret, const char *filename) { err_ret->error = E_OK; - err_ret->filename = filename; err_ret->lineno = 0; err_ret->offset = 0; err_ret->text = NULL; err_ret->token = -1; err_ret->expected = -1; +#ifndef PGEN + if (filename) + err_ret->filename = PyUnicode_DecodeFSDefault(filename); + else + err_ret->filename = PyUnicode_FromString("<string>"); + if (err_ret->filename == NULL) { + err_ret->error = E_ERROR; + return -1; + } +#endif + return 0; } diff --git a/Parser/parsetok_pgen.c b/Parser/parsetok_pgen.c new file mode 100644 index 0000000..97b9288 --- /dev/null +++ b/Parser/parsetok_pgen.c @@ -0,0 +1,2 @@ +#define PGEN +#include "parsetok.c" diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c index 4b7b55a..52b8380 100644 --- a/Parser/pgenmain.c +++ b/Parser/pgenmain.c @@ -29,6 +29,8 @@ int Py_IgnoreEnvironmentFlag; /* Forward */ grammar *getgrammar(char *filename); +void Py_Exit(int) _Py_NO_RETURN; + void Py_Exit(int sts) { diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 5ba12a4..f4d7e3f 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -128,7 +128,6 @@ tok_new(void) tok->prompt = tok->nextprompt = NULL; tok->lineno = 0; tok->level = 0; - tok->filename = NULL; tok->altwarning = 1; tok->alterror = 1; tok->alttabsize = 1; @@ -140,6 +139,7 @@ tok_new(void) tok->encoding = NULL; tok->cont_line = 0; #ifndef PGEN + tok->filename = NULL; tok->decoding_readline = NULL; tok->decoding_buffer = NULL; #endif @@ -545,7 +545,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok) { char *line = NULL; int badchar = 0; - PyObject *filename; for (;;) { if (tok->decoding_state == STATE_NORMAL) { /* We already have a codec associated with @@ -586,19 +585,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok) if (badchar) { /* Need to add 1 to the line number, since this line has not been counted, yet. */ - if (tok->filename != NULL) - filename = PyUnicode_DecodeFSDefault(tok->filename); - else - filename = PyUnicode_FromString("<file>"); - if (filename != NULL) { - PyErr_Format(PyExc_SyntaxError, - "Non-UTF-8 code starting with '\\x%.2x' " - "in file %U on line %i, " - "but no encoding declared; " - "see http://python.org/dev/peps/pep-0263/ for details", - badchar, filename, tok->lineno + 1); - Py_DECREF(filename); - } + PyErr_Format(PyExc_SyntaxError, + "Non-UTF-8 code starting with '\\x%.2x' " + "in file %U on line %i, " + "but no encoding declared; " + "see http://python.org/dev/peps/pep-0263/ for details", + badchar, tok->filename, tok->lineno + 1); return error_ret(tok); } #endif @@ -856,6 +848,7 @@ PyTokenizer_Free(struct tok_state *tok) #ifndef PGEN Py_XDECREF(tok->decoding_readline); Py_XDECREF(tok->decoding_buffer); + Py_XDECREF(tok->filename); #endif if (tok->fp != NULL && tok->buf != NULL) PyMem_FREE(tok->buf); @@ -1250,8 +1243,13 @@ indenterror(struct tok_state *tok) return 1; } if (tok->altwarning) { - PySys_WriteStderr("%s: inconsistent use of tabs and spaces " +#ifdef PGEN + PySys_WriteStderr("inconsistent use of tabs and spaces " + "in indentation\n"); +#else + PySys_FormatStderr("%U: inconsistent use of tabs and spaces " "in indentation\n", tok->filename); +#endif tok->altwarning = 0; } return 0; @@ -1692,17 +1690,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) return result; } -/* Get -*- encoding -*- from a Python file. +/* Get the encoding of a Python file. Check for the coding cookie and check if + the file starts with a BOM. - PyTokenizer_FindEncoding returns NULL when it can't find the encoding in - the first or second line of the file (in which case the encoding - should be assumed to be PyUnicode_GetDefaultEncoding()). + PyTokenizer_FindEncodingFilename() returns NULL when it can't find the + encoding in the first or second line of the file (in which case the encoding + should be assumed to be UTF-8). + + The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed + by the caller. */ - The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed - by the caller. -*/ char * -PyTokenizer_FindEncoding(int fd) +PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) { struct tok_state *tok; FILE *fp; @@ -1721,6 +1720,20 @@ PyTokenizer_FindEncoding(int fd) fclose(fp); return NULL; } +#ifndef PGEN + if (filename != NULL) { + Py_INCREF(filename); + tok->filename = filename; + } + else { + tok->filename = PyUnicode_FromString("<string>"); + if (tok->filename == NULL) { + fclose(fp); + PyTokenizer_Free(tok); + return encoding; + } + } +#endif while (tok->lineno < 2 && tok->done == E_OK) { PyTokenizer_Get(tok, &p_start, &p_end); } @@ -1734,6 +1747,12 @@ PyTokenizer_FindEncoding(int fd) return encoding; } +char * +PyTokenizer_FindEncoding(int fd) +{ + return PyTokenizer_FindEncodingFilename(fd, NULL); +} + #ifdef Py_DEBUG void diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 2be3bf2..ed1f3aa 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -40,7 +40,13 @@ struct tok_state { int level; /* () [] {} Parentheses nesting level */ /* Used to allow free continuations inside them */ /* Stuff for checking on different tab sizes */ - const char *filename; /* encoded to the filesystem encoding */ +#ifndef PGEN + /* pgen doesn't have access to Python codecs, it cannot decode the input + filename. The bytes filename might be kept, but it is only used by + indenterror() and it is not really needed: pgen only compiles one file + (Grammar/Grammar). */ + PyObject *filename; +#endif int altwarning; /* Issue warning if alternate tabs don't match */ int alterror; /* Issue error if alternate tabs don't match */ int alttabsize; /* Alternate tab spacing */ @@ -69,7 +75,6 @@ extern void PyTokenizer_Free(struct tok_state *); extern int PyTokenizer_Get(struct tok_state *, char **, char **); extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset); -extern char * PyTokenizer_FindEncoding(int); #ifdef __cplusplus } |