diff options
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/Python.asdl | 203 | ||||
-rw-r--r-- | Parser/asdl.py | 25 | ||||
-rwxr-xr-x | Parser/asdl_c.py | 89 | ||||
-rw-r--r-- | Parser/intrcheck.c | 174 | ||||
-rw-r--r-- | Parser/parsetok.c | 67 | ||||
-rw-r--r-- | Parser/parsetok_pgen.c | 2 | ||||
-rw-r--r-- | Parser/pgenmain.c | 2 | ||||
-rw-r--r-- | Parser/tokenizer.c | 100 | ||||
-rw-r--r-- | Parser/tokenizer.h | 9 |
9 files changed, 302 insertions, 369 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl index 9407b2f..6b06dec 100644 --- a/Parser/Python.asdl +++ b/Parser/Python.asdl @@ -1,119 +1,120 @@ --- ASDL's four builtin types are identifier, int, string, object +-- ASDL's five builtin types are identifier, int, string, bytes, object -module Python version "$Revision$" +module Python { - mod = Module(stmt* body) - | Interactive(stmt* body) - | Expression(expr body) + mod = Module(stmt* body) + | Interactive(stmt* body) + | Expression(expr body) - -- not really an actual node but useful in Jython's typesystem. - | Suite(stmt* body) + -- not really an actual node but useful in Jython's typesystem. + | Suite(stmt* body) - stmt = FunctionDef(identifier name, arguments args, + stmt = FunctionDef(identifier name, arguments args, stmt* body, expr* decorator_list, expr? returns) - | ClassDef(identifier name, - expr* bases, - keyword* keywords, - expr? starargs, - expr? kwargs, - stmt* body, - expr* decorator_list) - | Return(expr? value) - - | Delete(expr* targets) - | Assign(expr* targets, expr value) - | AugAssign(expr target, operator op, expr value) - - -- use 'orelse' because else is a keyword in target languages - | For(expr target, expr iter, stmt* body, stmt* orelse) - | While(expr test, stmt* body, stmt* orelse) - | If(expr test, stmt* body, stmt* orelse) - | With(expr context_expr, expr? optional_vars, stmt* body) - - | Raise(expr? exc, expr? cause) - | TryExcept(stmt* body, excepthandler* handlers, stmt* orelse) - | TryFinally(stmt* body, stmt* finalbody) - | Assert(expr test, expr? msg) - - | Import(alias* names) - | ImportFrom(identifier? module, alias* names, int? level) - - | Global(identifier* names) - | Nonlocal(identifier* names) - | Expr(expr value) - | Pass | Break | Continue - - -- XXX Jython will be different - -- col_offset is the byte offset in the utf8 string the parser uses - attributes (int lineno, int col_offset) - - -- BoolOp() can use left & right? - expr = BoolOp(boolop op, expr* values) - | BinOp(expr left, operator op, expr right) - | UnaryOp(unaryop op, expr operand) - | Lambda(arguments args, expr body) - | IfExp(expr test, expr body, expr orelse) - | Dict(expr* keys, expr* values) - | Set(expr* elts) - | ListComp(expr elt, comprehension* generators) - | SetComp(expr elt, comprehension* generators) - | DictComp(expr key, expr value, comprehension* generators) - | GeneratorExp(expr elt, comprehension* generators) - -- the grammar constrains where yield expressions can occur - | Yield(expr? value) - -- need sequences for compare to distinguish between - -- x < 4 < 3 and (x < 4) < 3 - | Compare(expr left, cmpop* ops, expr* comparators) - | Call(expr func, expr* args, keyword* keywords, - expr? starargs, expr? kwargs) - | Num(object n) -- a number as a PyObject. - | Str(string s) -- need to specify raw, unicode, etc? - | Bytes(string s) - | Ellipsis - -- other literals? bools? - - -- the following expression can appear in assignment context - | Attribute(expr value, identifier attr, expr_context ctx) - | Subscript(expr value, slice slice, expr_context ctx) - | Starred(expr value, expr_context ctx) - | Name(identifier id, expr_context ctx) - | List(expr* elts, expr_context ctx) - | Tuple(expr* elts, expr_context ctx) - - -- col_offset is the byte offset in the utf8 string the parser uses - attributes (int lineno, int col_offset) - - expr_context = Load | Store | Del | AugLoad | AugStore | Param - - slice = Slice(expr? lower, expr? upper, expr? step) - | ExtSlice(slice* dims) - | Index(expr value) - - boolop = And | Or - - operator = Add | Sub | Mult | Div | Mod | Pow | LShift + | ClassDef(identifier name, + expr* bases, + keyword* keywords, + expr? starargs, + expr? kwargs, + stmt* body, + expr* decorator_list) + | Return(expr? value) + + | Delete(expr* targets) + | Assign(expr* targets, expr value) + | AugAssign(expr target, operator op, expr value) + + -- use 'orelse' because else is a keyword in target languages + | For(expr target, expr iter, stmt* body, stmt* orelse) + | While(expr test, stmt* body, stmt* orelse) + | If(expr test, stmt* body, stmt* orelse) + | With(withitem* items, stmt* body) + + | Raise(expr? exc, expr? cause) + | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody) + | Assert(expr test, expr? msg) + + | Import(alias* names) + | ImportFrom(identifier? module, alias* names, int? level) + + | Global(identifier* names) + | Nonlocal(identifier* names) + | Expr(expr value) + | Pass | Break | Continue + + -- XXX Jython will be different + -- col_offset is the byte offset in the utf8 string the parser uses + attributes (int lineno, int col_offset) + + -- BoolOp() can use left & right? + expr = BoolOp(boolop op, expr* values) + | BinOp(expr left, operator op, expr right) + | UnaryOp(unaryop op, expr operand) + | Lambda(arguments args, expr body) + | IfExp(expr test, expr body, expr orelse) + | Dict(expr* keys, expr* values) + | Set(expr* elts) + | ListComp(expr elt, comprehension* generators) + | SetComp(expr elt, comprehension* generators) + | DictComp(expr key, expr value, comprehension* generators) + | GeneratorExp(expr elt, comprehension* generators) + -- the grammar constrains where yield expressions can occur + | Yield(expr? value) + | YieldFrom(expr? value) + -- need sequences for compare to distinguish between + -- x < 4 < 3 and (x < 4) < 3 + | Compare(expr left, cmpop* ops, expr* comparators) + | Call(expr func, expr* args, keyword* keywords, + expr? starargs, expr? kwargs) + | Num(object n) -- a number as a PyObject. + | Str(string s) -- need to specify raw, unicode, etc? + | Bytes(bytes s) + | Ellipsis + -- other literals? bools? + + -- the following expression can appear in assignment context + | Attribute(expr value, identifier attr, expr_context ctx) + | Subscript(expr value, slice slice, expr_context ctx) + | Starred(expr value, expr_context ctx) + | Name(identifier id, expr_context ctx) + | List(expr* elts, expr_context ctx) + | Tuple(expr* elts, expr_context ctx) + + -- col_offset is the byte offset in the utf8 string the parser uses + attributes (int lineno, int col_offset) + + expr_context = Load | Store | Del | AugLoad | AugStore | Param + + slice = Slice(expr? lower, expr? upper, expr? step) + | ExtSlice(slice* dims) + | Index(expr value) + + boolop = And | Or + + operator = Add | Sub | Mult | Div | Mod | Pow | LShift | RShift | BitOr | BitXor | BitAnd | FloorDiv - unaryop = Invert | Not | UAdd | USub + unaryop = Invert | Not | UAdd | USub - cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn + cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn - comprehension = (expr target, expr iter, expr* ifs) + comprehension = (expr target, expr iter, expr* ifs) - -- not sure what to call the first argument for raise and except - excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body) - attributes (int lineno, int col_offset) + excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body) + attributes (int lineno, int col_offset) - arguments = (arg* args, identifier? vararg, expr? varargannotation, + arguments = (arg* args, identifier? vararg, expr? varargannotation, arg* kwonlyargs, identifier? kwarg, expr? kwargannotation, expr* defaults, expr* kw_defaults) - arg = (identifier arg, expr? annotation) + arg = (identifier arg, expr? annotation) - -- keyword arguments supplied to call - keyword = (identifier arg, expr value) + -- keyword arguments supplied to call + keyword = (identifier arg, expr value) - -- import name with optional 'as' alias. - alias = (identifier name, identifier? asname) + -- import name with optional 'as' alias. + alias = (identifier name, identifier? asname) + + withitem = (expr context_expr, expr? optional_vars) } diff --git a/Parser/asdl.py b/Parser/asdl.py index 7b4e2dc..01a8b5e 100644 --- a/Parser/asdl.py +++ b/Parser/asdl.py @@ -114,28 +114,20 @@ class ASDLParser(spark.GenericParser, object): raise ASDLSyntaxError(tok.lineno, tok) def p_module_0(self, info): - " module ::= Id Id version { } " - module, name, version, _0, _1 = info + " module ::= Id Id { } " + module, name, _0, _1 = info if module.value != "module": raise ASDLSyntaxError(module.lineno, msg="expected 'module', found %s" % module) - return Module(name, None, version) + return Module(name, None) def p_module(self, info): - " module ::= Id Id version { definitions } " - module, name, version, _0, definitions, _1 = info + " module ::= Id Id { definitions } " + module, name, _0, definitions, _1 = info if module.value != "module": raise ASDLSyntaxError(module.lineno, msg="expected 'module', found %s" % module) - return Module(name, definitions, version) - - def p_version(self, info): - "version ::= Id String" - version, V = info - if version.value != "version": - raise ASDLSyntaxError(version.lineno, - msg="expected 'version', found %" % version) - return V + return Module(name, definitions) def p_definition_0(self, definition): " definitions ::= definition " @@ -236,7 +228,7 @@ class ASDLParser(spark.GenericParser, object): " field ::= Id ? " return Field(type[0], opt=True) -builtin_types = ("identifier", "string", "int", "bool", "object") +builtin_types = ("identifier", "string", "bytes", "int", "object") # below is a collection of classes to capture the AST of an AST :-) # not sure if any of the methods are useful yet, but I'm adding them @@ -246,10 +238,9 @@ class AST(object): pass # a marker class class Module(AST): - def __init__(self, name, dfns, version): + def __init__(self, name, dfns): self.name = name self.dfns = dfns - self.version = version self.types = {} # maps type name to value (from dfns) for type in dfns: self.types[type.name.value] = type.value diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index b85c07e..769f73f 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -5,6 +5,7 @@ # handle fields that have a type but no name import os, sys +import subprocess import asdl @@ -84,8 +85,16 @@ class EmitVisitor(asdl.VisitorBase): def __init__(self, file): self.file = file + self.identifiers = set() super(EmitVisitor, self).__init__() + def emit_identifier(self, name): + name = str(name) + if name in self.identifiers: + return + self.emit("_Py_IDENTIFIER(%s);" % name, 0) + self.identifiers.add(name) + def emit(self, s, depth, reflow=True): # XXX reflow long lines? if reflow: @@ -485,12 +494,12 @@ class Obj2ModVisitor(PickleVisitor): def visitField(self, field, name, sum=None, prod=None, depth=0): ctype = get_c_type(field.type) - self.emit("if (PyObject_HasAttrString(obj, \"%s\")) {" % field.name, depth) + self.emit("if (_PyObject_HasAttrId(obj, &PyId_%s)) {" % field.name, depth) self.emit("int res;", depth+1) if field.seq: self.emit("Py_ssize_t len;", depth+1) self.emit("Py_ssize_t i;", depth+1) - self.emit("tmp = PyObject_GetAttrString(obj, \"%s\");" % field.name, depth+1) + self.emit("tmp = _PyObject_GetAttrId(obj, &PyId_%s);" % field.name, depth+1) self.emit("if (tmp == NULL) goto failed;", depth+1) if field.seq: self.emit("if (!PyList_Check(tmp)) {", depth+1) @@ -552,6 +561,8 @@ class PyTypesDeclareVisitor(PickleVisitor): self.emit("static PyTypeObject *%s_type;" % name, 0) self.emit("static PyObject* ast2obj_%s(void*);" % name, 0) if prod.fields: + for f in prod.fields: + self.emit_identifier(f.name) self.emit("static char *%s_fields[]={" % name,0) for f in prod.fields: self.emit('"%s",' % f.name, 1) @@ -560,6 +571,8 @@ class PyTypesDeclareVisitor(PickleVisitor): def visitSum(self, sum, name): self.emit("static PyTypeObject *%s_type;" % name, 0) if sum.attributes: + for a in sum.attributes: + self.emit_identifier(a.name) self.emit("static char *%s_attributes[] = {" % name, 0) for a in sum.attributes: self.emit('"%s",' % a.name, 1) @@ -579,6 +592,8 @@ class PyTypesDeclareVisitor(PickleVisitor): def visitConstructor(self, cons, name): self.emit("static PyTypeObject *%s_type;" % cons.name, 0) if cons.fields: + for t in cons.fields: + self.emit_identifier(t.name) self.emit("static char *%s_fields[]={" % cons.name, 0) for t in cons.fields: self.emit('"%s",' % t.name, 1) @@ -588,13 +603,25 @@ class PyTypesVisitor(PickleVisitor): def visitModule(self, mod): self.emit(""" +typedef struct { + PyObject_HEAD + PyObject *dict; +} AST_object; + +static void +ast_dealloc(AST_object *self) +{ + Py_CLEAR(self->dict); +} + static int ast_type_init(PyObject *self, PyObject *args, PyObject *kw) { + _Py_IDENTIFIER(_fields); Py_ssize_t i, numfields = 0; int res = -1; PyObject *key, *value, *fields; - fields = PyObject_GetAttrString((PyObject*)Py_TYPE(self), "_fields"); + fields = _PyObject_GetAttrId((PyObject*)Py_TYPE(self), &PyId__fields); if (!fields) PyErr_Clear(); if (fields) { @@ -644,7 +671,8 @@ static PyObject * ast_type_reduce(PyObject *self, PyObject *unused) { PyObject *res; - PyObject *dict = PyObject_GetAttrString(self, "__dict__"); + _Py_IDENTIFIER(__dict__); + PyObject *dict = _PyObject_GetAttrId(self, &PyId___dict__); if (dict == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); @@ -664,12 +692,17 @@ static PyMethodDef ast_type_methods[] = { {NULL} }; +static PyGetSetDef ast_type_getsets[] = { + {"__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict}, + {NULL} +}; + static PyTypeObject AST_type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "_ast.AST", - sizeof(PyObject), + sizeof(AST_object), 0, - 0, /* tp_dealloc */ + (destructor)ast_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ @@ -694,12 +727,12 @@ static PyTypeObject AST_type = { 0, /* tp_iternext */ ast_type_methods, /* tp_methods */ 0, /* tp_members */ - 0, /* tp_getset */ + ast_type_getsets, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ + offsetof(AST_object, dict),/* tp_dictoffset */ (initproc)ast_type_init, /* tp_init */ PyType_GenericAlloc, /* tp_alloc */ PyType_GenericNew, /* tp_new */ @@ -730,6 +763,7 @@ static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int static int add_attributes(PyTypeObject* type, char**attrs, int num_fields) { int i, result; + _Py_IDENTIFIER(_attributes); PyObject *s, *l = PyTuple_New(num_fields); if (!l) return 0; @@ -741,7 +775,7 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields) } PyTuple_SET_ITEM(l, i, s); } - result = PyObject_SetAttrString((PyObject*)type, "_attributes", l) >= 0; + result = _PyObject_SetAttrId((PyObject*)type, &PyId__attributes, l) >= 0; Py_DECREF(l); return result; } @@ -750,7 +784,7 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields) static PyObject* ast2obj_list(asdl_seq *seq, PyObject* (*func)(void*)) { - int i, n = asdl_seq_LEN(seq); + Py_ssize_t i, n = asdl_seq_LEN(seq); PyObject *result = PyList_New(n); PyObject *value; if (!result) @@ -775,6 +809,7 @@ static PyObject* ast2obj_object(void *o) } #define ast2obj_identifier ast2obj_object #define ast2obj_string ast2obj_object +#define ast2obj_bytes ast2obj_object static PyObject* ast2obj_int(long b) { @@ -812,6 +847,15 @@ static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena) return obj2ast_object(obj, out, arena); } +static int obj2ast_bytes(PyObject* obj, PyObject** out, PyArena* arena) +{ + if (!PyBytes_CheckExact(obj)) { + PyErr_SetString(PyExc_TypeError, "AST bytes must be of type bytes"); + return 1; + } + return obj2ast_object(obj, out, arena); +} + static int obj2ast_int(PyObject* obj, int* out, PyArena* arena) { int i; @@ -910,10 +954,6 @@ class ASTModuleVisitor(PickleVisitor): self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return NULL;', 1) self.emit('if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)', 1) self.emit("return NULL;", 2) - # Value of version: "$Revision$" - self.emit('if (PyModule_AddStringConstant(m, "__version__", "%s") < 0)' - % mod.version, 1) - self.emit("return NULL;", 2) for dfn in mod.dfns: self.visit(dfn) self.emit("return m;", 1) @@ -997,7 +1037,7 @@ class ObjVisitor(PickleVisitor): for a in sum.attributes: self.emit("value = ast2obj_%s(o->%s);" % (a.type, a.name), 1) self.emit("if (!value) goto failed;", 1) - self.emit('if (PyObject_SetAttrString(result, "%s", value) < 0)' % a.name, 1) + self.emit('if (_PyObject_SetAttrId(result, &PyId_%s, value) < 0)' % a.name, 1) self.emit('goto failed;', 2) self.emit('Py_DECREF(value);', 1) self.func_end() @@ -1043,7 +1083,7 @@ class ObjVisitor(PickleVisitor): value = "o->v.%s.%s" % (name, field.name) self.set(field, value, depth) emit("if (!value) goto failed;", 0) - emit('if (PyObject_SetAttrString(result, "%s", value) == -1)' % field.name, 0) + emit('if (_PyObject_SetAttrId(result, &PyId_%s, value) == -1)' % field.name, 0) emit("goto failed;", 1) emit("Py_DECREF(value);", 0) @@ -1066,7 +1106,7 @@ class ObjVisitor(PickleVisitor): # While the sequence elements are stored as void*, # ast2obj_cmpop expects an enum self.emit("{", depth) - self.emit("int i, n = asdl_seq_LEN(%s);" % value, depth+1) + self.emit("Py_ssize_t i, n = asdl_seq_LEN(%s);" % value, depth+1) self.emit("value = PyList_New(n);", depth+1) self.emit("if (!value) goto failed;", depth+1) self.emit("for(i = 0; i < n; i++)", depth+1) @@ -1134,24 +1174,12 @@ class ChainOfVisitors: common_msg = "/* File automatically generated by %s. */\n\n" -c_file_msg = """ -/* - __version__ %s. - - This module must be committed separately after each AST grammar change; - The __version__ number is set to the revision number of the commit - containing the grammar change. -*/ - -""" - def main(srcfile): argv0 = sys.argv[0] components = argv0.split(os.sep) argv0 = os.sep.join(components[-2:]) auto_gen_msg = common_msg % argv0 mod = asdl.parse(srcfile) - mod.version = "82163" if not asdl.check(mod): sys.exit(1) if INC_DIR: @@ -1173,7 +1201,8 @@ def main(srcfile): p = os.path.join(SRC_DIR, str(mod.name) + "-ast.c") f = open(p, "w") f.write(auto_gen_msg) - f.write(c_file_msg % mod.version) + f.write('#include <stddef.h>\n') + f.write('\n') f.write('#include "Python.h"\n') f.write('#include "%s-ast.h"\n' % mod.name) f.write('\n') diff --git a/Parser/intrcheck.c b/Parser/intrcheck.c deleted file mode 100644 index 4439864..0000000 --- a/Parser/intrcheck.c +++ /dev/null @@ -1,174 +0,0 @@ - -/* Check for interrupts */ - -#include "Python.h" -#include "pythread.h" - -#ifdef QUICKWIN - -#include <io.h> - -void -PyOS_InitInterrupts(void) -{ -} - -void -PyOS_FiniInterrupts(void) -{ -} - -int -PyOS_InterruptOccurred(void) -{ - _wyield(); -} - -#define OK - -#endif /* QUICKWIN */ - -#if defined(_M_IX86) && !defined(__QNX__) -#include <io.h> -#endif - -#if defined(MSDOS) && !defined(QUICKWIN) - -#ifdef __GNUC__ - -/* This is for DJGPP's GO32 extender. I don't know how to trap - * control-C (There's no API for ctrl-C, and I don't want to mess with - * the interrupt vectors.) However, this DOES catch control-break. - * --Amrit - */ - -#include <go32.h> - -void -PyOS_InitInterrupts(void) -{ - _go32_want_ctrl_break(1 /* TRUE */); -} - -void -PyOS_FiniInterrupts(void) -{ -} - -int -PyOS_InterruptOccurred(void) -{ - return _go32_was_ctrl_break_hit(); -} - -#else /* !__GNUC__ */ - -/* This might work for MS-DOS (untested though): */ - -void -PyOS_InitInterrupts(void) -{ -} - -void -PyOS_FiniInterrupts(void) -{ -} - -int -PyOS_InterruptOccurred(void) -{ - int interrupted = 0; - while (kbhit()) { - if (getch() == '\003') - interrupted = 1; - } - return interrupted; -} - -#endif /* __GNUC__ */ - -#define OK - -#endif /* MSDOS && !QUICKWIN */ - - -#ifndef OK - -/* Default version -- for real operating systems and for Standard C */ - -#include <stdio.h> -#include <string.h> -#include <signal.h> - -static int interrupted; - -void -PyErr_SetInterrupt(void) -{ - interrupted = 1; -} - -extern int PyErr_CheckSignals(void); - -static int -checksignals_witharg(void * arg) -{ - return PyErr_CheckSignals(); -} - -static void -intcatcher(int sig) -{ - extern void Py_Exit(int); - static char message[] = -"python: to interrupt a truly hanging Python program, interrupt once more.\n"; - switch (interrupted++) { - case 0: - break; - case 1: - write(2, message, strlen(message)); - break; - case 2: - interrupted = 0; - Py_Exit(1); - break; - } - PyOS_setsig(SIGINT, intcatcher); - Py_AddPendingCall(checksignals_witharg, NULL); -} - -static void (*old_siginthandler)(int) = SIG_DFL; - -void -PyOS_InitInterrupts(void) -{ - if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN) - PyOS_setsig(SIGINT, intcatcher); -} - -void -PyOS_FiniInterrupts(void) -{ - PyOS_setsig(SIGINT, old_siginthandler); -} - -int -PyOS_InterruptOccurred(void) -{ - if (!interrupted) - return 0; - interrupted = 0; - return 1; -} - -#endif /* !OK */ - -void -PyOS_AfterFork(void) -{ -#ifdef WITH_THREAD - PyEval_ReInitThreads(); - PyThread_ReInitTLS(); -#endif -} diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 73e7e3c..7beb735 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -13,7 +13,7 @@ /* Forward */ static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); -static void initerr(perrdetail *err_ret, const char* filename); +static int initerr(perrdetail *err_ret, const char* filename); /* Parse input coming from a string. Return error code, print some errors. */ node * @@ -48,7 +48,8 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, struct tok_state *tok; int exec_input = start == file_input; - initerr(err_ret, filename); + if (initerr(err_ret, filename) < 0) + return NULL; if (*flags & PyPARSE_IGNORE_COOKIE) tok = PyTokenizer_FromUTF8(s, exec_input); @@ -59,7 +60,10 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, return NULL; } - tok->filename = filename ? filename : "<string>"; +#ifndef PGEN + Py_INCREF(err_ret->filename); + tok->filename = err_ret->filename; +#endif return parsetok(tok, g, start, err_ret, flags); } @@ -90,13 +94,17 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, { struct tok_state *tok; - initerr(err_ret, filename); + if (initerr(err_ret, filename) < 0) + return NULL; if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) { err_ret->error = E_NOMEM; return NULL; } - tok->filename = filename; +#ifndef PGEN + Py_INCREF(err_ret->filename); + tok->filename = err_ret->filename; +#endif return parsetok(tok, g, start, err_ret, flags); } @@ -127,7 +135,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, { parser_state *ps; node *n; - int started = 0, handling_import = 0, handling_with = 0; + int started = 0; if ((ps = PyParser_New(g, start)) == NULL) { fprintf(stderr, "no mem for new parser\n"); @@ -154,7 +162,6 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, } if (type == ENDMARKER && started) { type = NEWLINE; /* Add an extra newline */ - handling_with = handling_import = 0; started = 0; /* Add the right number of dedent tokens, except if a certain flag is given -- @@ -217,6 +224,36 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, if (err_ret->error == E_DONE) { n = ps->p_tree; ps->p_tree = NULL; + +#ifndef PGEN + /* Check that the source for a single input statement really + is a single statement by looking at what is left in the + buffer after parsing. Trailing whitespace and comments + are OK. */ + if (start == single_input) { + char *cur = tok->cur; + char c = *tok->cur; + + for (;;) { + while (c == ' ' || c == '\t' || c == '\n' || c == '\014') + c = *++cur; + + if (!c) + break; + + if (c != '#') { + err_ret->error = E_BADSINGLE; + PyNode_Free(n); + n = NULL; + break; + } + + /* Suck up comment. */ + while (c && c != '\n') + c = *++cur; + } + } +#endif } else n = NULL; @@ -227,7 +264,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, PyParser_Delete(ps); if (n == NULL) { - if (tok->lineno <= 1 && tok->done == E_EOF) + if (tok->done == E_EOF) err_ret->error = E_EOF; err_ret->lineno = tok->lineno; if (tok->buf != NULL) { @@ -270,14 +307,24 @@ done: return n; } -static void +static int initerr(perrdetail *err_ret, const char *filename) { err_ret->error = E_OK; - err_ret->filename = filename; err_ret->lineno = 0; err_ret->offset = 0; err_ret->text = NULL; err_ret->token = -1; err_ret->expected = -1; +#ifndef PGEN + if (filename) + err_ret->filename = PyUnicode_DecodeFSDefault(filename); + else + err_ret->filename = PyUnicode_FromString("<string>"); + if (err_ret->filename == NULL) { + err_ret->error = E_ERROR; + return -1; + } +#endif + return 0; } diff --git a/Parser/parsetok_pgen.c b/Parser/parsetok_pgen.c new file mode 100644 index 0000000..97b9288 --- /dev/null +++ b/Parser/parsetok_pgen.c @@ -0,0 +1,2 @@ +#define PGEN +#include "parsetok.c" diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c index 4b7b55a..52b8380 100644 --- a/Parser/pgenmain.c +++ b/Parser/pgenmain.c @@ -29,6 +29,8 @@ int Py_IgnoreEnvironmentFlag; /* Forward */ grammar *getgrammar(char *filename); +void Py_Exit(int) _Py_NO_RETURN; + void Py_Exit(int sts) { diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 5ba12a4..93a4a5c 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -128,7 +128,6 @@ tok_new(void) tok->prompt = tok->nextprompt = NULL; tok->lineno = 0; tok->level = 0; - tok->filename = NULL; tok->altwarning = 1; tok->alterror = 1; tok->alttabsize = 1; @@ -140,6 +139,7 @@ tok_new(void) tok->encoding = NULL; tok->cont_line = 0; #ifndef PGEN + tok->filename = NULL; tok->decoding_readline = NULL; tok->decoding_buffer = NULL; #endif @@ -462,6 +462,8 @@ static int fp_setreadl(struct tok_state *tok, const char* enc) { PyObject *readline = NULL, *stream = NULL, *io = NULL; + _Py_IDENTIFIER(open); + _Py_IDENTIFIER(readline); int fd; io = PyImport_ImportModuleNoBlock("io"); @@ -474,13 +476,13 @@ fp_setreadl(struct tok_state *tok, const char* enc) goto cleanup; } - stream = PyObject_CallMethod(io, "open", "isisOOO", + stream = _PyObject_CallMethodId(io, &PyId_open, "isisOOO", fd, "r", -1, enc, Py_None, Py_None, Py_False); if (stream == NULL) goto cleanup; Py_XDECREF(tok->decoding_readline); - readline = PyObject_GetAttrString(stream, "readline"); + readline = _PyObject_GetAttrId(stream, &PyId_readline); tok->decoding_readline = readline; /* The file has been reopened; parsing will restart from @@ -545,7 +547,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok) { char *line = NULL; int badchar = 0; - PyObject *filename; for (;;) { if (tok->decoding_state == STATE_NORMAL) { /* We already have a codec associated with @@ -586,19 +587,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok) if (badchar) { /* Need to add 1 to the line number, since this line has not been counted, yet. */ - if (tok->filename != NULL) - filename = PyUnicode_DecodeFSDefault(tok->filename); - else - filename = PyUnicode_FromString("<file>"); - if (filename != NULL) { - PyErr_Format(PyExc_SyntaxError, - "Non-UTF-8 code starting with '\\x%.2x' " - "in file %U on line %i, " - "but no encoding declared; " - "see http://python.org/dev/peps/pep-0263/ for details", - badchar, filename, tok->lineno + 1); - Py_DECREF(filename); - } + PyErr_Format(PyExc_SyntaxError, + "Non-UTF-8 code starting with '\\x%.2x' " + "in file %U on line %i, " + "but no encoding declared; " + "see http://python.org/dev/peps/pep-0263/ for details", + badchar, tok->filename, tok->lineno + 1); return error_ret(tok); } #endif @@ -856,6 +850,7 @@ PyTokenizer_Free(struct tok_state *tok) #ifndef PGEN Py_XDECREF(tok->decoding_readline); Py_XDECREF(tok->decoding_buffer); + Py_XDECREF(tok->filename); #endif if (tok->fp != NULL && tok->buf != NULL) PyMem_FREE(tok->buf); @@ -1250,8 +1245,13 @@ indenterror(struct tok_state *tok) return 1; } if (tok->altwarning) { - PySys_WriteStderr("%s: inconsistent use of tabs and spaces " +#ifdef PGEN + PySys_WriteStderr("inconsistent use of tabs and spaces " + "in indentation\n"); +#else + PySys_FormatStderr("%U: inconsistent use of tabs and spaces " "in indentation\n", tok->filename); +#endif tok->altwarning = 0; } return 0; @@ -1260,14 +1260,16 @@ indenterror(struct tok_state *tok) #ifdef PGEN #define verify_identifier(tok) 1 #else -/* Verify that the identifier follows PEP 3131. */ +/* Verify that the identifier follows PEP 3131. + All identifier strings are guaranteed to be "ready" unicode objects. + */ static int verify_identifier(struct tok_state *tok) { PyObject *s; int result; s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL); - if (s == NULL) { + if (s == NULL || PyUnicode_READY(s) == -1) { if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { PyErr_Clear(); tok->done = E_IDENTIFIER; @@ -1410,13 +1412,20 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) /* Identifier (most frequent token!) */ nonascii = 0; if (is_potential_identifier_start(c)) { - /* Process b"", r"" and br"" */ - if (c == 'b' || c == 'B') { - c = tok_nextc(tok); - if (c == '"' || c == '\'') - goto letter_quote; - } - if (c == 'r' || c == 'R') { + /* Process b"", r"", u"", br"" and rb"" */ + int saw_b = 0, saw_r = 0, saw_u = 0; + while (1) { + if (!(saw_b || saw_u) && (c == 'b' || c == 'B')) + saw_b = 1; + /* Since this is a backwards compatibility support literal we don't + want to support it in arbitrary order like byte literals. */ + else if (!(saw_b || saw_u || saw_r) && (c == 'u' || c == 'U')) + saw_u = 1; + /* ur"" and ru"" are not supported */ + else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) + saw_r = 1; + else + break; c = tok_nextc(tok); if (c == '"' || c == '\'') goto letter_quote; @@ -1692,17 +1701,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) return result; } -/* Get -*- encoding -*- from a Python file. +/* Get the encoding of a Python file. Check for the coding cookie and check if + the file starts with a BOM. - PyTokenizer_FindEncoding returns NULL when it can't find the encoding in - the first or second line of the file (in which case the encoding - should be assumed to be PyUnicode_GetDefaultEncoding()). + PyTokenizer_FindEncodingFilename() returns NULL when it can't find the + encoding in the first or second line of the file (in which case the encoding + should be assumed to be UTF-8). + + The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed + by the caller. */ - The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed - by the caller. -*/ char * -PyTokenizer_FindEncoding(int fd) +PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) { struct tok_state *tok; FILE *fp; @@ -1721,6 +1731,20 @@ PyTokenizer_FindEncoding(int fd) fclose(fp); return NULL; } +#ifndef PGEN + if (filename != NULL) { + Py_INCREF(filename); + tok->filename = filename; + } + else { + tok->filename = PyUnicode_FromString("<string>"); + if (tok->filename == NULL) { + fclose(fp); + PyTokenizer_Free(tok); + return encoding; + } + } +#endif while (tok->lineno < 2 && tok->done == E_OK) { PyTokenizer_Get(tok, &p_start, &p_end); } @@ -1734,6 +1758,12 @@ PyTokenizer_FindEncoding(int fd) return encoding; } +char * +PyTokenizer_FindEncoding(int fd) +{ + return PyTokenizer_FindEncodingFilename(fd, NULL); +} + #ifdef Py_DEBUG void diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 2be3bf2..ed1f3aa 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -40,7 +40,13 @@ struct tok_state { int level; /* () [] {} Parentheses nesting level */ /* Used to allow free continuations inside them */ /* Stuff for checking on different tab sizes */ - const char *filename; /* encoded to the filesystem encoding */ +#ifndef PGEN + /* pgen doesn't have access to Python codecs, it cannot decode the input + filename. The bytes filename might be kept, but it is only used by + indenterror() and it is not really needed: pgen only compiles one file + (Grammar/Grammar). */ + PyObject *filename; +#endif int altwarning; /* Issue warning if alternate tabs don't match */ int alterror; /* Issue error if alternate tabs don't match */ int alttabsize; /* Alternate tab spacing */ @@ -69,7 +75,6 @@ extern void PyTokenizer_Free(struct tok_state *); extern int PyTokenizer_Get(struct tok_state *, char **, char **); extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset); -extern char * PyTokenizer_FindEncoding(int); #ifdef __cplusplus } |