diff options
Diffstat (limited to 'Parser')
-rw-r--r-- | Parser/Python.asdl | 14 | ||||
-rw-r--r-- | Parser/asdl.py | 25 | ||||
-rwxr-xr-x | Parser/asdl_c.py | 59 | ||||
-rw-r--r-- | Parser/intrcheck.c | 174 | ||||
-rw-r--r-- | Parser/parsetok.c | 37 | ||||
-rw-r--r-- | Parser/parsetok_pgen.c | 2 | ||||
-rw-r--r-- | Parser/pgenmain.c | 2 | ||||
-rw-r--r-- | Parser/tokenizer.c | 79 | ||||
-rw-r--r-- | Parser/tokenizer.h | 9 |
9 files changed, 139 insertions, 262 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl index 9407b2f..6955199 100644 --- a/Parser/Python.asdl +++ b/Parser/Python.asdl @@ -1,6 +1,6 @@ --- ASDL's four builtin types are identifier, int, string, object +-- ASDL's five builtin types are identifier, int, string, bytes, object -module Python version "$Revision$" +module Python { mod = Module(stmt* body) | Interactive(stmt* body) @@ -28,11 +28,10 @@ module Python version "$Revision$" | For(expr target, expr iter, stmt* body, stmt* orelse) | While(expr test, stmt* body, stmt* orelse) | If(expr test, stmt* body, stmt* orelse) - | With(expr context_expr, expr? optional_vars, stmt* body) + | With(withitem* items, stmt* body) | Raise(expr? exc, expr? cause) - | TryExcept(stmt* body, excepthandler* handlers, stmt* orelse) - | TryFinally(stmt* body, stmt* finalbody) + | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody) | Assert(expr test, expr? msg) | Import(alias* names) @@ -68,7 +67,7 @@ module Python version "$Revision$" expr? starargs, expr? kwargs) | Num(object n) -- a number as a PyObject. | Str(string s) -- need to specify raw, unicode, etc? - | Bytes(string s) + | Bytes(bytes s) | Ellipsis -- other literals? bools? @@ -100,7 +99,6 @@ module Python version "$Revision$" comprehension = (expr target, expr iter, expr* ifs) - -- not sure what to call the first argument for raise and except excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body) attributes (int lineno, int col_offset) @@ -115,5 +113,7 @@ module Python version "$Revision$" -- import name with optional 'as' alias. alias = (identifier name, identifier? asname) + + withitem = (expr context_expr, expr? optional_vars) } diff --git a/Parser/asdl.py b/Parser/asdl.py index 7b4e2dc..01a8b5e 100644 --- a/Parser/asdl.py +++ b/Parser/asdl.py @@ -114,28 +114,20 @@ class ASDLParser(spark.GenericParser, object): raise ASDLSyntaxError(tok.lineno, tok) def p_module_0(self, info): - " module ::= Id Id version { } " - module, name, version, _0, _1 = info + " module ::= Id Id { } " + module, name, _0, _1 = info if module.value != "module": raise ASDLSyntaxError(module.lineno, msg="expected 'module', found %s" % module) - return Module(name, None, version) + return Module(name, None) def p_module(self, info): - " module ::= Id Id version { definitions } " - module, name, version, _0, definitions, _1 = info + " module ::= Id Id { definitions } " + module, name, _0, definitions, _1 = info if module.value != "module": raise ASDLSyntaxError(module.lineno, msg="expected 'module', found %s" % module) - return Module(name, definitions, version) - - def p_version(self, info): - "version ::= Id String" - version, V = info - if version.value != "version": - raise ASDLSyntaxError(version.lineno, - msg="expected 'version', found %" % version) - return V + return Module(name, definitions) def p_definition_0(self, definition): " definitions ::= definition " @@ -236,7 +228,7 @@ class ASDLParser(spark.GenericParser, object): " field ::= Id ? " return Field(type[0], opt=True) -builtin_types = ("identifier", "string", "int", "bool", "object") +builtin_types = ("identifier", "string", "bytes", "int", "object") # below is a collection of classes to capture the AST of an AST :-) # not sure if any of the methods are useful yet, but I'm adding them @@ -246,10 +238,9 @@ class AST(object): pass # a marker class class Module(AST): - def __init__(self, name, dfns, version): + def __init__(self, name, dfns): self.name = name self.dfns = dfns - self.version = version self.types = {} # maps type name to value (from dfns) for type in dfns: self.types[type.name.value] = type.value diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index b85c07e..07c06de 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -5,6 +5,7 @@ # handle fields that have a type but no name import os, sys +import subprocess import asdl @@ -84,8 +85,16 @@ class EmitVisitor(asdl.VisitorBase): def __init__(self, file): self.file = file + self.identifiers = set() super(EmitVisitor, self).__init__() + def emit_identifier(self, name): + name = str(name) + if name in self.identifiers: + return + self.emit("_Py_IDENTIFIER(%s);" % name, 0) + self.identifiers.add(name) + def emit(self, s, depth, reflow=True): # XXX reflow long lines? if reflow: @@ -485,12 +494,12 @@ class Obj2ModVisitor(PickleVisitor): def visitField(self, field, name, sum=None, prod=None, depth=0): ctype = get_c_type(field.type) - self.emit("if (PyObject_HasAttrString(obj, \"%s\")) {" % field.name, depth) + self.emit("if (_PyObject_HasAttrId(obj, &PyId_%s)) {" % field.name, depth) self.emit("int res;", depth+1) if field.seq: self.emit("Py_ssize_t len;", depth+1) self.emit("Py_ssize_t i;", depth+1) - self.emit("tmp = PyObject_GetAttrString(obj, \"%s\");" % field.name, depth+1) + self.emit("tmp = _PyObject_GetAttrId(obj, &PyId_%s);" % field.name, depth+1) self.emit("if (tmp == NULL) goto failed;", depth+1) if field.seq: self.emit("if (!PyList_Check(tmp)) {", depth+1) @@ -552,6 +561,8 @@ class PyTypesDeclareVisitor(PickleVisitor): self.emit("static PyTypeObject *%s_type;" % name, 0) self.emit("static PyObject* ast2obj_%s(void*);" % name, 0) if prod.fields: + for f in prod.fields: + self.emit_identifier(f.name) self.emit("static char *%s_fields[]={" % name,0) for f in prod.fields: self.emit('"%s",' % f.name, 1) @@ -560,6 +571,8 @@ class PyTypesDeclareVisitor(PickleVisitor): def visitSum(self, sum, name): self.emit("static PyTypeObject *%s_type;" % name, 0) if sum.attributes: + for a in sum.attributes: + self.emit_identifier(a.name) self.emit("static char *%s_attributes[] = {" % name, 0) for a in sum.attributes: self.emit('"%s",' % a.name, 1) @@ -579,6 +592,8 @@ class PyTypesDeclareVisitor(PickleVisitor): def visitConstructor(self, cons, name): self.emit("static PyTypeObject *%s_type;" % cons.name, 0) if cons.fields: + for t in cons.fields: + self.emit_identifier(t.name) self.emit("static char *%s_fields[]={" % cons.name, 0) for t in cons.fields: self.emit('"%s",' % t.name, 1) @@ -591,10 +606,11 @@ class PyTypesVisitor(PickleVisitor): static int ast_type_init(PyObject *self, PyObject *args, PyObject *kw) { + _Py_IDENTIFIER(_fields); Py_ssize_t i, numfields = 0; int res = -1; PyObject *key, *value, *fields; - fields = PyObject_GetAttrString((PyObject*)Py_TYPE(self), "_fields"); + fields = _PyObject_GetAttrId((PyObject*)Py_TYPE(self), &PyId__fields); if (!fields) PyErr_Clear(); if (fields) { @@ -644,7 +660,8 @@ static PyObject * ast_type_reduce(PyObject *self, PyObject *unused) { PyObject *res; - PyObject *dict = PyObject_GetAttrString(self, "__dict__"); + _Py_IDENTIFIER(__dict__); + PyObject *dict = _PyObject_GetAttrId(self, &PyId___dict__); if (dict == NULL) { if (PyErr_ExceptionMatches(PyExc_AttributeError)) PyErr_Clear(); @@ -730,6 +747,7 @@ static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int static int add_attributes(PyTypeObject* type, char**attrs, int num_fields) { int i, result; + _Py_IDENTIFIER(_attributes); PyObject *s, *l = PyTuple_New(num_fields); if (!l) return 0; @@ -741,7 +759,7 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields) } PyTuple_SET_ITEM(l, i, s); } - result = PyObject_SetAttrString((PyObject*)type, "_attributes", l) >= 0; + result = _PyObject_SetAttrId((PyObject*)type, &PyId__attributes, l) >= 0; Py_DECREF(l); return result; } @@ -775,6 +793,7 @@ static PyObject* ast2obj_object(void *o) } #define ast2obj_identifier ast2obj_object #define ast2obj_string ast2obj_object +#define ast2obj_bytes ast2obj_object static PyObject* ast2obj_int(long b) { @@ -812,6 +831,15 @@ static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena) return obj2ast_object(obj, out, arena); } +static int obj2ast_bytes(PyObject* obj, PyObject** out, PyArena* arena) +{ + if (!PyBytes_CheckExact(obj)) { + PyErr_SetString(PyExc_TypeError, "AST bytes must be of type bytes"); + return 1; + } + return obj2ast_object(obj, out, arena); +} + static int obj2ast_int(PyObject* obj, int* out, PyArena* arena) { int i; @@ -910,10 +938,6 @@ class ASTModuleVisitor(PickleVisitor): self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return NULL;', 1) self.emit('if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)', 1) self.emit("return NULL;", 2) - # Value of version: "$Revision$" - self.emit('if (PyModule_AddStringConstant(m, "__version__", "%s") < 0)' - % mod.version, 1) - self.emit("return NULL;", 2) for dfn in mod.dfns: self.visit(dfn) self.emit("return m;", 1) @@ -997,7 +1021,7 @@ class ObjVisitor(PickleVisitor): for a in sum.attributes: self.emit("value = ast2obj_%s(o->%s);" % (a.type, a.name), 1) self.emit("if (!value) goto failed;", 1) - self.emit('if (PyObject_SetAttrString(result, "%s", value) < 0)' % a.name, 1) + self.emit('if (_PyObject_SetAttrId(result, &PyId_%s, value) < 0)' % a.name, 1) self.emit('goto failed;', 2) self.emit('Py_DECREF(value);', 1) self.func_end() @@ -1043,7 +1067,7 @@ class ObjVisitor(PickleVisitor): value = "o->v.%s.%s" % (name, field.name) self.set(field, value, depth) emit("if (!value) goto failed;", 0) - emit('if (PyObject_SetAttrString(result, "%s", value) == -1)' % field.name, 0) + emit('if (_PyObject_SetAttrId(result, &PyId_%s, value) == -1)' % field.name, 0) emit("goto failed;", 1) emit("Py_DECREF(value);", 0) @@ -1134,24 +1158,12 @@ class ChainOfVisitors: common_msg = "/* File automatically generated by %s. */\n\n" -c_file_msg = """ -/* - __version__ %s. - - This module must be committed separately after each AST grammar change; - The __version__ number is set to the revision number of the commit - containing the grammar change. -*/ - -""" - def main(srcfile): argv0 = sys.argv[0] components = argv0.split(os.sep) argv0 = os.sep.join(components[-2:]) auto_gen_msg = common_msg % argv0 mod = asdl.parse(srcfile) - mod.version = "82163" if not asdl.check(mod): sys.exit(1) if INC_DIR: @@ -1173,7 +1185,6 @@ def main(srcfile): p = os.path.join(SRC_DIR, str(mod.name) + "-ast.c") f = open(p, "w") f.write(auto_gen_msg) - f.write(c_file_msg % mod.version) f.write('#include "Python.h"\n') f.write('#include "%s-ast.h"\n' % mod.name) f.write('\n') diff --git a/Parser/intrcheck.c b/Parser/intrcheck.c deleted file mode 100644 index 4439864..0000000 --- a/Parser/intrcheck.c +++ /dev/null @@ -1,174 +0,0 @@ - -/* Check for interrupts */ - -#include "Python.h" -#include "pythread.h" - -#ifdef QUICKWIN - -#include <io.h> - -void -PyOS_InitInterrupts(void) -{ -} - -void -PyOS_FiniInterrupts(void) -{ -} - -int -PyOS_InterruptOccurred(void) -{ - _wyield(); -} - -#define OK - -#endif /* QUICKWIN */ - -#if defined(_M_IX86) && !defined(__QNX__) -#include <io.h> -#endif - -#if defined(MSDOS) && !defined(QUICKWIN) - -#ifdef __GNUC__ - -/* This is for DJGPP's GO32 extender. I don't know how to trap - * control-C (There's no API for ctrl-C, and I don't want to mess with - * the interrupt vectors.) However, this DOES catch control-break. - * --Amrit - */ - -#include <go32.h> - -void -PyOS_InitInterrupts(void) -{ - _go32_want_ctrl_break(1 /* TRUE */); -} - -void -PyOS_FiniInterrupts(void) -{ -} - -int -PyOS_InterruptOccurred(void) -{ - return _go32_was_ctrl_break_hit(); -} - -#else /* !__GNUC__ */ - -/* This might work for MS-DOS (untested though): */ - -void -PyOS_InitInterrupts(void) -{ -} - -void -PyOS_FiniInterrupts(void) -{ -} - -int -PyOS_InterruptOccurred(void) -{ - int interrupted = 0; - while (kbhit()) { - if (getch() == '\003') - interrupted = 1; - } - return interrupted; -} - -#endif /* __GNUC__ */ - -#define OK - -#endif /* MSDOS && !QUICKWIN */ - - -#ifndef OK - -/* Default version -- for real operating systems and for Standard C */ - -#include <stdio.h> -#include <string.h> -#include <signal.h> - -static int interrupted; - -void -PyErr_SetInterrupt(void) -{ - interrupted = 1; -} - -extern int PyErr_CheckSignals(void); - -static int -checksignals_witharg(void * arg) -{ - return PyErr_CheckSignals(); -} - -static void -intcatcher(int sig) -{ - extern void Py_Exit(int); - static char message[] = -"python: to interrupt a truly hanging Python program, interrupt once more.\n"; - switch (interrupted++) { - case 0: - break; - case 1: - write(2, message, strlen(message)); - break; - case 2: - interrupted = 0; - Py_Exit(1); - break; - } - PyOS_setsig(SIGINT, intcatcher); - Py_AddPendingCall(checksignals_witharg, NULL); -} - -static void (*old_siginthandler)(int) = SIG_DFL; - -void -PyOS_InitInterrupts(void) -{ - if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN) - PyOS_setsig(SIGINT, intcatcher); -} - -void -PyOS_FiniInterrupts(void) -{ - PyOS_setsig(SIGINT, old_siginthandler); -} - -int -PyOS_InterruptOccurred(void) -{ - if (!interrupted) - return 0; - interrupted = 0; - return 1; -} - -#endif /* !OK */ - -void -PyOS_AfterFork(void) -{ -#ifdef WITH_THREAD - PyEval_ReInitThreads(); - PyThread_ReInitTLS(); -#endif -} diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 73e7e3c..f22ac67 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -13,7 +13,7 @@ /* Forward */ static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); -static void initerr(perrdetail *err_ret, const char* filename); +static int initerr(perrdetail *err_ret, const char* filename); /* Parse input coming from a string. Return error code, print some errors. */ node * @@ -48,7 +48,8 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, struct tok_state *tok; int exec_input = start == file_input; - initerr(err_ret, filename); + if (initerr(err_ret, filename) < 0) + return NULL; if (*flags & PyPARSE_IGNORE_COOKIE) tok = PyTokenizer_FromUTF8(s, exec_input); @@ -59,7 +60,10 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, return NULL; } - tok->filename = filename ? filename : "<string>"; +#ifndef PGEN + Py_INCREF(err_ret->filename); + tok->filename = err_ret->filename; +#endif return parsetok(tok, g, start, err_ret, flags); } @@ -90,13 +94,17 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, { struct tok_state *tok; - initerr(err_ret, filename); + if (initerr(err_ret, filename) < 0) + return NULL; if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) { err_ret->error = E_NOMEM; return NULL; } - tok->filename = filename; +#ifndef PGEN + Py_INCREF(err_ret->filename); + tok->filename = err_ret->filename; +#endif return parsetok(tok, g, start, err_ret, flags); } @@ -127,7 +135,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, { parser_state *ps; node *n; - int started = 0, handling_import = 0, handling_with = 0; + int started = 0; if ((ps = PyParser_New(g, start)) == NULL) { fprintf(stderr, "no mem for new parser\n"); @@ -154,7 +162,6 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, } if (type == ENDMARKER && started) { type = NEWLINE; /* Add an extra newline */ - handling_with = handling_import = 0; started = 0; /* Add the right number of dedent tokens, except if a certain flag is given -- @@ -227,7 +234,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, PyParser_Delete(ps); if (n == NULL) { - if (tok->lineno <= 1 && tok->done == E_EOF) + if (tok->done == E_EOF) err_ret->error = E_EOF; err_ret->lineno = tok->lineno; if (tok->buf != NULL) { @@ -270,14 +277,24 @@ done: return n; } -static void +static int initerr(perrdetail *err_ret, const char *filename) { err_ret->error = E_OK; - err_ret->filename = filename; err_ret->lineno = 0; err_ret->offset = 0; err_ret->text = NULL; err_ret->token = -1; err_ret->expected = -1; +#ifndef PGEN + if (filename) + err_ret->filename = PyUnicode_DecodeFSDefault(filename); + else + err_ret->filename = PyUnicode_FromString("<string>"); + if (err_ret->filename == NULL) { + err_ret->error = E_ERROR; + return -1; + } +#endif + return 0; } diff --git a/Parser/parsetok_pgen.c b/Parser/parsetok_pgen.c new file mode 100644 index 0000000..97b9288 --- /dev/null +++ b/Parser/parsetok_pgen.c @@ -0,0 +1,2 @@ +#define PGEN +#include "parsetok.c" diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c index 4b7b55a..52b8380 100644 --- a/Parser/pgenmain.c +++ b/Parser/pgenmain.c @@ -29,6 +29,8 @@ int Py_IgnoreEnvironmentFlag; /* Forward */ grammar *getgrammar(char *filename); +void Py_Exit(int) _Py_NO_RETURN; + void Py_Exit(int sts) { diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 5ba12a4..c3b2f35 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -128,7 +128,6 @@ tok_new(void) tok->prompt = tok->nextprompt = NULL; tok->lineno = 0; tok->level = 0; - tok->filename = NULL; tok->altwarning = 1; tok->alterror = 1; tok->alttabsize = 1; @@ -140,6 +139,7 @@ tok_new(void) tok->encoding = NULL; tok->cont_line = 0; #ifndef PGEN + tok->filename = NULL; tok->decoding_readline = NULL; tok->decoding_buffer = NULL; #endif @@ -462,6 +462,8 @@ static int fp_setreadl(struct tok_state *tok, const char* enc) { PyObject *readline = NULL, *stream = NULL, *io = NULL; + _Py_IDENTIFIER(open); + _Py_IDENTIFIER(readline); int fd; io = PyImport_ImportModuleNoBlock("io"); @@ -474,13 +476,13 @@ fp_setreadl(struct tok_state *tok, const char* enc) goto cleanup; } - stream = PyObject_CallMethod(io, "open", "isisOOO", + stream = _PyObject_CallMethodId(io, &PyId_open, "isisOOO", fd, "r", -1, enc, Py_None, Py_None, Py_False); if (stream == NULL) goto cleanup; Py_XDECREF(tok->decoding_readline); - readline = PyObject_GetAttrString(stream, "readline"); + readline = _PyObject_GetAttrId(stream, &PyId_readline); tok->decoding_readline = readline; /* The file has been reopened; parsing will restart from @@ -545,7 +547,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok) { char *line = NULL; int badchar = 0; - PyObject *filename; for (;;) { if (tok->decoding_state == STATE_NORMAL) { /* We already have a codec associated with @@ -586,19 +587,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok) if (badchar) { /* Need to add 1 to the line number, since this line has not been counted, yet. */ - if (tok->filename != NULL) - filename = PyUnicode_DecodeFSDefault(tok->filename); - else - filename = PyUnicode_FromString("<file>"); - if (filename != NULL) { - PyErr_Format(PyExc_SyntaxError, - "Non-UTF-8 code starting with '\\x%.2x' " - "in file %U on line %i, " - "but no encoding declared; " - "see http://python.org/dev/peps/pep-0263/ for details", - badchar, filename, tok->lineno + 1); - Py_DECREF(filename); - } + PyErr_Format(PyExc_SyntaxError, + "Non-UTF-8 code starting with '\\x%.2x' " + "in file %U on line %i, " + "but no encoding declared; " + "see http://python.org/dev/peps/pep-0263/ for details", + badchar, tok->filename, tok->lineno + 1); return error_ret(tok); } #endif @@ -856,6 +850,7 @@ PyTokenizer_Free(struct tok_state *tok) #ifndef PGEN Py_XDECREF(tok->decoding_readline); Py_XDECREF(tok->decoding_buffer); + Py_XDECREF(tok->filename); #endif if (tok->fp != NULL && tok->buf != NULL) PyMem_FREE(tok->buf); @@ -1250,8 +1245,13 @@ indenterror(struct tok_state *tok) return 1; } if (tok->altwarning) { - PySys_WriteStderr("%s: inconsistent use of tabs and spaces " +#ifdef PGEN + PySys_WriteStderr("inconsistent use of tabs and spaces " + "in indentation\n"); +#else + PySys_FormatStderr("%U: inconsistent use of tabs and spaces " "in indentation\n", tok->filename); +#endif tok->altwarning = 0; } return 0; @@ -1260,14 +1260,16 @@ indenterror(struct tok_state *tok) #ifdef PGEN #define verify_identifier(tok) 1 #else -/* Verify that the identifier follows PEP 3131. */ +/* Verify that the identifier follows PEP 3131. + All identifier strings are guaranteed to be "ready" unicode objects. + */ static int verify_identifier(struct tok_state *tok) { PyObject *s; int result; s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL); - if (s == NULL) { + if (s == NULL || PyUnicode_READY(s) == -1) { if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { PyErr_Clear(); tok->done = E_IDENTIFIER; @@ -1692,17 +1694,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) return result; } -/* Get -*- encoding -*- from a Python file. +/* Get the encoding of a Python file. Check for the coding cookie and check if + the file starts with a BOM. - PyTokenizer_FindEncoding returns NULL when it can't find the encoding in - the first or second line of the file (in which case the encoding - should be assumed to be PyUnicode_GetDefaultEncoding()). + PyTokenizer_FindEncodingFilename() returns NULL when it can't find the + encoding in the first or second line of the file (in which case the encoding + should be assumed to be UTF-8). + + The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed + by the caller. */ - The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed - by the caller. -*/ char * -PyTokenizer_FindEncoding(int fd) +PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) { struct tok_state *tok; FILE *fp; @@ -1721,6 +1724,20 @@ PyTokenizer_FindEncoding(int fd) fclose(fp); return NULL; } +#ifndef PGEN + if (filename != NULL) { + Py_INCREF(filename); + tok->filename = filename; + } + else { + tok->filename = PyUnicode_FromString("<string>"); + if (tok->filename == NULL) { + fclose(fp); + PyTokenizer_Free(tok); + return encoding; + } + } +#endif while (tok->lineno < 2 && tok->done == E_OK) { PyTokenizer_Get(tok, &p_start, &p_end); } @@ -1734,6 +1751,12 @@ PyTokenizer_FindEncoding(int fd) return encoding; } +char * +PyTokenizer_FindEncoding(int fd) +{ + return PyTokenizer_FindEncodingFilename(fd, NULL); +} + #ifdef Py_DEBUG void diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 2be3bf2..ed1f3aa 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -40,7 +40,13 @@ struct tok_state { int level; /* () [] {} Parentheses nesting level */ /* Used to allow free continuations inside them */ /* Stuff for checking on different tab sizes */ - const char *filename; /* encoded to the filesystem encoding */ +#ifndef PGEN + /* pgen doesn't have access to Python codecs, it cannot decode the input + filename. The bytes filename might be kept, but it is only used by + indenterror() and it is not really needed: pgen only compiles one file + (Grammar/Grammar). */ + PyObject *filename; +#endif int altwarning; /* Issue warning if alternate tabs don't match */ int alterror; /* Issue error if alternate tabs don't match */ int alttabsize; /* Alternate tab spacing */ @@ -69,7 +75,6 @@ extern void PyTokenizer_Free(struct tok_state *); extern int PyTokenizer_Get(struct tok_state *, char **, char **); extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset); -extern char * PyTokenizer_FindEncoding(int); #ifdef __cplusplus } |