From 40d20bcf1fccfe8af2393f1aec88ba18e38d0bc1 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 22 Oct 2007 00:09:51 +0000 Subject: Issue 1267, continued. Additional patch by Christian Heimes to deal more cleanly with the FILE* vs file-descriptor issues. I cleaned up his code a bit, and moved the lseek() call into import.c. --- Doc/c-api/concrete.rst | 30 +++++++++++------------------- Doc/reference/introduction.rst | 2 +- Include/fileobject.h | 5 +---- Lib/test/test_imp.py | 17 +++++++++++++++++ Misc/NEWS | 12 ++++++++++++ Modules/posixmodule.c | 9 ++++++++- Objects/bytesobject.c | 4 +--- Objects/fileobject.c | 16 +++++----------- Parser/tokenizer.c | 36 ++++++++++++++++++++---------------- Parser/tokenizer.h | 2 +- Python/import.c | 17 +++++++++++++---- Python/pythonrun.c | 6 +++--- 12 files changed, 93 insertions(+), 63 deletions(-) diff --git a/Doc/c-api/concrete.rst b/Doc/c-api/concrete.rst index e48056c..1855688 100644 --- a/Doc/c-api/concrete.rst +++ b/Doc/c-api/concrete.rst @@ -2410,31 +2410,23 @@ change in future releases of Python. :ctype:`PyFileObject`. -.. cfunction:: PyObject* PyFile_FromString(char *filename, char *mode) +.. cfunction:: PyObject* PyFile_FromFd(int fd, char *name, char *mode, int buffering, char *encoding, char *newline) - .. index:: single: fopen() - - On success, return a new file object that is opened on the file given by - *filename*, with a file mode given by *mode*, where *mode* has the same - semantics as the standard C routine :cfunc:`fopen`. On failure, return *NULL*. - - -.. cfunction:: PyObject* PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE*)) + Create a new :ctype:`PyFileObject` from the file descriptor of an already + opened file *fd*. The arguments *name*, *encoding* and *newline* can be + *NULL*, and *buffering* can be *-1*, to use the defaults. Return *NULL* on + failure. - Create a new :ctype:`PyFileObject` from the already-open standard C file - pointer, *fp*. 
The function *close* will be called when the file should be - closed. Return *NULL* on failure. + .. warning:: -.. cfunction:: PyFile_FromFileEx(FILE *fp, char *name, char *mode, int (*close)(FILE *), int buffering, char *encoding, char *newline) + Take care when you are mixing streams and descriptors! For more + information, see `GNU C Library + `_. - Create a new :ctype:`PyFileObject` from the already-open standard C file - pointer, *fp*. The functions works similar to *PyFile_FromFile* but takes - optional arguments for *buffering*, *encoding* and *newline*. Use -1 resp. - *NULL* for default values. -.. cfunction:: FILE* PyFile_AsFile(PyObject *p) +.. cfunction:: int PyObject_AsFileDescriptor(PyObject *p) - Return the file object associated with *p* as a :ctype:`FILE\*`. + Return the file descriptor associated with *p* as an :ctype:`int`. .. cfunction:: PyObject* PyFile_GetLine(PyObject *p, int n) diff --git a/Doc/reference/introduction.rst b/Doc/reference/introduction.rst index 4da1606..ceb2cf3 100644 --- a/Doc/reference/introduction.rst +++ b/Doc/reference/introduction.rst @@ -60,7 +60,7 @@ Python for .NET This implementation actually uses the CPython implementation, but is a managed .NET application and makes .NET libraries available. This was created by Brian Lloyd. For more information, see the `Python for .NET home page - `_. + `_. IronPython An alternate Python for .NET. 
Unlike Python.NET, this is a complete Python diff --git a/Include/fileobject.h b/Include/fileobject.h index b65d984..acb8c6d 100644 --- a/Include/fileobject.h +++ b/Include/fileobject.h @@ -8,10 +8,7 @@ extern "C" { #define PY_STDIOTEXTMODE "b" -PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *, int (*)(FILE*)); -PyAPI_FUNC(PyObject *) PyFile_FromFileEx(FILE *, char *, char *, - int (*)(FILE *), int, char *, - char *); +PyAPI_FUNC(PyObject *) PyFile_FromFd(int, char *, char *, int, char *, char *); PyAPI_FUNC(PyObject *) PyFile_GetLine(PyObject *, int); PyAPI_FUNC(int) PyFile_WriteObject(PyObject *, PyObject *, int); PyAPI_FUNC(int) PyFile_WriteString(const char *, PyObject *); diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py index 87efc33..268a4b7 100644 --- a/Lib/test/test_imp.py +++ b/Lib/test/test_imp.py @@ -44,6 +44,23 @@ class ImportTests(unittest.TestCase): fd = imp.find_module("heapq")[0] self.assertEqual(fd.encoding, "iso-8859-1") + def test_issue1267(self): + fp, filename, info = imp.find_module("pydoc") + self.assertNotEqual(fp, None) + self.assertEqual(fp.encoding, "iso-8859-1") + self.assertEqual(fp.tell(), 0) + self.assertEqual(fp.readline(), '#!/usr/bin/env python\n') + fp.close() + + fp, filename, info = imp.find_module("tokenize") + self.assertNotEqual(fp, None) + self.assertEqual(fp.encoding, "utf-8") + self.assertEqual(fp.tell(), 0) + self.assertEqual(fp.readline(), + '"""Tokenization help for Python programs.\n') + fp.close() + + def test_main(): test_support.run_unittest( LockTests, diff --git a/Misc/NEWS b/Misc/NEWS index 749b94c..86c5a59 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -8,6 +8,18 @@ What's New in Python 3.0a2? *Unreleased* +Core and Builtins +----------------- + +- Replaced `PyFile_FromFile()` with `PyFile_FromFd(fd, name, mode, buffer, + encoding, newline)` + +- Fixed `imp.find_module()` to obey the -*- coding: -*- header. + +- Changed `__file__` and `co_filename` to unicode. 
The path names are decoded + with `Py_FileSystemDefaultEncoding` and a new API method + `PyUnicode_DecodeFSDefault(char*)` was added. + Extension Modules ----------------- diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 647ea3e..e0de961 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5386,11 +5386,18 @@ static PyObject * posix_tmpfile(PyObject *self, PyObject *noargs) { FILE *fp; + int fd; fp = tmpfile(); if (fp == NULL) return posix_error(); - return PyFile_FromFile(fp, "", "w+b", fclose); + fd = fileno(fp); + if (fd != -1) + fd = dup(fd); + fclose(fp); + if (fd == -1) + return posix_error(); + return PyFile_FromFd(fd, "", "w+b", -1, NULL, NULL); } #endif diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 18d0f57..db475cd 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1214,7 +1214,7 @@ _bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start, Py_ssize_t len = PyBytes_GET_SIZE(self); const char* str; Py_buffer vsubstr; - int rv; + int rv = 0; str = PyBytes_AS_STRING(self); @@ -1226,13 +1226,11 @@ _bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start, if (direction < 0) { /* startswith */ if (start+vsubstr.len > len) { - rv = 0; goto done; } } else { /* endswith */ if (end-start < vsubstr.len || start > len) { - rv = 0; goto done; } diff --git a/Objects/fileobject.c b/Objects/fileobject.c index b6d200d..b4abac5 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -26,22 +26,16 @@ extern "C" { /* External C interface */ PyObject * -PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *)) +PyFile_FromFd(int fd, char *name, char *mode, int buffering, char *encoding, + char *newline) { - return PyFile_FromFileEx(fp, name, mode, close, -1, NULL, NULL); -} - -PyObject * -PyFile_FromFileEx(FILE *fp, char *name, char *mode, int (*close)(FILE *), - int buffering, char *encoding, char *newline) -{ - PyObject *io, *stream, *nameobj=NULL; + PyObject 
*io, *stream, *nameobj = NULL; io = PyImport_ImportModule("io"); if (io == NULL) return NULL; - stream = PyObject_CallMethod(io, "open", "isiss", fileno(fp), mode, - buffering, encoding, newline); + stream = PyObject_CallMethod(io, "open", "isiss", fd, mode, + buffering, encoding, newline); Py_DECREF(io); if (stream == NULL) return NULL; diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 8c24cf2..5b3fd9e 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1602,40 +1602,44 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset) } #endif -/* Get -*- encoding -*- from a Python file +/* Get -*- encoding -*- from a Python file. PyTokenizer_FindEncoding returns NULL when it can't find the encoding in the first or second line of the file (in which case the encoding should be assumed to be PyUnicode_GetDefaultEncoding()). - The char * returned was malloc'ed from PyMem_MALLOC() and thus must be freed - when no longer needed. + The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed + by the caller. */ char * -PyTokenizer_FindEncoding(FILE *fp) { +PyTokenizer_FindEncoding(int fd) +{ struct tok_state *tok; - char *p_start=NULL, *p_end=NULL, *encoding=NULL; + FILE *fp; + char *p_start =NULL , *p_end =NULL , *encoding = NULL; - if ((tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL)) == NULL) { - /* lseek() usage is on purpose; see note later in code. */ - lseek(fileno(fp), 0, 0); + fd = dup(fd); + if (fd < 0) { + return NULL; + } + fp = fdopen(fd, "r"); + if (fp == NULL) { + return NULL; + } + tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL); + if (tok == NULL) { + fclose(fp); return NULL; } - while(((tok->lineno < 2) && (tok->done == E_OK))) { + while (tok->lineno < 2 && tok->done == E_OK) { PyTokenizer_Get(tok, &p_start, &p_end); } - - /* lseek() must be used instead of fseek()/rewind() as those fail on - OS X 10.4 to properly seek back to the beginning when reading from - the file descriptor instead of the file pointer. 
*/ - lseek(fileno(fp), 0, 0); - + fclose(fp); if (tok->encoding) { encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1); strcpy(encoding, tok->encoding); } PyTokenizer_Free(tok); - return encoding; } diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index a66d78e..c45dea1 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -67,7 +67,7 @@ extern void PyTokenizer_Free(struct tok_state *); extern int PyTokenizer_Get(struct tok_state *, char **, char **); extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset); -extern char * PyTokenizer_FindEncoding(FILE *fp); +extern char * PyTokenizer_FindEncoding(int); #ifdef __cplusplus } diff --git a/Python/import.c b/Python/import.c index 2a316ca..2493554 100644 --- a/Python/import.c +++ b/Python/import.c @@ -92,7 +92,7 @@ static PyObject *extensions = NULL; extern struct _inittab _PyImport_Inittab[]; /* Method from Parser/tokenizer.c */ -extern char * PyTokenizer_FindEncoding(FILE *fp); +extern char * PyTokenizer_FindEncoding(int); struct _inittab *PyImport_Inittab = _PyImport_Inittab; @@ -2561,6 +2561,7 @@ call_find_module(char *name, PyObject *path) struct filedescr *fdp; char pathname[MAXPATHLEN+1]; FILE *fp = NULL; + int fd = -1; char *found_encoding = NULL; char *encoding = NULL; @@ -2571,17 +2572,25 @@ call_find_module(char *name, PyObject *path) if (fdp == NULL) return NULL; if (fp != NULL) { + fd = fileno(fp); + if (fd != -1) + fd = dup(fd); + fclose(fp); + fp = NULL; + } + if (fd != -1) { if (strchr(fdp->mode, 'b') == NULL) { /* PyTokenizer_FindEncoding() returns PyMem_MALLOC'ed memory. */ - found_encoding = PyTokenizer_FindEncoding(fp); + found_encoding = PyTokenizer_FindEncoding(fd); + lseek(fd, 0, 0); /* Reset position */ encoding = (found_encoding != NULL) ? 
found_encoding : (char*)PyUnicode_GetDefaultEncoding(); } - fob = PyFile_FromFileEx(fp, pathname, fdp->mode, fclose, -1, + fob = PyFile_FromFd(fd, pathname, fdp->mode, -1, (char*)encoding, NULL); if (fob == NULL) { - fclose(fp); + close(fd); PyMem_FREE(found_encoding); return NULL; } diff --git a/Python/pythonrun.c b/Python/pythonrun.c index f641547..330667a 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -719,7 +719,7 @@ initstdio(void) } /* Set sys.stdin */ - if (!(std = PyFile_FromFileEx(stdin, "", "r", fclose, -1, + if (!(std = PyFile_FromFd(fileno(stdin), "", "r", -1, NULL, "\n"))) { goto error; } @@ -728,7 +728,7 @@ initstdio(void) Py_DECREF(std); /* Set sys.stdout */ - if (!(std = PyFile_FromFileEx(stdout, "", "w", fclose, -1, + if (!(std = PyFile_FromFd(fileno(stdout), "", "w", -1, NULL, "\n"))) { goto error; } @@ -737,7 +737,7 @@ initstdio(void) Py_DECREF(std); /* Set sys.stderr */ - if (!(std = PyFile_FromFileEx(stderr, "", "w", fclose, -1, + if (!(std = PyFile_FromFd(fileno(stderr), "", "w", -1, NULL, "\n"))) { goto error; } -- cgit v0.12