summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrett Cannon <bcannon@gmail.com>2007-10-20 03:46:49 (GMT)
committerBrett Cannon <bcannon@gmail.com>2007-10-20 03:46:49 (GMT)
commite453989f2e6ea6d3a30c671f87313905d0c66e5e (patch)
tree1c0daa0ee8810a3a81ae18f670d55c354c897fb4
parent3bb42d9341964fdf094c89c9a64965bd18588476 (diff)
downloadcpython-e453989f2e6ea6d3a30c671f87313905d0c66e5e.zip
cpython-e453989f2e6ea6d3a30c671f87313905d0c66e5e.tar.gz
cpython-e453989f2e6ea6d3a30c671f87313905d0c66e5e.tar.bz2
Fix PyTokenizer_FindEncoding() for OS X 10.4. It turns out that, on that platform, seeking to the
beginning of a file through a file pointer (FILE *) is not reflected when subsequently reading from the underlying file descriptor. Using both fflush() and fpurge() does not solve it; one must call lseek() directly on the file descriptor to get the desired effect. This suggests that we should standardize on either file pointers (FILE *) or file descriptors (int) throughout the C code.
-rw-r--r--Parser/tokenizer.c15
1 files changed, 11 insertions, 4 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 85f7508..c86fe7e 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1605,8 +1605,11 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
/* Get -*- encoding -*- from a Python file
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
- the first or second line of the file. In this case the encoding is
- PyUnicode_GetDefaultEncoding().
+ the first or second line of the file (in which case the encoding
+ should be assumed to be PyUnicode_GetDefaultEncoding()).
+
+ The char * returned is allocated with PyMem_MALLOC() and thus must be
+ released with PyMem_FREE() when no longer needed.
*/
char *
PyTokenizer_FindEncoding(FILE *fp) {
@@ -1614,14 +1617,18 @@ PyTokenizer_FindEncoding(FILE *fp) {
char *p_start=NULL, *p_end=NULL, *encoding=NULL;
if ((tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL)) == NULL) {
- rewind(fp);
+ /* lseek() usage is on purpose; see note later in code. */
+ lseek(fileno(fp), 0, 0);
return NULL;
}
while(((tok->lineno < 2) && (tok->done == E_OK))) {
PyTokenizer_Get(tok, &p_start, &p_end);
}
- rewind(fp);
+ /* lseek() must be used instead of fseek()/rewind() as those fail on
+ OS X 10.4 to properly seek back to the beginning when reading from
+ the file descriptor instead of the file pointer. */
+ lseek(fileno(fp), 0, 0);
if (tok->encoding) {
encoding = (char *)PyMem_MALLOC(strlen(tok->encoding));