diff options
Diffstat (limited to 'Modules/zipimport.c')
-rw-r--r-- | Modules/zipimport.c | 286 |
1 files changed, 238 insertions, 48 deletions
diff --git a/Modules/zipimport.c b/Modules/zipimport.c index 8fe9195..02bdb28 100644 --- a/Modules/zipimport.c +++ b/Modules/zipimport.c @@ -49,10 +49,16 @@ struct _zipimporter { static PyObject *ZipImportError; /* read_directory() cache */ static PyObject *zip_directory_cache = NULL; +static PyObject *zip_stat_cache = NULL; +/* posix.fstat or nt.fstat function. Used due to posixmodule.c's + * superior fstat implementation over libc's on Windows. */ +static PyObject *fstat_function = NULL; /* posix.fstat() or nt.fstat() */ /* forward decls */ -static PyObject *read_directory(PyObject *archive); -static PyObject *get_data(PyObject *archive, PyObject *toc_entry); +static FILE *fopen_rb_and_stat(PyObject *path, PyObject **py_stat_p); +static FILE *safely_reopen_archive(ZipImporter *self); +static PyObject *read_directory(FILE *fp, PyObject *archive); +static PyObject *get_data(FILE *fp, PyObject *archive, PyObject *toc_entry); static PyObject *get_module_code(ZipImporter *self, PyObject *fullname, int *p_ispackage, PyObject **p_modpath); @@ -131,11 +137,39 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds) files = PyDict_GetItem(zip_directory_cache, filename); if (files == NULL) { - files = read_directory(filename); - if (files == NULL) + PyObject *zip_stat = NULL; + FILE *fp = fopen_rb_and_stat(filename, &zip_stat); + if (fp == NULL) { + if (!PyErr_Occurred()) + PyErr_Format(ZipImportError, "can't open Zip file: %R", + filename); + + Py_XDECREF(zip_stat); goto error; - if (PyDict_SetItem(zip_directory_cache, filename, files) != 0) + } + + if (Py_VerboseFlag) + PySys_FormatStderr("# zipimport: %U not cached, " + "reading TOC.\n", filename); + + files = read_directory(fp, filename); + fclose(fp); + if (files == NULL) { + Py_XDECREF(zip_stat); + goto error; + } + if (PyDict_SetItem(zip_directory_cache, filename, files) != 0) { + Py_DECREF(files); + Py_XDECREF(zip_stat); goto error; + } + if (zip_stat && PyDict_SetItem(zip_stat_cache, filename, + zip_stat) != 0) { + Py_DECREF(files); + Py_DECREF(zip_stat); + goto error; + } + Py_XDECREF(zip_stat); } else Py_INCREF(files); @@ -560,7 +594,8 @@ zipimporter_get_data(PyObject *obj, PyObject *args) { ZipImporter *self = (ZipImporter *)obj; PyObject *path, *key; - PyObject *toc_entry; + FILE *fp; + PyObject *toc_entry, *data; Py_ssize_t path_start, path_len, len; if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &path)) @@ -588,15 +623,23 @@ zipimporter_get_data(PyObject *obj, PyObject *args) key = PyUnicode_Substring(path, path_start, path_len); if (key == NULL) goto error; + + fp = safely_reopen_archive(self); + if (fp == NULL) + goto error; + toc_entry = PyDict_GetItem(self->files, key); if (toc_entry == NULL) { PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key); Py_DECREF(key); + fclose(fp); goto error; } Py_DECREF(key); Py_DECREF(path); - return get_data(self->archive, toc_entry); + data = get_data(fp, self->archive, toc_entry); + fclose(fp); + return data; error: Py_DECREF(path); return NULL; @@ -621,6 +664,7 @@ zipimporter_get_source(PyObject *obj, PyObject *args) PyObject *toc_entry; PyObject *fullname, *subname, *path, *fullpath; enum zi_module_info mi; + FILE *fp; if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname)) return NULL; @@ -650,11 +694,18 @@ zipimporter_get_source(PyObject *obj, PyObject *args) if (fullpath == NULL) return NULL; + fp = safely_reopen_archive(self); + if (fp == NULL) { + Py_DECREF(fullpath); + return NULL; + } + toc_entry = PyDict_GetItem(self->files, fullpath); Py_DECREF(fullpath); if (toc_entry != NULL) { PyObject *res, *bytes; - bytes = get_data(self->archive, toc_entry); + bytes = get_data(fp, self->archive, toc_entry); + fclose(fp); if (bytes == NULL) return NULL; res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes), @@ -662,10 +713,10 @@ zipimporter_get_source(PyObject *obj, PyObject *args) Py_DECREF(bytes); return res; } + fclose(fp); /* we have the module, but no source */ - Py_INCREF(Py_None); - return Py_None; + Py_RETURN_NONE; } PyDoc_STRVAR(doc_find_module, @@ -831,10 +882,135 @@ get_long(unsigned char *buf) { return x; } +/* Return 1 if objects a and b fail a Py_EQ test for an attr. */ +static int +compare_obj_attr_strings(PyObject *obj_a, PyObject *obj_b, char *attr_name) +{ + int problem = 0; + PyObject *attr_a = PyObject_GetAttrString(obj_a, attr_name); + PyObject *attr_b = PyObject_GetAttrString(obj_b, attr_name); + if (attr_a == NULL || attr_b == NULL) + problem = 1; + else + problem = (PyObject_RichCompareBool(attr_a, attr_b, Py_EQ) != 1); + Py_XDECREF(attr_a); + Py_XDECREF(attr_b); + return problem; +} + /* - read_directory(archive) -> files dict (new reference) + * Returns an open FILE * on success. + * Returns NULL on error with the Python error context set. + */ +static FILE * +safely_reopen_archive(ZipImporter *self) +{ + FILE *fp; + PyObject *stat_now = NULL; + + fp = fopen_rb_and_stat(self->archive, &stat_now); + if (!fp) { + PyErr_Format(ZipImportError, + "zipimport: can not open file %U", self->archive); + Py_XDECREF(stat_now); + return NULL; + } - Given a path to a Zip archive, build a dict, mapping file names + if (stat_now != NULL) { + int problem = 0; + PyObject *files; + PyObject *prev_stat = PyDict_GetItem(zip_stat_cache, self->archive); + /* Test stat_now vs the old cached stat on some key attributes. */ + if (prev_stat != NULL) { + problem = compare_obj_attr_strings(prev_stat, stat_now, + "st_ino"); + problem |= compare_obj_attr_strings(prev_stat, stat_now, + "st_size"); + problem |= compare_obj_attr_strings(prev_stat, stat_now, + "st_mtime"); + } else { + if (Py_VerboseFlag) + PySys_FormatStderr("# zipimport: no stat data for %U!\n", + self->archive); + problem = 1; + } + + if (problem) { + if (Py_VerboseFlag) + PySys_FormatStderr("# zipimport: %U modified since last" + " import, rereading TOC.\n", self->archive); + files = read_directory(fp, self->archive); + if (files == NULL) { + Py_DECREF(stat_now); + fclose(fp); + return NULL; + } + if (PyDict_SetItem(zip_directory_cache, self->archive, + files) != 0) { + Py_DECREF(files); + Py_DECREF(stat_now); + fclose(fp); + return NULL; + } + if (stat_now && PyDict_SetItem(zip_stat_cache, self->archive, + stat_now) != 0) { + Py_DECREF(files); + Py_DECREF(stat_now); + fclose(fp); + return NULL; + } + Py_XDECREF(self->files); /* free the old value. */ + self->files = files; + } else { + /* No problem, discard the new stat data. */ + Py_DECREF(stat_now); + } + } /* stat succeeded */ + + return fp; +} + +/* + fopen_rb_and_stat(path, &py_stat) -> FILE * + + Opens path in "rb" mode and populates the Python py_stat stat_result + with information about the opened file. *py_stat may not be changed + if there is no fstat_function or if fstat_function fails. + + Returns NULL and does nothing to *py_stat if the open failed. +*/ +static FILE * +fopen_rb_and_stat(PyObject *path, PyObject **py_stat_p) +{ + FILE *fp; + assert(py_stat_p != NULL); + assert(*py_stat_p == NULL); + + fp = _Py_fopen_obj(path, "rb"); + if (fp == NULL) { + if (!PyErr_Occurred()) + PyErr_Format(ZipImportError, + "zipimport: can not open file %U", path); + return NULL; + } + + if (fstat_function) { + PyObject *stat_result = PyObject_CallFunction(fstat_function, + "i", fileno(fp)); + if (stat_result == NULL) { + PyErr_Clear(); /* We can function without it. */ + } else { + *py_stat_p = stat_result; + } + } + + return fp; +} + +/* + read_directory(fp, archive) -> files dict (new reference) + + Given an open Zip archive, build a dict, mapping file names (local to the archive, using SEP as a separator) to toc entries. A toc_entry is a tuple: @@ -854,10 +1030,9 @@ get_long(unsigned char *buf) { data_size and file_offset are 0. */ static PyObject * -read_directory(PyObject *archive) +read_directory(FILE *fp, PyObject *archive) { PyObject *files = NULL; - FILE *fp; unsigned short flags; short compress, time, date, name_size; long crc, data_size, file_size, header_size; @@ -873,27 +1048,18 @@ read_directory(PyObject *archive) const char *charset; int bootstrap; - fp = _Py_fopen_obj(archive, "rb"); - if (fp == NULL) { - if (!PyErr_Occurred()) - PyErr_Format(ZipImportError, "can't open Zip file: %R", archive); - return NULL; - } - + assert(fp != NULL); if (fseek(fp, -22, SEEK_END) == -1) { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); return NULL; } header_position = ftell(fp); if (fread(endof_central_dir, 1, 22, fp) != 22) { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); return NULL; } if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) { /* Bad: End of Central Dir signature */ - fclose(fp); PyErr_Format(ZipImportError, "not a Zip file: %R", archive); return NULL; } @@ -1000,19 +1166,16 @@ read_directory(PyObject *archive) goto error; count++; } - fclose(fp); if (Py_VerboseFlag) PySys_FormatStderr("# zipimport: found %ld names in %R\n", count, archive); return files; file_error: - fclose(fp); Py_XDECREF(files); Py_XDECREF(nameobj); PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); return NULL; error: - fclose(fp); Py_XDECREF(files); Py_XDECREF(nameobj); return NULL; @@ -1051,14 +1214,13 @@ get_decompress_func(void) return decompress; } -/* Given a path to a Zip file and a toc_entry, return the (uncompressed) +/* Given a FILE* to a Zip file and a toc_entry, return the (uncompressed) data as a new reference. */ static PyObject * -get_data(PyObject *archive, PyObject *toc_entry) +get_data(FILE *fp, PyObject *archive, PyObject *toc_entry) { PyObject *raw_data, *data = NULL, *decompress; char *buf; - FILE *fp; int err; Py_ssize_t bytes_read = 0; long l; @@ -1072,17 +1234,8 @@ get_data(PyObject *archive, PyObject *toc_entry) return NULL; } - fp = _Py_fopen_obj(archive, "rb"); - if (!fp) { - if (!PyErr_Occurred()) - PyErr_Format(PyExc_IOError, - "zipimport: can not open file %U", archive); - return NULL; - } - /* Check to make sure the local file header is correct */ if (fseek(fp, file_offset, 0) == -1) { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); return NULL; } @@ -1094,11 +1247,9 @@ get_data(PyObject *archive, PyObject *toc_entry) PyErr_Format(ZipImportError, "bad local file header in %U", archive); - fclose(fp); return NULL; } if (fseek(fp, file_offset + 26, 0) == -1) { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); return NULL; } @@ -1106,7 +1257,6 @@ get_data(PyObject *archive, PyObject *toc_entry) l = 30 + PyMarshal_ReadShortFromFile(fp) + PyMarshal_ReadShortFromFile(fp); /* local header size */ if (PyErr_Occurred()) { - fclose(fp); return NULL; } file_offset += l; /* Start of file data */ @@ -1117,7 +1267,6 @@ get_data(PyObject *archive, PyObject *toc_entry) raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size); if (raw_data == NULL) { - fclose(fp); return NULL; } buf = PyBytes_AsString(raw_data); @@ -1126,11 +1275,9 @@ get_data(PyObject *archive, PyObject *toc_entry) if (err == 0) { bytes_read = fread(buf, 1, data_size, fp); } else { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); return NULL; } - fclose(fp); if (err || bytes_read != data_size) { PyErr_SetString(PyExc_IOError, "zipimport: can't read data"); @@ -1351,12 +1498,12 @@ get_mtime_of_source(ZipImporter *self, PyObject *path) /* Return the code object for the module named by 'fullname' from the Zip archive as a new reference. */ static PyObject * -get_code_from_data(ZipImporter *self, int ispackage, int isbytecode, +get_code_from_data(ZipImporter *self, FILE *fp, int ispackage, int isbytecode, time_t mtime, PyObject *toc_entry) { PyObject *data, *modpath, *code; - data = get_data(self->archive, toc_entry); + data = get_data(fp, self->archive, toc_entry); if (data == NULL) return NULL; @@ -1378,6 +1525,7 @@ get_module_code(ZipImporter *self, PyObject *fullname, PyObject *code = NULL, *toc_entry, *subname; PyObject *path, *fullpath = NULL; struct st_zip_searchorder *zso; + FILE *fp; subname = get_subname(fullname); if (subname == NULL) @@ -1388,6 +1536,12 @@ get_module_code(ZipImporter *self, PyObject *fullname, if (path == NULL) return NULL; + fp = safely_reopen_archive(self); + if (fp == NULL) { + Py_DECREF(path); + return NULL; + } + for (zso = zip_searchorder; *zso->suffix; zso++) { code = NULL; @@ -1398,6 +1552,7 @@ get_module_code(ZipImporter *self, PyObject *fullname, if (Py_VerboseFlag > 1) PySys_FormatStderr("# trying %U%c%U\n", self->archive, (int)SEP, fullpath); + toc_entry = PyDict_GetItem(self->files, fullpath); if (toc_entry != NULL) { time_t mtime = 0; @@ -1413,7 +1568,7 @@ get_module_code(ZipImporter *self, PyObject *fullname, Py_CLEAR(fullpath); if (p_ispackage != NULL) *p_ispackage = ispackage; - code = get_code_from_data(self, ispackage, + code = get_code_from_data(self, fp, ispackage, isbytecode, mtime, toc_entry); if (code == Py_None) { @@ -1433,6 +1588,7 @@ get_module_code(ZipImporter *self, PyObject *fullname, } PyErr_Format(ZipImportError, "can't find module %R", fullname); exit: + fclose(fp); Py_DECREF(path); Py_XDECREF(fullpath); return code; @@ -1450,6 +1606,8 @@ This module exports three objects:\n\ subclass of ImportError, so it can be caught as ImportError, too.\n\ - _zip_directory_cache: a dict, mapping archive paths to zip directory\n\ info dicts, as used in zipimporter._files.\n\ +- _zip_stat_cache: a dict, mapping archive paths to stat_result\n\ + info for the .zip the last time anything was imported from it.\n\ \n\ It is usually not needed to use the zipimport module explicitly; it is\n\ used by the builtin import mechanism for sys.path items that are paths\n\ @@ -1509,6 +1667,7 @@ PyInit_zipimport(void) (PyObject *)&ZipImporter_Type) < 0) return NULL; + Py_XDECREF(zip_directory_cache); /* Avoid embedded interpreter leaks. */ zip_directory_cache = PyDict_New(); if (zip_directory_cache == NULL) return NULL; @@ -1516,5 +1675,36 @@ PyInit_zipimport(void) if (PyModule_AddObject(mod, "_zip_directory_cache", zip_directory_cache) < 0) return NULL; + + Py_XDECREF(zip_stat_cache); /* Avoid embedded interpreter leaks. */ + zip_stat_cache = PyDict_New(); + if (zip_stat_cache == NULL) + return NULL; + Py_INCREF(zip_stat_cache); + if (PyModule_AddObject(mod, "_zip_stat_cache", zip_stat_cache) < 0) + return NULL; + + { + /* We cannot import "os" here as that is a .py/.pyc file that could + * live within a zipped up standard library. Import the posix or nt + * builtin that provides the fstat() function we want instead. */ + PyObject *os_like_module; + Py_CLEAR(fstat_function); /* Avoid embedded interpreter leaks. */ + os_like_module = PyImport_ImportModule("posix"); + if (os_like_module == NULL) { + PyErr_Clear(); + os_like_module = PyImport_ImportModule("nt"); + } + if (os_like_module != NULL) { + fstat_function = PyObject_GetAttrString(os_like_module, "fstat"); + Py_DECREF(os_like_module); + } + if (fstat_function == NULL) { + PyErr_Clear(); /* non-fatal, we'll go on without it. */ + if (Py_VerboseFlag) + PySys_WriteStderr("# zipimport unable to use os.fstat().\n"); + } + } + return mod; } |