summary refs log tree commit diff stats
path: root/Modules/zipimport.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/zipimport.c')
-rw-r--r-- Modules/zipimport.c 286
1 files changed, 238 insertions, 48 deletions
diff --git a/Modules/zipimport.c b/Modules/zipimport.c
index 8fe9195..02bdb28 100644
--- a/Modules/zipimport.c
+++ b/Modules/zipimport.c
@@ -49,10 +49,16 @@ struct _zipimporter {
static PyObject *ZipImportError;
/* read_directory() cache */
static PyObject *zip_directory_cache = NULL;
+static PyObject *zip_stat_cache = NULL;
+/* posix.fstat or nt.fstat function. Used due to posixmodule.c's
+ * superior fstat implementation over libc's on Windows. */
+static PyObject *fstat_function = NULL; /* posix.fstat() or nt.fstat() */
/* forward decls */
-static PyObject *read_directory(PyObject *archive);
-static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
+static FILE *fopen_rb_and_stat(PyObject *path, PyObject **py_stat_p);
+static FILE *safely_reopen_archive(ZipImporter *self);
+static PyObject *read_directory(FILE *fp, PyObject *archive);
+static PyObject *get_data(FILE *fp, PyObject *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, PyObject *fullname,
int *p_ispackage, PyObject **p_modpath);
@@ -131,11 +137,39 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
files = PyDict_GetItem(zip_directory_cache, filename);
if (files == NULL) {
- files = read_directory(filename);
- if (files == NULL)
+ PyObject *zip_stat = NULL;
+ FILE *fp = fopen_rb_and_stat(filename, &zip_stat);
+ if (fp == NULL) {
+ if (!PyErr_Occurred())
+ PyErr_Format(ZipImportError, "can't open Zip file: %R",
+ filename);
+
+ Py_XDECREF(zip_stat);
goto error;
- if (PyDict_SetItem(zip_directory_cache, filename, files) != 0)
+ }
+
+ if (Py_VerboseFlag)
+ PySys_FormatStderr("# zipimport: %U not cached, "
+ "reading TOC.\n", filename);
+
+ files = read_directory(fp, filename);
+ fclose(fp);
+ if (files == NULL) {
+ Py_XDECREF(zip_stat);
+ goto error;
+ }
+ if (PyDict_SetItem(zip_directory_cache, filename, files) != 0) {
+ Py_DECREF(files);
+ Py_XDECREF(zip_stat);
goto error;
+ }
+ if (zip_stat && PyDict_SetItem(zip_stat_cache, filename,
+ zip_stat) != 0) {
+ Py_DECREF(files);
+ Py_DECREF(zip_stat);
+ goto error;
+ }
+ Py_XDECREF(zip_stat);
}
else
Py_INCREF(files);
@@ -560,7 +594,8 @@ zipimporter_get_data(PyObject *obj, PyObject *args)
{
ZipImporter *self = (ZipImporter *)obj;
PyObject *path, *key;
- PyObject *toc_entry;
+ FILE *fp;
+ PyObject *toc_entry, *data;
Py_ssize_t path_start, path_len, len;
if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &path))
@@ -588,15 +623,23 @@ zipimporter_get_data(PyObject *obj, PyObject *args)
key = PyUnicode_Substring(path, path_start, path_len);
if (key == NULL)
goto error;
+
+ fp = safely_reopen_archive(self);
+ if (fp == NULL)
+ goto error;
+
toc_entry = PyDict_GetItem(self->files, key);
if (toc_entry == NULL) {
PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
Py_DECREF(key);
+ fclose(fp);
goto error;
}
Py_DECREF(key);
Py_DECREF(path);
- return get_data(self->archive, toc_entry);
+ data = get_data(fp, self->archive, toc_entry);
+ fclose(fp);
+ return data;
error:
Py_DECREF(path);
return NULL;
@@ -621,6 +664,7 @@ zipimporter_get_source(PyObject *obj, PyObject *args)
PyObject *toc_entry;
PyObject *fullname, *subname, *path, *fullpath;
enum zi_module_info mi;
+ FILE *fp;
if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname))
return NULL;
@@ -650,11 +694,18 @@ zipimporter_get_source(PyObject *obj, PyObject *args)
if (fullpath == NULL)
return NULL;
+ fp = safely_reopen_archive(self);
+ if (fp == NULL) {
+ Py_DECREF(fullpath);
+ return NULL;
+ }
+
toc_entry = PyDict_GetItem(self->files, fullpath);
Py_DECREF(fullpath);
if (toc_entry != NULL) {
PyObject *res, *bytes;
- bytes = get_data(self->archive, toc_entry);
+ bytes = get_data(fp, self->archive, toc_entry);
+ fclose(fp);
if (bytes == NULL)
return NULL;
res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
@@ -662,10 +713,10 @@ zipimporter_get_source(PyObject *obj, PyObject *args)
Py_DECREF(bytes);
return res;
}
+ fclose(fp);
/* we have the module, but no source */
- Py_INCREF(Py_None);
- return Py_None;
+ Py_RETURN_NONE;
}
PyDoc_STRVAR(doc_find_module,
@@ -831,10 +882,135 @@ get_long(unsigned char *buf) {
return x;
}
+/* Return 1 if objects a and b fail a Py_EQ test for an attr. */
+static int
+compare_obj_attr_strings(PyObject *obj_a, PyObject *obj_b, char *attr_name)
+{
+ int problem = 0;
+ PyObject *attr_a = PyObject_GetAttrString(obj_a, attr_name);
+ PyObject *attr_b = PyObject_GetAttrString(obj_b, attr_name);
+ if (attr_a == NULL || attr_b == NULL)
+ problem = 1;
+ else
+ problem = (PyObject_RichCompareBool(attr_a, attr_b, Py_EQ) != 1);
+ Py_XDECREF(attr_a);
+ Py_XDECREF(attr_b);
+ return problem;
+}
+
/*
- read_directory(archive) -> files dict (new reference)
+ * Returns an open FILE * on success.
+ * Returns NULL on error with the Python error context set.
+ */
+static FILE *
+safely_reopen_archive(ZipImporter *self)
+{
+ FILE *fp;
+ PyObject *stat_now = NULL;
+
+ fp = fopen_rb_and_stat(self->archive, &stat_now);
+ if (!fp) {
+ PyErr_Format(ZipImportError,
+ "zipimport: can not open file %U", self->archive);
+ Py_XDECREF(stat_now);
+ return NULL;
+ }
- Given a path to a Zip archive, build a dict, mapping file names
+ if (stat_now != NULL) {
+ int problem = 0;
+ PyObject *files;
+ PyObject *prev_stat = PyDict_GetItem(zip_stat_cache, self->archive);
+ /* Test stat_now vs the old cached stat on some key attributes. */
+ if (prev_stat != NULL) {
+ problem = compare_obj_attr_strings(prev_stat, stat_now,
+ "st_ino");
+ problem |= compare_obj_attr_strings(prev_stat, stat_now,
+ "st_size");
+ problem |= compare_obj_attr_strings(prev_stat, stat_now,
+ "st_mtime");
+ } else {
+ if (Py_VerboseFlag)
+ PySys_FormatStderr("# zipimport: no stat data for %U!\n",
+ self->archive);
+ problem = 1;
+ }
+
+ if (problem) {
+ if (Py_VerboseFlag)
+ PySys_FormatStderr("# zipimport: %U modified since last"
+ " import, rereading TOC.\n", self->archive);
+ files = read_directory(fp, self->archive);
+ if (files == NULL) {
+ Py_DECREF(stat_now);
+ fclose(fp);
+ return NULL;
+ }
+ if (PyDict_SetItem(zip_directory_cache, self->archive,
+ files) != 0) {
+ Py_DECREF(files);
+ Py_DECREF(stat_now);
+ fclose(fp);
+ return NULL;
+ }
+ if (stat_now && PyDict_SetItem(zip_stat_cache, self->archive,
+ stat_now) != 0) {
+ Py_DECREF(files);
+ Py_DECREF(stat_now);
+ fclose(fp);
+ return NULL;
+ }
+ Py_XDECREF(self->files); /* free the old value. */
+ self->files = files;
+ } else {
+ /* No problem, discard the new stat data. */
+ Py_DECREF(stat_now);
+ }
+ } /* stat succeeded */
+
+ return fp;
+}
+
+/*
+ fopen_rb_and_stat(path, &py_stat) -> FILE *
+
+ Opens path in "rb" mode and populates the Python py_stat stat_result
+ with information about the opened file. *py_stat is left unchanged
+ if there is no fstat_function or if the fstat_function call fails.
+
+ Returns NULL and does nothing to *py_stat if the open failed.
+*/
+static FILE *
+fopen_rb_and_stat(PyObject *path, PyObject **py_stat_p)
+{
+ FILE *fp;
+ assert(py_stat_p != NULL);
+ assert(*py_stat_p == NULL);
+
+ fp = _Py_fopen_obj(path, "rb");
+ if (fp == NULL) {
+ if (!PyErr_Occurred())
+ PyErr_Format(ZipImportError,
+ "zipimport: can not open file %U", path);
+ return NULL;
+ }
+
+ if (fstat_function) {
+ PyObject *stat_result = PyObject_CallFunction(fstat_function,
+ "i", fileno(fp));
+ if (stat_result == NULL) {
+ PyErr_Clear(); /* We can function without it. */
+ } else {
+ *py_stat_p = stat_result;
+ }
+ }
+
+ return fp;
+}
+
+/*
+ read_directory(fp, archive) -> files dict (new reference)
+
+ Given an open Zip archive, build a dict, mapping file names
(local to the archive, using SEP as a separator) to toc entries.
A toc_entry is a tuple:
@@ -854,10 +1030,9 @@ get_long(unsigned char *buf) {
data_size and file_offset are 0.
*/
static PyObject *
-read_directory(PyObject *archive)
+read_directory(FILE *fp, PyObject *archive)
{
PyObject *files = NULL;
- FILE *fp;
unsigned short flags;
short compress, time, date, name_size;
long crc, data_size, file_size, header_size;
@@ -873,27 +1048,18 @@ read_directory(PyObject *archive)
const char *charset;
int bootstrap;
- fp = _Py_fopen_obj(archive, "rb");
- if (fp == NULL) {
- if (!PyErr_Occurred())
- PyErr_Format(ZipImportError, "can't open Zip file: %R", archive);
- return NULL;
- }
-
+ assert(fp != NULL);
if (fseek(fp, -22, SEEK_END) == -1) {
- fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
return NULL;
}
header_position = ftell(fp);
if (fread(endof_central_dir, 1, 22, fp) != 22) {
- fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
return NULL;
}
if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
/* Bad: End of Central Dir signature */
- fclose(fp);
PyErr_Format(ZipImportError, "not a Zip file: %R", archive);
return NULL;
}
@@ -1000,19 +1166,16 @@ read_directory(PyObject *archive)
goto error;
count++;
}
- fclose(fp);
if (Py_VerboseFlag)
PySys_FormatStderr("# zipimport: found %ld names in %R\n",
count, archive);
return files;
file_error:
- fclose(fp);
Py_XDECREF(files);
Py_XDECREF(nameobj);
PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
return NULL;
error:
- fclose(fp);
Py_XDECREF(files);
Py_XDECREF(nameobj);
return NULL;
@@ -1051,14 +1214,13 @@ get_decompress_func(void)
return decompress;
}
-/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
+/* Given a FILE* to a Zip file and a toc_entry, return the (uncompressed)
data as a new reference. */
static PyObject *
-get_data(PyObject *archive, PyObject *toc_entry)
+get_data(FILE *fp, PyObject *archive, PyObject *toc_entry)
{
PyObject *raw_data, *data = NULL, *decompress;
char *buf;
- FILE *fp;
int err;
Py_ssize_t bytes_read = 0;
long l;
@@ -1072,17 +1234,8 @@ get_data(PyObject *archive, PyObject *toc_entry)
return NULL;
}
- fp = _Py_fopen_obj(archive, "rb");
- if (!fp) {
- if (!PyErr_Occurred())
- PyErr_Format(PyExc_IOError,
- "zipimport: can not open file %U", archive);
- return NULL;
- }
-
/* Check to make sure the local file header is correct */
if (fseek(fp, file_offset, 0) == -1) {
- fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
return NULL;
}
@@ -1094,11 +1247,9 @@ get_data(PyObject *archive, PyObject *toc_entry)
PyErr_Format(ZipImportError,
"bad local file header in %U",
archive);
- fclose(fp);
return NULL;
}
if (fseek(fp, file_offset + 26, 0) == -1) {
- fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
return NULL;
}
@@ -1106,7 +1257,6 @@ get_data(PyObject *archive, PyObject *toc_entry)
l = 30 + PyMarshal_ReadShortFromFile(fp) +
PyMarshal_ReadShortFromFile(fp); /* local header size */
if (PyErr_Occurred()) {
- fclose(fp);
return NULL;
}
file_offset += l; /* Start of file data */
@@ -1117,7 +1267,6 @@ get_data(PyObject *archive, PyObject *toc_entry)
raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
if (raw_data == NULL) {
- fclose(fp);
return NULL;
}
buf = PyBytes_AsString(raw_data);
@@ -1126,11 +1275,9 @@ get_data(PyObject *archive, PyObject *toc_entry)
if (err == 0) {
bytes_read = fread(buf, 1, data_size, fp);
} else {
- fclose(fp);
PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
return NULL;
}
- fclose(fp);
if (err || bytes_read != data_size) {
PyErr_SetString(PyExc_IOError,
"zipimport: can't read data");
@@ -1351,12 +1498,12 @@ get_mtime_of_source(ZipImporter *self, PyObject *path)
/* Return the code object for the module named by 'fullname' from the
Zip archive as a new reference. */
static PyObject *
-get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
+get_code_from_data(ZipImporter *self, FILE *fp, int ispackage, int isbytecode,
time_t mtime, PyObject *toc_entry)
{
PyObject *data, *modpath, *code;
- data = get_data(self->archive, toc_entry);
+ data = get_data(fp, self->archive, toc_entry);
if (data == NULL)
return NULL;
@@ -1378,6 +1525,7 @@ get_module_code(ZipImporter *self, PyObject *fullname,
PyObject *code = NULL, *toc_entry, *subname;
PyObject *path, *fullpath = NULL;
struct st_zip_searchorder *zso;
+ FILE *fp;
subname = get_subname(fullname);
if (subname == NULL)
@@ -1388,6 +1536,12 @@ get_module_code(ZipImporter *self, PyObject *fullname,
if (path == NULL)
return NULL;
+ fp = safely_reopen_archive(self);
+ if (fp == NULL) {
+ Py_DECREF(path);
+ return NULL;
+ }
+
for (zso = zip_searchorder; *zso->suffix; zso++) {
code = NULL;
@@ -1398,6 +1552,7 @@ get_module_code(ZipImporter *self, PyObject *fullname,
if (Py_VerboseFlag > 1)
PySys_FormatStderr("# trying %U%c%U\n",
self->archive, (int)SEP, fullpath);
+
toc_entry = PyDict_GetItem(self->files, fullpath);
if (toc_entry != NULL) {
time_t mtime = 0;
@@ -1413,7 +1568,7 @@ get_module_code(ZipImporter *self, PyObject *fullname,
Py_CLEAR(fullpath);
if (p_ispackage != NULL)
*p_ispackage = ispackage;
- code = get_code_from_data(self, ispackage,
+ code = get_code_from_data(self, fp, ispackage,
isbytecode, mtime,
toc_entry);
if (code == Py_None) {
@@ -1433,6 +1588,7 @@ get_module_code(ZipImporter *self, PyObject *fullname,
}
PyErr_Format(ZipImportError, "can't find module %R", fullname);
exit:
+ fclose(fp);
Py_DECREF(path);
Py_XDECREF(fullpath);
return code;
@@ -1450,6 +1606,8 @@ This module exports three objects:\n\
subclass of ImportError, so it can be caught as ImportError, too.\n\
- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
info dicts, as used in zipimporter._files.\n\
+- _zip_stat_cache: a dict, mapping archive paths to stat_result\n\
+ info for the .zip the last time anything was imported from it.\n\
\n\
It is usually not needed to use the zipimport module explicitly; it is\n\
used by the builtin import mechanism for sys.path items that are paths\n\
@@ -1509,6 +1667,7 @@ PyInit_zipimport(void)
(PyObject *)&ZipImporter_Type) < 0)
return NULL;
+ Py_XDECREF(zip_directory_cache); /* Avoid embedded interpreter leaks. */
zip_directory_cache = PyDict_New();
if (zip_directory_cache == NULL)
return NULL;
@@ -1516,5 +1675,36 @@ PyInit_zipimport(void)
if (PyModule_AddObject(mod, "_zip_directory_cache",
zip_directory_cache) < 0)
return NULL;
+
+ Py_XDECREF(zip_stat_cache); /* Avoid embedded interpreter leaks. */
+ zip_stat_cache = PyDict_New();
+ if (zip_stat_cache == NULL)
+ return NULL;
+ Py_INCREF(zip_stat_cache);
+ if (PyModule_AddObject(mod, "_zip_stat_cache", zip_stat_cache) < 0)
+ return NULL;
+
+ {
+ /* We cannot import "os" here as that is a .py/.pyc file that could
+ * live within a zipped up standard library. Import the posix or nt
+ * builtin that provides the fstat() function we want instead. */
+ PyObject *os_like_module;
+ Py_CLEAR(fstat_function); /* Avoid embedded interpreter leaks. */
+ os_like_module = PyImport_ImportModule("posix");
+ if (os_like_module == NULL) {
+ PyErr_Clear();
+ os_like_module = PyImport_ImportModule("nt");
+ }
+ if (os_like_module != NULL) {
+ fstat_function = PyObject_GetAttrString(os_like_module, "fstat");
+ Py_DECREF(os_like_module);
+ }
+ if (fstat_function == NULL) {
+ PyErr_Clear(); /* non-fatal, we'll go on without it. */
+ if (Py_VerboseFlag)
+ PySys_WriteStderr("# zipimport unable to use os.fstat().\n");
+ }
+ }
+
return mod;
}