diff options
Diffstat (limited to 'Modules/zipimport.c')
-rw-r--r-- | Modules/zipimport.c | 366 |
1 files changed, 218 insertions, 148 deletions
diff --git a/Modules/zipimport.c b/Modules/zipimport.c index e874498..68c2894 100644 --- a/Modules/zipimport.c +++ b/Modules/zipimport.c @@ -35,19 +35,22 @@ typedef struct _zipimporter ZipImporter; struct _zipimporter { PyObject_HEAD - PyObject *archive; /* pathname of the Zip archive */ - PyObject *prefix; /* file prefix: "a/sub/directory/" */ + PyObject *archive; /* pathname of the Zip archive, + decoded from the filesystem encoding */ + PyObject *prefix; /* file prefix: "a/sub/directory/", + encoded to the filesystem encoding */ PyObject *files; /* dict with file info {path: toc_entry} */ }; static PyObject *ZipImportError; +/* read_directory() cache */ static PyObject *zip_directory_cache = NULL; /* forward decls */ -static PyObject *read_directory(char *archive); -static PyObject *get_data(char *archive, PyObject *toc_entry); +static PyObject *read_directory(PyObject *archive); +static PyObject *get_data(PyObject *archive, PyObject *toc_entry); static PyObject *get_module_code(ZipImporter *self, char *fullname, - int *p_ispackage, char **p_modpath); + int *p_ispackage, PyObject **p_modpath); #define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type) @@ -60,26 +63,29 @@ static PyObject *get_module_code(ZipImporter *self, char *fullname, static int zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds) { - char *path, *p, *prefix, buf[MAXPATHLEN+2]; - size_t len; + PyObject *pathobj, *files; + Py_UNICODE *path, *p, *prefix, buf[MAXPATHLEN+2]; + Py_ssize_t len; if (!_PyArg_NoKeywords("zipimporter()", kwds)) return -1; - if (!PyArg_ParseTuple(args, "s:zipimporter", &path)) + if (!PyArg_ParseTuple(args, "O&:zipimporter", + PyUnicode_FSDecoder, &pathobj)) return -1; - len = strlen(path); + /* copy path to buf */ + len = PyUnicode_GET_SIZE(pathobj); if (len == 0) { PyErr_SetString(ZipImportError, "archive path is empty"); - return -1; + goto error; } if (len >= MAXPATHLEN) { PyErr_SetString(ZipImportError, "archive path too long"); - return -1; + goto error; } - strcpy(buf, path); + Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(pathobj)); #ifdef ALTSEP for (p = buf; *p; p++) { @@ -94,7 +100,12 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds) struct stat statbuf; int rv; - rv = stat(buf, &statbuf); + if (pathobj == NULL) { + pathobj = PyUnicode_FromUnicode(buf, len); + if (pathobj == NULL) + goto error; + } + rv = _Py_stat(pathobj, &statbuf); if (rv == 0) { /* it exists */ if (S_ISREG(statbuf.st_mode)) @@ -102,56 +113,60 @@ zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds) path = buf; break; } + else if (PyErr_Occurred()) + goto error; /* back up one path element */ - p = strrchr(buf, SEP); + p = Py_UNICODE_strrchr(buf, SEP); if (prefix != NULL) *prefix = SEP; if (p == NULL) break; *p = '\0'; + len = p - buf; prefix = p; + Py_CLEAR(pathobj); } - if (path != NULL) { - PyObject *files; - files = PyDict_GetItemString(zip_directory_cache, path); - if (files == NULL) { - files = read_directory(buf); - if (files == NULL) - return -1; - if (PyDict_SetItemString(zip_directory_cache, path, - files) != 0) - return -1; - } - else - Py_INCREF(files); - self->files = files; - } - else { + if (path == NULL) { PyErr_SetString(ZipImportError, "not a Zip file"); - return -1; + goto error; } - if (prefix == NULL) - prefix = ""; - else { + files = PyDict_GetItem(zip_directory_cache, pathobj); + if (files == NULL) { + files = read_directory(pathobj); + if (files == NULL) + goto error; + if (PyDict_SetItem(zip_directory_cache, pathobj, files) != 0) + goto error; + } + else + Py_INCREF(files); + self->files = files; + + self->archive = pathobj; + pathobj = NULL; + + if (prefix != NULL) { prefix++; - len = strlen(prefix); + len = Py_UNICODE_strlen(prefix); if (prefix[len-1] != SEP) { /* add trailing SEP */ prefix[len] = SEP; prefix[len + 1] = '\0'; + len++; } } - - self->archive = PyUnicode_FromString(buf); - if (self->archive == NULL) - return -1; - - self->prefix = PyUnicode_FromString(prefix); + else + len = 0; + self->prefix = PyUnicode_FromUnicode(prefix, len); if (self->prefix == NULL) - return -1; + goto error; return 0; + +error: + Py_XDECREF(pathobj); + return -1; } /* GC support. */ @@ -176,19 +191,14 @@ zipimporter_dealloc(ZipImporter *self) static PyObject * zipimporter_repr(ZipImporter *self) { - char *archive = "???"; - char *prefix = ""; - - if (self->archive != NULL && PyUnicode_Check(self->archive)) - archive = _PyUnicode_AsString(self->archive); - if (self->prefix != NULL && PyUnicode_Check(self->prefix)) - prefix = _PyUnicode_AsString(self->prefix); - if (prefix != NULL && *prefix) - return PyUnicode_FromFormat("<zipimporter object \"%.300s%c%.150s\">", - archive, SEP, prefix); + if (self->archive == NULL) + return PyUnicode_FromString("<zipimporter object \"???\">"); + else if (self->prefix != NULL && PyUnicode_GET_SIZE(self->prefix) != 0) + return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">", + self->archive, SEP, self->prefix); else - return PyUnicode_FromFormat("<zipimporter object \"%.300s\">", - archive); + return PyUnicode_FromFormat("<zipimporter object \"%U\">", + self->archive); } /* return fullname.split(".")[-1] */ @@ -207,20 +217,26 @@ get_subname(char *fullname) archive (without extension) to the path buffer. Return the length of the resulting string. */ static int -make_filename(char *prefix, char *name, char *path) +make_filename(PyObject *prefix_obj, char *name, char *path, size_t pathsize) { size_t len; char *p; + PyObject *prefix; - len = strlen(prefix); + prefix = PyUnicode_EncodeFSDefault(prefix_obj); + if (prefix == NULL) + return -1; + len = PyBytes_GET_SIZE(prefix); /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */ - if (len + strlen(name) + 13 >= MAXPATHLEN) { + if (len + strlen(name) + 13 >= pathsize - 1) { PyErr_SetString(ZipImportError, "path too long"); + Py_DECREF(prefix); return -1; } - strcpy(path, prefix); + strcpy(path, PyBytes_AS_STRING(prefix)); + Py_DECREF(prefix); strcpy(path + len, name); for (p = path + len; *p; p++) { if (*p == '.') @@ -248,7 +264,7 @@ get_module_info(ZipImporter *self, char *fullname) subname = get_subname(fullname); - len = make_filename(_PyUnicode_AsString(self->prefix), subname, path); + len = make_filename(self->prefix, subname, path, sizeof(path)); if (len < 0) return MI_ERROR; @@ -294,8 +310,9 @@ static PyObject * zipimporter_load_module(PyObject *obj, PyObject *args) { ZipImporter *self = (ZipImporter *)obj; - PyObject *code, *mod, *dict; - char *fullname, *modpath; + PyObject *code = NULL, *mod, *dict; + char *fullname; + PyObject *modpath = NULL, *modpath_bytes; int ispackage; if (!PyArg_ParseTuple(args, "s:zipimporter.load_module", @@ -304,13 +321,11 @@ zipimporter_load_module(PyObject *obj, PyObject *args) code = get_module_code(self, fullname, &ispackage, &modpath); if (code == NULL) - return NULL; + goto error; mod = PyImport_AddModule(fullname); - if (mod == NULL) { - Py_DECREF(code); - return NULL; - } + if (mod == NULL) + goto error; dict = PyModule_GetDict(mod); /* mod.__loader__ = self */ @@ -339,15 +354,24 @@ zipimporter_load_module(PyObject *obj, PyObject *args) if (err != 0) goto error; } - mod = PyImport_ExecCodeModuleEx(fullname, code, modpath); - Py_DECREF(code); + modpath_bytes = PyUnicode_EncodeFSDefault(modpath); + if (modpath_bytes == NULL) + goto error; + mod = PyImport_ExecCodeModuleEx(fullname, code, + PyBytes_AS_STRING(modpath_bytes)); + Py_DECREF(modpath_bytes); + Py_CLEAR(code); + if (mod == NULL) + goto error; + if (Py_VerboseFlag) - PySys_WriteStderr("import %s # loaded from Zip %s\n", - fullname, modpath); + PySys_FormatStderr("import %s # loaded from Zip %U\n", + fullname, modpath); + Py_DECREF(modpath); return mod; error: - Py_DECREF(code); - Py_DECREF(mod); + Py_XDECREF(code); + Py_XDECREF(modpath); return NULL; } @@ -357,21 +381,22 @@ zipimporter_get_filename(PyObject *obj, PyObject *args) { ZipImporter *self = (ZipImporter *)obj; PyObject *code; - char *fullname, *modpath; + char *fullname; + PyObject *modpath; int ispackage; if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename", - &fullname)) - return NULL; + &fullname)) + return NULL; /* Deciding the filename requires working out where the code would come from if the module was actually loaded */ code = get_module_code(self, fullname, &ispackage, &modpath); if (code == NULL) - return NULL; + return NULL; Py_DECREF(code); /* Only need the path info */ - return PyUnicode_FromString(modpath); + return modpath; } /* Return a bool signifying whether the module is a package or not. */ @@ -390,8 +415,7 @@ zipimporter_is_package(PyObject *obj, PyObject *args) if (mi == MI_ERROR) return NULL; if (mi == MI_NOT_FOUND) { - PyErr_Format(ZipImportError, "can't find module '%.200s'", - fullname); + PyErr_Format(ZipImportError, "can't find module '%s'", fullname); return NULL; } return PyBool_FromLong(mi == MI_PACKAGE); @@ -401,42 +425,52 @@ static PyObject * zipimporter_get_data(PyObject *obj, PyObject *args) { ZipImporter *self = (ZipImporter *)obj; - char *path; + PyObject *pathobj, *key; + const Py_UNICODE *path; #ifdef ALTSEP - char *p, buf[MAXPATHLEN + 1]; + Py_UNICODE *p, buf[MAXPATHLEN + 1]; #endif + Py_UNICODE *archive; PyObject *toc_entry; - Py_ssize_t len; - char *archive_str; + Py_ssize_t path_len, len; - if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path)) + if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &pathobj)) return NULL; + path_len = PyUnicode_GET_SIZE(pathobj); + path = PyUnicode_AS_UNICODE(pathobj); #ifdef ALTSEP - if (strlen(path) >= MAXPATHLEN) { + if (path_len >= MAXPATHLEN) { PyErr_SetString(ZipImportError, "path too long"); return NULL; } - strcpy(buf, path); + Py_UNICODE_strcpy(buf, path); for (p = buf; *p; p++) { if (*p == ALTSEP) *p = SEP; } path = buf; #endif - archive_str = _PyUnicode_AsStringAndSize(self->archive, &len); - if ((size_t)len < strlen(path) && - strncmp(path, archive_str, len) == 0 && + archive = PyUnicode_AS_UNICODE(self->archive); + len = PyUnicode_GET_SIZE(self->archive); + if ((size_t)len < Py_UNICODE_strlen(path) && + Py_UNICODE_strncmp(path, archive, len) == 0 && path[len] == SEP) { - path = path + len + 1; + path += len + 1; + path_len -= len + 1; } - toc_entry = PyDict_GetItemString(self->files, path); + key = PyUnicode_FromUnicode(path, path_len); + if (key == NULL) + return NULL; + toc_entry = PyDict_GetItem(self->files, key); if (toc_entry == NULL) { - PyErr_SetFromErrnoWithFilename(PyExc_IOError, path); + PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key); + Py_DECREF(key); return NULL; } - return get_data(archive_str, toc_entry); + Py_DECREF(key); + return get_data(self->archive, toc_entry); } static PyObject * @@ -467,13 +501,12 @@ zipimporter_get_source(PyObject *obj, PyObject *args) if (mi == MI_ERROR) return NULL; if (mi == MI_NOT_FOUND) { - PyErr_Format(ZipImportError, "can't find module '%.200s'", - fullname); + PyErr_Format(ZipImportError, "can't find module '%s'", fullname); return NULL; } subname = get_subname(fullname); - len = make_filename(_PyUnicode_AsString(self->prefix), subname, path); + len = make_filename(self->prefix, subname, path, sizeof(path)); if (len < 0) return NULL; @@ -486,9 +519,13 @@ zipimporter_get_source(PyObject *obj, PyObject *args) toc_entry = PyDict_GetItemString(self->files, path); if (toc_entry != NULL) { - PyObject *bytes = get_data(_PyUnicode_AsString(self->archive), toc_entry); - PyObject *res = PyUnicode_FromString(PyBytes_AsString(bytes)); - Py_XDECREF(bytes); + PyObject *res, *bytes; + bytes = get_data(self->archive, toc_entry); + if (bytes == NULL) + return NULL; + res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes), + PyBytes_GET_SIZE(bytes)); + Py_DECREF(bytes); return res; } @@ -656,7 +693,8 @@ get_long(unsigned char *buf) { A toc_entry is a tuple: - (__file__, # value to use for __file__, available for all files + (__file__, # value to use for __file__, available for all files, + # encoded to the filesystem encoding compress, # compression kind; 0 for uncompressed data_size, # size of compressed data on disk file_size, # size of decompressed data @@ -664,51 +702,54 @@ get_long(unsigned char *buf) { time, # mod time of file (in dos format) date, # mod data of file (in dos format) crc, # crc checksum of the data - ) + ) Directories can be recognized by the trailing SEP in the name, data_size and file_offset are 0. */ static PyObject * -read_directory(char *archive) +read_directory(PyObject *archive_obj) { + /* FIXME: work on Py_UNICODE* instead of char* */ PyObject *files = NULL; FILE *fp; + unsigned short flags; long compress, crc, data_size, file_size, file_offset, date, time; long header_offset, name_size, header_size, header_position; long i, l, count; size_t length; - char path[MAXPATHLEN + 5]; + Py_UNICODE path[MAXPATHLEN + 5]; char name[MAXPATHLEN + 5]; + PyObject *nameobj = NULL; char *p, endof_central_dir[22]; long arc_offset; /* offset from beginning of file to start of zip-archive */ + PyObject *pathobj; + const char *charset; + int bootstrap; - if (strlen(archive) > MAXPATHLEN) { + if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) { PyErr_SetString(PyExc_OverflowError, "Zip path name is too long"); return NULL; } - strcpy(path, archive); + Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive_obj)); - fp = fopen(archive, "rb"); + fp = _Py_fopen(archive_obj, "rb"); if (fp == NULL) { - PyErr_Format(ZipImportError, "can't open Zip file: " - "'%.200s'", archive); + PyErr_Format(ZipImportError, "can't open Zip file: '%U'", archive_obj); return NULL; } fseek(fp, -22, SEEK_END); header_position = ftell(fp); if (fread(endof_central_dir, 1, 22, fp) != 22) { fclose(fp); - PyErr_Format(ZipImportError, "can't read Zip file: " - "'%.200s'", archive); + PyErr_Format(ZipImportError, "can't read Zip file: '%U'", archive_obj); return NULL; } if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) { /* Bad: End of Central Dir signature */ fclose(fp); - PyErr_Format(ZipImportError, "not a Zip file: " - "'%.200s'", archive); + PyErr_Format(ZipImportError, "not a Zip file: '%U'", archive_obj); return NULL; } @@ -721,7 +762,7 @@ read_directory(char *archive) if (files == NULL) goto error; - length = (long)strlen(path); + length = Py_UNICODE_strlen(path); path[length] = SEP; /* Start of Central Directory */ @@ -734,7 +775,8 @@ read_directory(char *archive) l = PyMarshal_ReadLongFromFile(fp); if (l != 0x02014B50) break; /* Bad: Central Dir File Header */ - fseek(fp, header_offset + 10, 0); + fseek(fp, header_offset + 8, 0); + flags = (unsigned short)PyMarshal_ReadShortFromFile(fp); compress = PyMarshal_ReadShortFromFile(fp); time = PyMarshal_ReadShortFromFile(fp); date = PyMarshal_ReadShortFromFile(fp); @@ -760,13 +802,41 @@ read_directory(char *archive) *p = 0; /* Add terminating null byte */ header_offset += header_size; - strncpy(path + length + 1, name, MAXPATHLEN - length - 1); + bootstrap = 0; + if (flags & 0x0800) + charset = "utf-8"; + else if (!PyThreadState_GET()->interp->codecs_initialized) { + /* During bootstrap, we may need to load the encodings + package from a ZIP file. But the cp437 encoding is implemented + in Python in the encodings package. + + Break out of this dependency by assuming that the path to + the encodings module is ASCII-only. */ + charset = "ascii"; + bootstrap = 1; + } + else + charset = "cp437"; + nameobj = PyUnicode_Decode(name, name_size, charset, NULL); + if (nameobj == NULL) { + if (bootstrap) + PyErr_Format(PyExc_NotImplementedError, + "bootstrap issue: python%i%i.zip contains non-ASCII " + "filenames without the unicode flag", + PY_MAJOR_VERSION, PY_MINOR_VERSION); + goto error; + } + Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1); - t = Py_BuildValue("siiiiiii", path, compress, data_size, + pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path)); + if (pathobj == NULL) + goto error; + t = Py_BuildValue("Niiiiiii", pathobj, compress, data_size, file_size, file_offset, time, date, crc); if (t == NULL) goto error; - err = PyDict_SetItemString(files, name, t); + err = PyDict_SetItem(files, nameobj, t); + Py_CLEAR(nameobj); Py_DECREF(t); if (err != 0) goto error; @@ -774,12 +844,13 @@ read_directory(char *archive) } fclose(fp); if (Py_VerboseFlag) - PySys_WriteStderr("# zipimport: found %ld names in %s\n", - count, archive); + PySys_FormatStderr("# zipimport: found %ld names in %U\n", + count, archive_obj); return files; error: fclose(fp); Py_XDECREF(files); + Py_XDECREF(nameobj); return NULL; } @@ -818,7 +889,7 @@ get_decompress_func(void) /* Given a path to a Zip file and a toc_entry, return the (uncompressed) data as a new reference. */ static PyObject * -get_data(char *archive, PyObject *toc_entry) +get_data(PyObject *archive, PyObject *toc_entry) { PyObject *raw_data, *data = NULL, *decompress; char *buf; @@ -826,20 +897,20 @@ get_data(char *archive, PyObject *toc_entry) int err; Py_ssize_t bytes_read = 0; long l; - char *datapath; + PyObject *datapath; long compress, data_size, file_size, file_offset, bytes_size; long time, date, crc; - if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress, + if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress, &data_size, &file_size, &file_offset, &time, &date, &crc)) { return NULL; } - fp = fopen(archive, "rb"); + fp = _Py_fopen(archive, "rb"); if (!fp) { PyErr_Format(PyExc_IOError, - "zipimport: can not open file %s", archive); + "zipimport: can not open file %U", archive); return NULL; } @@ -849,7 +920,7 @@ get_data(char *archive, PyObject *toc_entry) if (l != 0x04034B50) { /* Bad: Local File Header */ PyErr_Format(ZipImportError, - "bad local file header in %s", + "bad local file header in %U", archive); fclose(fp); return NULL; @@ -962,7 +1033,7 @@ unmarshal_code(char *pathname, PyObject *data, time_t mtime) if (!PyCode_Check(code)) { Py_DECREF(code); PyErr_Format(PyExc_TypeError, - "compiled module %.200s is not a code object", + "compiled module %s is not a code object", pathname); return NULL; } @@ -1079,24 +1150,23 @@ get_code_from_data(ZipImporter *self, int ispackage, int isbytecode, time_t mtime, PyObject *toc_entry) { PyObject *data, *code; - char *modpath; - char *archive = _PyUnicode_AsString(self->archive); + PyObject *modpath; - if (archive == NULL) - return NULL; - - data = get_data(archive, toc_entry); + data = get_data(self->archive, toc_entry); if (data == NULL) return NULL; - modpath = _PyUnicode_AsString(PyTuple_GetItem(toc_entry, 0)); - - if (isbytecode) { - code = unmarshal_code(modpath, data, mtime); - } - else { - code = compile_source(modpath, data); + modpath = PyUnicode_EncodeFSDefault(PyTuple_GetItem(toc_entry, 0)); + if (modpath == NULL) { + Py_DECREF(data); + return NULL; } + + if (isbytecode) + code = unmarshal_code(PyBytes_AS_STRING(modpath), data, mtime); + else + code = compile_source(PyBytes_AS_STRING(modpath), data); + Py_DECREF(modpath); Py_DECREF(data); return code; } @@ -1105,7 +1175,7 @@ get_code_from_data(ZipImporter *self, int ispackage, int isbytecode, 'fullname'. */ static PyObject * get_module_code(ZipImporter *self, char *fullname, - int *p_ispackage, char **p_modpath) + int *p_ispackage, PyObject **p_modpath) { PyObject *toc_entry; char *subname, path[MAXPATHLEN + 1]; @@ -1114,7 +1184,7 @@ get_module_code(ZipImporter *self, char *fullname, subname = get_subname(fullname); - len = make_filename(_PyUnicode_AsString(self->prefix), subname, path); + len = make_filename(self->prefix, subname, path, sizeof(path)); if (len < 0) return NULL; @@ -1123,9 +1193,8 @@ get_module_code(ZipImporter *self, char *fullname, strcpy(path + len, zso->suffix); if (Py_VerboseFlag > 1) - PySys_WriteStderr("# trying %s%c%s\n", - _PyUnicode_AsString(self->archive), - (int)SEP, path); + PySys_FormatStderr("# trying %U%c%s\n", + self->archive, (int)SEP, path); toc_entry = PyDict_GetItemString(self->files, path); if (toc_entry != NULL) { time_t mtime = 0; @@ -1145,13 +1214,14 @@ get_module_code(ZipImporter *self, char *fullname, Py_DECREF(code); continue; } - if (code != NULL && p_modpath != NULL) - *p_modpath = _PyUnicode_AsString( - PyTuple_GetItem(toc_entry, 0)); + if (code != NULL && p_modpath != NULL) { + *p_modpath = PyTuple_GetItem(toc_entry, 0); + Py_INCREF(*p_modpath); + } return code; } } - PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname); + PyErr_Format(ZipImportError, "can't find module '%s'", fullname); return NULL; } |