diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-03-15 00:46:50 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-03-15 00:46:50 (GMT) |
commit | f6b563af2d1d8dc1782546436990c0517f226ccf (patch) | |
tree | 38bdcdb5e8d28a8b3092ccc01262c0b0533c6ca9 /Modules/zipimport.c | |
parent | 942003ccf9811270e22445f62a66ab854003255d (diff) | |
download | cpython-f6b563af2d1d8dc1782546436990c0517f226ccf.zip cpython-f6b563af2d1d8dc1782546436990c0517f226ccf.tar.gz cpython-f6b563af2d1d8dc1782546436990c0517f226ccf.tar.bz2 |
Issue #3080: zipimport has a full unicode suppport
- Use Unicode for module paths and names, self->archive and self->prefix
- Format module names and paths use %R instead of '%U' to escape surrogate
characters (PEP 383)
- Use PyImport_ExecCodeModuleObject() instead of PyImport_ExecCodeModuleEx()
- Use PyImport_AddModuleObject() instead of PyImport_AddModule()
Diffstat (limited to 'Modules/zipimport.c')
-rw-r--r-- | Modules/zipimport.c | 303 |
1 files changed, 168 insertions, 135 deletions
diff --git a/Modules/zipimport.c b/Modules/zipimport.c index 88a27c1..4079261 100644 --- a/Modules/zipimport.c +++ b/Modules/zipimport.c @@ -49,7 +49,7 @@ static PyObject *zip_directory_cache = NULL; /* forward decls */ static PyObject *read_directory(PyObject *archive); static PyObject *get_data(PyObject *archive, PyObject *toc_entry); -static PyObject *get_module_code(ZipImporter *self, char *fullname, +static PyObject *get_module_code(ZipImporter *self, PyObject *fullname, int *p_ispackage, PyObject **p_modpath); @@ -202,49 +202,50 @@ zipimporter_repr(ZipImporter *self) } /* return fullname.split(".")[-1] */ -static char * -get_subname(char *fullname) +static PyObject * +get_subname(PyObject *fullname) { - char *subname = strrchr(fullname, '.'); - if (subname == NULL) - subname = fullname; - else + Py_ssize_t len; + Py_UNICODE *subname; + subname = Py_UNICODE_strrchr(PyUnicode_AS_UNICODE(fullname), '.'); + if (subname == NULL) { + Py_INCREF(fullname); + return fullname; + } else { subname++; - return subname; + len = PyUnicode_GET_SIZE(fullname); + len -= subname - PyUnicode_AS_UNICODE(fullname); + return PyUnicode_FromUnicode(subname, len); + } } /* Given a (sub)modulename, write the potential file path in the archive (without extension) to the path buffer. Return the - length of the resulting string. */ -static int -make_filename(PyObject *prefix_obj, char *name, char *path, size_t pathsize) + length of the resulting string. + + return self.prefix + name.replace('.', os.sep) */ +static PyObject* +make_filename(PyObject *prefix, PyObject *name) { - size_t len; - char *p; - PyObject *prefix; + PyObject *pathobj; + Py_UNICODE *p; - prefix = PyUnicode_EncodeFSDefault(prefix_obj); - if (prefix == NULL) - return -1; - len = PyBytes_GET_SIZE(prefix); + pathobj = PyUnicode_FromUnicode(NULL, + PyUnicode_GET_SIZE(prefix) + + PyUnicode_GET_SIZE(name)); + if (pathobj == NULL) + return NULL; - /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */ - if (len + strlen(name) + 13 >= pathsize - 1) { - PyErr_SetString(ZipImportError, "path too long"); - Py_DECREF(prefix); - return -1; - } + p = PyUnicode_AS_UNICODE(pathobj); - strcpy(path, PyBytes_AS_STRING(prefix)); - Py_DECREF(prefix); - strcpy(path + len, name); - for (p = path + len; *p; p++) { + Py_UNICODE_strcpy(p, PyUnicode_AS_UNICODE(prefix)); + p += PyUnicode_GET_SIZE(prefix); + Py_UNICODE_strcpy(p, PyUnicode_AS_UNICODE(name)); + for (; *p; p++) { if (*p == '.') *p = SEP; } - len += strlen(name); - assert(len < INT_MAX); - return (int)len; + return pathobj; } enum zi_module_info { @@ -256,27 +257,38 @@ enum zi_module_info { /* Return some information about a module. */ static enum zi_module_info -get_module_info(ZipImporter *self, char *fullname) +get_module_info(ZipImporter *self, PyObject *fullname) { - char *subname, path[MAXPATHLEN + 1]; - int len; + PyObject *subname; + PyObject *path, *fullpath, *item; struct st_zip_searchorder *zso; subname = get_subname(fullname); + if (subname == NULL) + return MI_ERROR; - len = make_filename(self->prefix, subname, path, sizeof(path)); - if (len < 0) + path = make_filename(self->prefix, subname); + Py_DECREF(subname); + if (path == NULL) return MI_ERROR; for (zso = zip_searchorder; *zso->suffix; zso++) { - strcpy(path + len, zso->suffix); - if (PyDict_GetItemString(self->files, path) != NULL) { + fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix); + if (fullpath == NULL) { + Py_DECREF(path); + return MI_ERROR; + } + item = PyDict_GetItem(self->files, fullpath); + Py_DECREF(fullpath); + if (item != NULL) { + Py_DECREF(path); if (zso->type & IS_PACKAGE) return MI_PACKAGE; else return MI_MODULE; } } + Py_DECREF(path); return MI_NOT_FOUND; } @@ -287,10 +299,10 @@ zipimporter_find_module(PyObject *obj, PyObject *args) { ZipImporter *self = (ZipImporter *)obj; PyObject *path = NULL; - char *fullname; + PyObject *fullname; enum zi_module_info mi; - if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module", + if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path)) return NULL; @@ -311,11 +323,11 @@ zipimporter_load_module(PyObject *obj, PyObject *args) { ZipImporter *self = (ZipImporter *)obj; PyObject *code = NULL, *mod, *dict; - char *fullname; - PyObject *modpath = NULL, *modpath_bytes; + PyObject *fullname; + PyObject *modpath = NULL; int ispackage; - if (!PyArg_ParseTuple(args, "s:zipimporter.load_module", + if (!PyArg_ParseTuple(args, "U:zipimporter.load_module", &fullname)) return NULL; @@ -323,7 +335,7 @@ zipimporter_load_module(PyObject *obj, PyObject *args) if (code == NULL) goto error; - mod = PyImport_AddModule(fullname); + mod = PyImport_AddModuleObject(fullname); if (mod == NULL) goto error; dict = PyModule_GetDict(mod); @@ -336,17 +348,17 @@ zipimporter_load_module(PyObject *obj, PyObject *args) /* add __path__ to the module *before* the code gets executed */ PyObject *pkgpath, *fullpath; - char *subname = get_subname(fullname); + PyObject *subname = get_subname(fullname); int err; - fullpath = PyUnicode_FromFormat("%U%c%U%s", + fullpath = PyUnicode_FromFormat("%U%c%U%U", self->archive, SEP, self->prefix, subname); + Py_DECREF(subname); if (fullpath == NULL) goto error; - pkgpath = Py_BuildValue("[O]", fullpath); - Py_DECREF(fullpath); + pkgpath = Py_BuildValue("[N]", fullpath); if (pkgpath == NULL) goto error; err = PyDict_SetItemString(dict, "__path__", pkgpath); @@ -354,18 +366,13 @@ zipimporter_load_module(PyObject *obj, PyObject *args) if (err != 0) goto error; } - modpath_bytes = PyUnicode_EncodeFSDefault(modpath); - if (modpath_bytes == NULL) - goto error; - mod = PyImport_ExecCodeModuleEx(fullname, code, - PyBytes_AS_STRING(modpath_bytes)); - Py_DECREF(modpath_bytes); + mod = PyImport_ExecCodeModuleObject(fullname, code, modpath, NULL); Py_CLEAR(code); if (mod == NULL) goto error; if (Py_VerboseFlag) - PySys_FormatStderr("import %s # loaded from Zip %U\n", + PySys_FormatStderr("import %U # loaded from Zip %U\n", fullname, modpath); Py_DECREF(modpath); return mod; @@ -380,12 +387,10 @@ static PyObject * zipimporter_get_filename(PyObject *obj, PyObject *args) { ZipImporter *self = (ZipImporter *)obj; - PyObject *code; - char *fullname; - PyObject *modpath; + PyObject *fullname, *code, *modpath; int ispackage; - if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename", + if (!PyArg_ParseTuple(args, "U:zipimporter.get_filename", &fullname)) return NULL; @@ -404,10 +409,10 @@ static PyObject * zipimporter_is_package(PyObject *obj, PyObject *args) { ZipImporter *self = (ZipImporter *)obj; - char *fullname; + PyObject *fullname; enum zi_module_info mi; - if (!PyArg_ParseTuple(args, "s:zipimporter.is_package", + if (!PyArg_ParseTuple(args, "U:zipimporter.is_package", &fullname)) return NULL; @@ -415,7 +420,7 @@ zipimporter_is_package(PyObject *obj, PyObject *args) if (mi == MI_ERROR) return NULL; if (mi == MI_NOT_FOUND) { - PyErr_Format(ZipImportError, "can't find module '%s'", fullname); + PyErr_Format(ZipImportError, "can't find module %R", fullname); return NULL; } return PyBool_FromLong(mi == MI_PACKAGE); @@ -477,9 +482,9 @@ static PyObject * zipimporter_get_code(PyObject *obj, PyObject *args) { ZipImporter *self = (ZipImporter *)obj; - char *fullname; + PyObject *fullname; - if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname)) + if (!PyArg_ParseTuple(args, "U:zipimporter.get_code", &fullname)) return NULL; return get_module_code(self, fullname, NULL, NULL); @@ -490,34 +495,39 @@ zipimporter_get_source(PyObject *obj, PyObject *args) { ZipImporter *self = (ZipImporter *)obj; PyObject *toc_entry; - char *fullname, *subname, path[MAXPATHLEN+1]; - int len; + PyObject *fullname, *subname, *path, *fullpath; enum zi_module_info mi; - if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname)) + if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname)) return NULL; mi = get_module_info(self, fullname); if (mi == MI_ERROR) return NULL; if (mi == MI_NOT_FOUND) { - PyErr_Format(ZipImportError, "can't find module '%s'", fullname); + PyErr_Format(ZipImportError, "can't find module %R", fullname); return NULL; } + subname = get_subname(fullname); + if (subname == NULL) + return NULL; - len = make_filename(self->prefix, subname, path, sizeof(path)); - if (len < 0) + path = make_filename(self->prefix, subname); + Py_DECREF(subname); + if (path == NULL) return NULL; - if (mi == MI_PACKAGE) { - path[len] = SEP; - strcpy(path + len + 1, "__init__.py"); - } + if (mi == MI_PACKAGE) + fullpath = PyUnicode_FromFormat("%U%c__init__.py", path, SEP); else - strcpy(path + len, ".py"); + fullpath = PyUnicode_FromFormat("%U.py", path); + Py_DECREF(path); + if (fullpath == NULL) + return NULL; - toc_entry = PyDict_GetItemString(self->files, path); + toc_entry = PyDict_GetItem(self->files, fullpath); + Py_DECREF(fullpath); if (toc_entry != NULL) { PyObject *res, *bytes; bytes = get_data(self->archive, toc_entry); @@ -708,9 +718,8 @@ get_long(unsigned char *buf) { data_size and file_offset are 0. */ static PyObject * -read_directory(PyObject *archive_obj) +read_directory(PyObject *archive) { - /* FIXME: work on Py_UNICODE* instead of char* */ PyObject *files = NULL; FILE *fp; unsigned short flags; @@ -727,29 +736,29 @@ read_directory(PyObject *archive_obj) const char *charset; int bootstrap; - if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) { + if (PyUnicode_GET_SIZE(archive) > MAXPATHLEN) { PyErr_SetString(PyExc_OverflowError, "Zip path name is too long"); return NULL; } - Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive_obj)); + Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive)); - fp = _Py_fopen(archive_obj, "rb"); + fp = _Py_fopen(archive, "rb"); if (fp == NULL) { - PyErr_Format(ZipImportError, "can't open Zip file: '%U'", archive_obj); + PyErr_Format(ZipImportError, "can't open Zip file: %R", archive); return NULL; } fseek(fp, -22, SEEK_END); header_position = ftell(fp); if (fread(endof_central_dir, 1, 22, fp) != 22) { fclose(fp); - PyErr_Format(ZipImportError, "can't read Zip file: '%U'", archive_obj); + PyErr_Format(ZipImportError, "can't read Zip file: %R", archive); return NULL; } if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) { /* Bad: End of Central Dir signature */ fclose(fp); - PyErr_Format(ZipImportError, "not a Zip file: '%U'", archive_obj); + PyErr_Format(ZipImportError, "not a Zip file: %R", archive); return NULL; } @@ -826,7 +835,9 @@ read_directory(PyObject *archive_obj) PY_MAJOR_VERSION, PY_MINOR_VERSION); goto error; } - Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1); + Py_UNICODE_strncpy(path + length + 1, + PyUnicode_AS_UNICODE(nameobj), + MAXPATHLEN - length - 1); pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path)); if (pathobj == NULL) @@ -844,8 +855,8 @@ read_directory(PyObject *archive_obj) } fclose(fp); if (Py_VerboseFlag) - PySys_FormatStderr("# zipimport: found %ld names in %U\n", - count, archive_obj); + PySys_FormatStderr("# zipimport: found %ld names in %R\n", + count, archive); return files; error: fclose(fp); @@ -999,7 +1010,7 @@ eq_mtime(time_t t1, time_t t2) to .py if available and we don't want to mask other errors). Returns a new reference. */ static PyObject * -unmarshal_code(char *pathname, PyObject *data, time_t mtime) +unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime) { PyObject *code; char *buf = PyBytes_AsString(data); @@ -1013,8 +1024,8 @@ unmarshal_code(char *pathname, PyObject *data, time_t mtime) if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) { if (Py_VerboseFlag) - PySys_WriteStderr("# %s has bad magic\n", - pathname); + PySys_FormatStderr("# %R has bad magic\n", + pathname); Py_INCREF(Py_None); return Py_None; /* signal caller to try alternative */ } @@ -1022,8 +1033,8 @@ unmarshal_code(char *pathname, PyObject *data, time_t mtime) if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4), mtime)) { if (Py_VerboseFlag) - PySys_WriteStderr("# %s has bad mtime\n", - pathname); + PySys_FormatStderr("# %R has bad mtime\n", + pathname); Py_INCREF(Py_None); return Py_None; /* signal caller to try alternative */ } @@ -1034,7 +1045,7 @@ unmarshal_code(char *pathname, PyObject *data, time_t mtime) if (!PyCode_Check(code)) { Py_DECREF(code); PyErr_Format(PyExc_TypeError, - "compiled module %s is not a code object", + "compiled module %R is not a code object", pathname); return NULL; } @@ -1048,11 +1059,12 @@ unmarshal_code(char *pathname, PyObject *data, time_t mtime) static PyObject * normalize_line_endings(PyObject *source) { - char *buf, *q, *p = PyBytes_AsString(source); + char *buf, *q, *p; PyObject *fixed_source; int len = 0; - if (!p) { + p = PyBytes_AsString(source); + if (p == NULL) { return PyBytes_FromStringAndSize("\n\0", 2); } @@ -1085,16 +1097,24 @@ normalize_line_endings(PyObject *source) /* Given a string buffer containing Python source code, compile it return and return a code object as a new reference. */ static PyObject * -compile_source(char *pathname, PyObject *source) +compile_source(PyObject *pathname, PyObject *source) { - PyObject *code, *fixed_source; + PyObject *code, *fixed_source, *pathbytes; + + pathbytes = PyUnicode_EncodeFSDefault(pathname); + if (pathbytes == NULL) + return NULL; fixed_source = normalize_line_endings(source); - if (fixed_source == NULL) + if (fixed_source == NULL) { + Py_DECREF(pathbytes); return NULL; + } - code = Py_CompileString(PyBytes_AsString(fixed_source), pathname, + code = Py_CompileString(PyBytes_AsString(fixed_source), + PyBytes_AsString(pathbytes), Py_file_input); + Py_DECREF(pathbytes); Py_DECREF(fixed_source); return code; } @@ -1123,14 +1143,19 @@ parse_dostime(int dostime, int dosdate) modification time of the matching .py file, or 0 if no source is available. */ static time_t -get_mtime_of_source(ZipImporter *self, char *path) +get_mtime_of_source(ZipImporter *self, PyObject *path) { - PyObject *toc_entry; - time_t mtime = 0; - Py_ssize_t lastchar = strlen(path) - 1; - char savechar = path[lastchar]; - path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */ - toc_entry = PyDict_GetItemString(self->files, path); + PyObject *toc_entry, *stripped; + time_t mtime; + + /* strip 'c' or 'o' from *.py[co] */ + stripped = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(path), + PyUnicode_GET_SIZE(path) - 1); + if (stripped == NULL) + return (time_t)-1; + + toc_entry = PyDict_GetItem(self->files, stripped); + Py_DECREF(stripped); if (toc_entry != NULL && PyTuple_Check(toc_entry) && PyTuple_Size(toc_entry) == 8) { /* fetch the time stamp of the .py file for comparison @@ -1139,8 +1164,8 @@ get_mtime_of_source(ZipImporter *self, char *path) time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5)); date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6)); mtime = parse_dostime(time, date); - } - path[lastchar] = savechar; + } else + mtime = 0; return mtime; } @@ -1150,24 +1175,17 @@ static PyObject * get_code_from_data(ZipImporter *self, int ispackage, int isbytecode, time_t mtime, PyObject *toc_entry) { - PyObject *data, *code; - PyObject *modpath; + PyObject *data, *modpath, *code; data = get_data(self->archive, toc_entry); if (data == NULL) return NULL; - modpath = PyUnicode_EncodeFSDefault(PyTuple_GetItem(toc_entry, 0)); - if (modpath == NULL) { - Py_DECREF(data); - return NULL; - } - + modpath = PyTuple_GetItem(toc_entry, 0); if (isbytecode) - code = unmarshal_code(PyBytes_AS_STRING(modpath), data, mtime); + code = unmarshal_code(modpath, data, mtime); else - code = compile_source(PyBytes_AS_STRING(modpath), data); - Py_DECREF(modpath); + code = compile_source(modpath, data); Py_DECREF(data); return code; } @@ -1175,35 +1193,45 @@ get_code_from_data(ZipImporter *self, int ispackage, int isbytecode, /* Get the code object associated with the module specified by 'fullname'. */ static PyObject * -get_module_code(ZipImporter *self, char *fullname, +get_module_code(ZipImporter *self, PyObject *fullname, int *p_ispackage, PyObject **p_modpath) { - PyObject *toc_entry; - char *subname, path[MAXPATHLEN + 1]; - int len; + PyObject *code, *toc_entry, *subname; + PyObject *path, *fullpath; struct st_zip_searchorder *zso; subname = get_subname(fullname); + if (subname == NULL) + return NULL; - len = make_filename(self->prefix, subname, path, sizeof(path)); - if (len < 0) + path = make_filename(self->prefix, subname); + Py_DECREF(subname); + if (path == NULL) return NULL; for (zso = zip_searchorder; *zso->suffix; zso++) { - PyObject *code = NULL; + code = NULL; + + fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix); + if (fullpath == NULL) + goto exit; - strcpy(path + len, zso->suffix); if (Py_VerboseFlag > 1) - PySys_FormatStderr("# trying %U%c%s\n", - self->archive, (int)SEP, path); - toc_entry = PyDict_GetItemString(self->files, path); + PySys_FormatStderr("# trying %U%c%U\n", + self->archive, (int)SEP, fullpath); + toc_entry = PyDict_GetItem(self->files, fullpath); if (toc_entry != NULL) { time_t mtime = 0; int ispackage = zso->type & IS_PACKAGE; int isbytecode = zso->type & IS_BYTECODE; - if (isbytecode) - mtime = get_mtime_of_source(self, path); + if (isbytecode) { + mtime = get_mtime_of_source(self, fullpath); + if (mtime == (time_t)-1 && PyErr_Occurred()) { + goto exit; + } + } + Py_CLEAR(fullpath); if (p_ispackage != NULL) *p_ispackage = ispackage; code = get_code_from_data(self, ispackage, @@ -1219,11 +1247,16 @@ get_module_code(ZipImporter *self, char *fullname, *p_modpath = PyTuple_GetItem(toc_entry, 0); Py_INCREF(*p_modpath); } - return code; + goto exit; } + else + Py_CLEAR(fullpath); } - PyErr_Format(ZipImportError, "can't find module '%s'", fullname); - return NULL; + PyErr_Format(ZipImportError, "can't find module %R", fullname); +exit: + Py_DECREF(path); + Py_XDECREF(fullpath); + return code; } |