From 0ad59d467d06c8c9dce81658f4f278783cb70b9f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 30 Mar 2009 22:01:35 +0000 Subject: Issue #5604: non-ASCII characters in module name passed to imp.find_module() were converted to UTF-8 while the path is converted to the default filesystem encoding, causing nonsense. Thanks to Andrew Svetlov. (This time to the right branch. Will block duplicate merge to 3.0.2.) --- Lib/test/test_imp.py | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++ Misc/ACKS | 1 + Misc/NEWS | 4 +++ Python/import.c | 57 +++++++++++++++++++++++++++++------------ 4 files changed, 117 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py index 1e04f5a..cc6792a 100644 --- a/Lib/test/test_imp.py +++ b/Lib/test/test_imp.py @@ -1,4 +1,7 @@ import imp +import locale +import os +import os.path import sys import unittest from test import support @@ -75,6 +78,74 @@ class ImportTests(unittest.TestCase): support.unlink(temp_mod_name + '.pyc') support.unlink(temp_mod_name + '.pyo') + def test_issue5604(self): + # Test cannot cover the imp.load_dynamic function. + # Martin von Loewis notes that a shared library cannot have a non-ASCII + # character because the init_xxx function cannot be compiled, + # so the issue never happens for dynamic modules. + # But the sources were changed to process paths in a generic way. 
+ + locale_encoding = locale.getpreferredencoding() + + # covers utf-8 and Windows ANSI code pages + # one non-space symbol from every page + # (http://en.wikipedia.org/wiki/Code_page) + known_locales = { + 'utf-8' : b'\xe4', + 'cp1250' : b'\x8C', + 'cp1251' : b'\xc0', + 'cp1252' : b'\xc0', + 'cp1253' : b'\xc1', + 'cp1254' : b'\xc0', + 'cp1255' : b'\xe0', + 'cp1256' : b'\xe0', + 'cp1257' : b'\xc0', + 'cp1258' : b'\xc0', + } + + special_char = known_locales.get(locale_encoding) + if special_char: + encoded_char = special_char.decode(locale_encoding) + temp_mod_name = 'test_imp_helper_' + encoded_char + test_package_name = 'test_imp_helper_package_' + encoded_char + init_file_name = os.path.join(test_package_name, '__init__.py') + try: + with open(temp_mod_name + '.py', 'w') as file: + file.write('a = 1\n') + file, filename, info = imp.find_module(temp_mod_name) + self.assertNotEquals(None, file) + self.assertTrue(filename[:-3].endswith(temp_mod_name)) + self.assertEquals('.py', info[0]) + self.assertEquals('U', info[1]) + self.assertEquals(imp.PY_SOURCE, info[2]) + + mod = imp.load_module(temp_mod_name, file, filename, info) + self.assertEquals(1, mod.a) + file.close() + + mod = imp.load_source(temp_mod_name, temp_mod_name + '.py') + self.assertEquals(1, mod.a) + + mod = imp.load_compiled(temp_mod_name, temp_mod_name + '.pyc') + self.assertEquals(1, mod.a) + + if not os.path.exists(test_package_name): + os.mkdir(test_package_name) + with open(init_file_name, 'w') as file: + file.write('b = 2\n') + package = imp.load_package(test_package_name, test_package_name) + self.assertEquals(2, package.b) + finally: + support.unlink(temp_mod_name + '.py') + support.unlink(temp_mod_name + '.pyc') + support.unlink(temp_mod_name + '.pyo') + + support.unlink(init_file_name + '.py') + support.unlink(init_file_name + '.pyc') + support.unlink(init_file_name + '.pyo') + support.rmtree(test_package_name) + + def test_reload(self): import marshal imp.reload(marshal) diff --git 
a/Misc/ACKS b/Misc/ACKS index 85a443b..03ed92d 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -688,6 +688,7 @@ Nathan Sullivan Mark Summerfield Hisao Suzuki Kalle Svensson +Andrew Svetlov Paul Swartz Thenault Sylvain Geoff Talvola diff --git a/Misc/NEWS b/Misc/NEWS index f2888be..a12b8ea 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,10 @@ What's New in Python 3.1 alpha 2? Core and Builtins ----------------- +- Issue #5604: non-ASCII characters in module name passed to + imp.find_module() were converted to UTF-8 while the path is + converted to the default filesystem encoding, causing nonsense. + - Issue #5126: str.isprintable() returned False for space characters. - Issue #4688: Add a heuristic so that tuples and dicts containing only diff --git a/Python/import.c b/Python/import.c index 9c70ed8..2be3308 100644 --- a/Python/import.c +++ b/Python/import.c @@ -3040,15 +3040,20 @@ imp_load_compiled(PyObject *self, PyObject *args) PyObject *fob = NULL; PyObject *m; FILE *fp; - if (!PyArg_ParseTuple(args, "ss|O:load_compiled", - &name, &pathname, &fob)) + if (!PyArg_ParseTuple(args, "ses|O:load_compiled", + &name, + Py_FileSystemDefaultEncoding, &pathname, + &fob)) return NULL; fp = get_file(pathname, fob, "rb"); - if (fp == NULL) + if (fp == NULL) { + PyMem_Free(pathname); return NULL; + } m = load_compiled_module(name, pathname, fp); if (fob == NULL) fclose(fp); + PyMem_Free(pathname); return m; } @@ -3062,15 +3067,20 @@ imp_load_dynamic(PyObject *self, PyObject *args) PyObject *fob = NULL; PyObject *m; FILE *fp = NULL; - if (!PyArg_ParseTuple(args, "ss|O:load_dynamic", - &name, &pathname, &fob)) + if (!PyArg_ParseTuple(args, "ses|O:load_dynamic", + &name, + Py_FileSystemDefaultEncoding, &pathname, + &fob)) return NULL; if (fob) { fp = get_file(pathname, fob, "r"); - if (fp == NULL) + if (fp == NULL) { + PyMem_Free(pathname); return NULL; + } } m = _PyImport_LoadDynamicModule(name, pathname, fp); + PyMem_Free(pathname); return m; } @@ -3084,12 +3094,16 @@ 
imp_load_source(PyObject *self, PyObject *args) PyObject *fob = NULL; PyObject *m; FILE *fp; - if (!PyArg_ParseTuple(args, "ss|O:load_source", - &name, &pathname, &fob)) + if (!PyArg_ParseTuple(args, "ses|O:load_source", + &name, + Py_FileSystemDefaultEncoding, &pathname, + &fob)) return NULL; fp = get_file(pathname, fob, "r"); - if (fp == NULL) + if (fp == NULL) { + PyMem_Free(pathname); return NULL; + } m = load_source_module(name, pathname, fp); if (fob == NULL) fclose(fp); @@ -3102,13 +3116,15 @@ imp_load_module(PyObject *self, PyObject *args) char *name; PyObject *fob; char *pathname; + PyObject * ret; char *suffix; /* Unused */ char *mode; int type; FILE *fp; - if (!PyArg_ParseTuple(args, "sOs(ssi):load_module", - &name, &fob, &pathname, + if (!PyArg_ParseTuple(args, "sOes(ssi):load_module", + &name, &fob, + Py_FileSystemDefaultEncoding, &pathname, &suffix, &mode, &type)) return NULL; if (*mode) { @@ -3119,6 +3135,7 @@ imp_load_module(PyObject *self, PyObject *args) if (!(*mode == 'r' || *mode == 'U') || strchr(mode, '+')) { PyErr_Format(PyExc_ValueError, "invalid file open mode %.200s", mode); + PyMem_Free(pathname); return NULL; } } @@ -3126,10 +3143,14 @@ imp_load_module(PyObject *self, PyObject *args) fp = NULL; else { fp = get_file(NULL, fob, mode); - if (fp == NULL) + if (fp == NULL) { + PyMem_Free(pathname); return NULL; - } - return load_module(name, fp, pathname, type, NULL); + } + } + ret = load_module(name, fp, pathname, type, NULL); + PyMem_Free(pathname); + return ret; } static PyObject * @@ -3137,9 +3158,13 @@ imp_load_package(PyObject *self, PyObject *args) { char *name; char *pathname; - if (!PyArg_ParseTuple(args, "ss:load_package", &name, &pathname)) + PyObject * ret; + if (!PyArg_ParseTuple(args, "ses:load_package", + &name, Py_FileSystemDefaultEncoding, &pathname)) return NULL; - return load_package(name, pathname); + ret = load_package(name, pathname); + PyMem_Free(pathname); + return ret; } static PyObject * -- cgit v0.12