diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2017-06-28 06:27:35 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-28 06:27:35 (GMT) |
commit | 0834905d9b61291b1fc5e05a1ffbc69de9c9379f (patch) | |
tree | 456e79426ec816ba7e0a0bef7e94a6f8423b2786 | |
parent | 413c0a92bcc92efe92849fe5e711163da453410b (diff) | |
download | cpython-0834905d9b61291b1fc5e05a1ffbc69de9c9379f.zip cpython-0834905d9b61291b1fc5e05a1ffbc69de9c9379f.tar.gz cpython-0834905d9b61291b1fc5e05a1ffbc69de9c9379f.tar.bz2 |
[3.6] bpo-13617: Reject embedded null characters in wchar* strings. (GH-2302) (#2462)
Based on patch by Victor Stinner.
Add private C API function _PyUnicode_AsUnicode() which is similar to
PyUnicode_AsUnicode(), but checks for null characters.
(cherry picked from commit f7eae0adfcd4c50034281b2c69f461b43b68db84)
-rw-r--r-- | Include/unicodeobject.h | 10 | ||||
-rw-r--r-- | Lib/ctypes/test/test_loading.py | 2 | ||||
-rw-r--r-- | Lib/test/test_builtin.py | 6 | ||||
-rw-r--r-- | Lib/test/test_curses.py | 11 | ||||
-rw-r--r-- | Lib/test/test_grp.py | 2 | ||||
-rw-r--r-- | Lib/test/test_imp.py | 4 | ||||
-rw-r--r-- | Lib/test/test_locale.py | 5 | ||||
-rw-r--r-- | Lib/test/test_time.py | 4 | ||||
-rw-r--r-- | Lib/test/test_winsound.py | 2 | ||||
-rw-r--r-- | Modules/_ctypes/callproc.c | 7 | ||||
-rw-r--r-- | Modules/_cursesmodule.c | 9 | ||||
-rw-r--r-- | Modules/_io/fileio.c | 3 | ||||
-rw-r--r-- | Modules/_localemodule.c | 5 | ||||
-rw-r--r-- | Modules/grpmodule.c | 1 | ||||
-rw-r--r-- | Modules/nismodule.c | 1 | ||||
-rw-r--r-- | Modules/posixmodule.c | 18 | ||||
-rw-r--r-- | Modules/pwdmodule.c | 1 | ||||
-rw-r--r-- | Modules/spwdmodule.c | 1 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 14 | ||||
-rw-r--r-- | PC/_msi.c | 6 | ||||
-rw-r--r-- | Python/dynload_win.c | 4 | ||||
-rw-r--r-- | Python/fileutils.c | 23 |
22 files changed, 115 insertions, 24 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index cec2b7f..f498873 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -752,23 +752,27 @@ PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4( PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode); #endif +#ifndef Py_LIMITED_API /* Return a read-only pointer to the Unicode object's internal Py_UNICODE buffer. If the wchar_t/Py_UNICODE representation is not yet available, this function will calculate it. */ -#ifndef Py_LIMITED_API PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( PyObject *unicode /* Unicode object */ ); -#endif + +/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string + contains null characters. */ +PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode( + PyObject *unicode /* Unicode object */ + ); /* Return a read-only pointer to the Unicode object's internal Py_UNICODE buffer and save the length at size. If the wchar_t/Py_UNICODE representation is not yet available, this function will calculate it. 
*/ -#ifndef Py_LIMITED_API PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize( PyObject *unicode, /* Unicode object */ Py_ssize_t *size /* location where to save the length */ diff --git a/Lib/ctypes/test/test_loading.py b/Lib/ctypes/test/test_loading.py index 45571f3..f3b65b9 100644 --- a/Lib/ctypes/test/test_loading.py +++ b/Lib/ctypes/test/test_loading.py @@ -62,6 +62,8 @@ class LoaderTest(unittest.TestCase): windll["kernel32"].GetModuleHandleW windll.LoadLibrary("kernel32").GetModuleHandleW WinDLL("kernel32").GetModuleHandleW + # embedded null character + self.assertRaises(ValueError, windll.LoadLibrary, "kernel32\0") @unittest.skipUnless(os.name == "nt", 'test specific to Windows') diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 416316c..7a4b7eb 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -151,6 +151,8 @@ class BuiltinTest(unittest.TestCase): self.assertRaises(TypeError, __import__, 1, 2, 3, 4) self.assertRaises(ValueError, __import__, '') self.assertRaises(TypeError, __import__, 'sys', name='sys') + # embedded null character + self.assertRaises(ModuleNotFoundError, __import__, 'string\x00') def test_abs(self): # int @@ -1002,6 +1004,10 @@ class BuiltinTest(unittest.TestCase): self.assertEqual(fp.read(300), 'XXX'*100) self.assertEqual(fp.read(1000), 'YYY'*100) + # embedded null bytes and characters + self.assertRaises(ValueError, open, 'a\x00b') + self.assertRaises(ValueError, open, b'a\x00b') + def test_open_default_encoding(self): old_environ = dict(os.environ) try: diff --git a/Lib/test/test_curses.py b/Lib/test/test_curses.py index 3d8c50b..0d0b160 100644 --- a/Lib/test/test_curses.py +++ b/Lib/test/test_curses.py @@ -81,7 +81,7 @@ class TestCurses(unittest.TestCase): win2 = curses.newwin(15,15, 5,5) for meth in [stdscr.addch, stdscr.addstr]: - for args in [('a'), ('a', curses.A_BOLD), + for args in [('a',), ('a', curses.A_BOLD), (4,4, 'a'), (5,5, 'a', curses.A_BOLD)]: with 
self.subTest(meth=meth.__qualname__, args=args): meth(*args) @@ -194,6 +194,15 @@ class TestCurses(unittest.TestCase): self.assertRaises(ValueError, stdscr.instr, -2) self.assertRaises(ValueError, stdscr.instr, 2, 3, -2) + def test_embedded_null_chars(self): + # reject embedded null bytes and characters + stdscr = self.stdscr + for arg in ['a', b'a']: + with self.subTest(arg=arg): + self.assertRaises(ValueError, stdscr.addstr, 'a\0') + self.assertRaises(ValueError, stdscr.addnstr, 'a\0', 1) + self.assertRaises(ValueError, stdscr.insstr, 'a\0') + self.assertRaises(ValueError, stdscr.insnstr, 'a\0', 1) def test_module_funcs(self): "Test module-level functions" diff --git a/Lib/test/test_grp.py b/Lib/test/test_grp.py index 69095a3..e511947 100644 --- a/Lib/test/test_grp.py +++ b/Lib/test/test_grp.py @@ -50,6 +50,8 @@ class GroupDatabaseTestCase(unittest.TestCase): self.assertRaises(TypeError, grp.getgrgid) self.assertRaises(TypeError, grp.getgrnam) self.assertRaises(TypeError, grp.getgrall, 42) + # embedded null character + self.assertRaises(ValueError, grp.getgrnam, 'a\x00b') # try to get some errors bynames = {} diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py index 4ece365..6f35f49 100644 --- a/Lib/test/test_imp.py +++ b/Lib/test/test_imp.py @@ -314,6 +314,10 @@ class ImportTests(unittest.TestCase): loader.get_data(imp.__file__) # File should be closed loader.get_data(imp.__file__) # Will need to create a newly opened file + def test_load_source(self): + with self.assertRaisesRegex(ValueError, 'embedded null'): + imp.load_source(__name__, __file__ + "\0") + class ReloadTests(unittest.TestCase): diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index 99fab58..650d737 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -339,9 +339,14 @@ class TestCollation(unittest.TestCase): self.assertLess(locale.strcoll('a', 'b'), 0) self.assertEqual(locale.strcoll('a', 'a'), 0) self.assertGreater(locale.strcoll('b', 'a'), 0) + # embedded 
null character + self.assertRaises(ValueError, locale.strcoll, 'a\0', 'a') + self.assertRaises(ValueError, locale.strcoll, 'a', 'a\0') def test_strxfrm(self): self.assertLess(locale.strxfrm('a'), locale.strxfrm('b')) + # embedded null character + self.assertRaises(ValueError, locale.strxfrm, 'a\0') class TestEnUSCollation(BaseLocalizedTest, TestCollation): diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index f224212..810ec37 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -126,6 +126,10 @@ class TimeTestCase(unittest.TestCase): except ValueError: self.fail('conversion specifier: %r failed.' % format) + self.assertRaises(TypeError, time.strftime, b'%S', tt) + # embedded null character + self.assertRaises(ValueError, time.strftime, '%S\0', tt) + def _bounds_checking(self, func): # Make sure that strftime() checks the bounds of the various parts # of the time tuple (0 is valid for *all* values). diff --git a/Lib/test/test_winsound.py b/Lib/test/test_winsound.py index 179e069..21437ef 100644 --- a/Lib/test/test_winsound.py +++ b/Lib/test/test_winsound.py @@ -98,6 +98,8 @@ class PlaySoundTest(unittest.TestCase): self.assertRaises(TypeError, winsound.PlaySound, "bad", winsound.SND_MEMORY) self.assertRaises(TypeError, winsound.PlaySound, 1, 0) + # embedded null character + self.assertRaises(ValueError, winsound.PlaySound, 'bad\0', 0) def test_keyword_args(self): safe_PlaySound(flags=winsound.SND_ALIAS, sound="SystemExit") diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c index 358f287..ff95dff 100644 --- a/Modules/_ctypes/callproc.c +++ b/Modules/_ctypes/callproc.c @@ -1231,14 +1231,15 @@ The handle may be used to locate exported functions in this\n\ module.\n"; static PyObject *load_library(PyObject *self, PyObject *args) { - WCHAR *name; + const WCHAR *name; PyObject *nameobj; PyObject *ignored; HMODULE hMod; - if (!PyArg_ParseTuple(args, "O|O:LoadLibrary", &nameobj, &ignored)) + + if (!PyArg_ParseTuple(args, 
"U|O:LoadLibrary", &nameobj, &ignored)) return NULL; - name = PyUnicode_AsUnicode(nameobj); + name = _PyUnicode_AsUnicode(nameobj); if (!name) return NULL; diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c index 41b831e..7a70951 100644 --- a/Modules/_cursesmodule.c +++ b/Modules/_cursesmodule.c @@ -342,6 +342,7 @@ static int PyCurses_ConvertToString(PyCursesWindowObject *win, PyObject *obj, PyObject **bytes, wchar_t **wstr) { + char *str; if (PyUnicode_Check(obj)) { #ifdef HAVE_NCURSESW assert (wstr != NULL); @@ -354,12 +355,20 @@ PyCurses_ConvertToString(PyCursesWindowObject *win, PyObject *obj, *bytes = PyUnicode_AsEncodedString(obj, win->encoding, NULL); if (*bytes == NULL) return 0; + /* check for embedded null bytes */ + if (PyBytes_AsStringAndSize(*bytes, &str, NULL) < 0) { + return 0; + } return 1; #endif } else if (PyBytes_Check(obj)) { Py_INCREF(obj); *bytes = obj; + /* check for embedded null bytes */ + if (PyBytes_AsStringAndSize(*bytes, &str, NULL) < 0) { + return 0; + } return 1; } diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 833ea8e..918fa57 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -280,11 +280,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, if (fd < 0) { #ifdef MS_WINDOWS - Py_ssize_t length; if (!PyUnicode_FSDecoder(nameobj, &stringobj)) { return -1; } - widename = PyUnicode_AsUnicodeAndSize(stringobj, &length); + widename = PyUnicode_AsUnicode(stringobj); if (widename == NULL) return -1; #else diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 0c7c3cd..71c9146 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -252,6 +252,11 @@ PyLocale_strxfrm(PyObject* self, PyObject* args) s = PyUnicode_AsWideCharString(str, &n1); if (s == NULL) goto exit; + if (wcslen(s) != (size_t)n1) { + PyErr_SetString(PyExc_ValueError, + "embedded null character"); + goto exit; + } /* assume no change in size, first */ n1 = n1 + 1; diff --git 
a/Modules/grpmodule.c b/Modules/grpmodule.c index 9437ae7..f577fd3 100644 --- a/Modules/grpmodule.c +++ b/Modules/grpmodule.c @@ -151,6 +151,7 @@ grp_getgrnam_impl(PyObject *module, PyObject *name) if ((bytes = PyUnicode_EncodeFSDefault(name)) == NULL) return NULL; + /* check for embedded null bytes */ if (PyBytes_AsStringAndSize(bytes, &name_chars, NULL) == -1) goto out; diff --git a/Modules/nismodule.c b/Modules/nismodule.c index b6a855c..a9028bb 100644 --- a/Modules/nismodule.c +++ b/Modules/nismodule.c @@ -169,6 +169,7 @@ nis_match (PyObject *self, PyObject *args, PyObject *kwdict) return NULL; if ((bkey = PyUnicode_EncodeFSDefault(ukey)) == NULL) return NULL; + /* check for embedded null bytes */ if (PyBytes_AsStringAndSize(bkey, &key, &keylen) == -1) { Py_DECREF(bkey); return NULL; diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index bf82543..4607b18 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -3680,7 +3680,7 @@ os__getfinalpathname_impl(PyObject *module, PyObject *path) PyObject *result; const wchar_t *path_wchar; - path_wchar = PyUnicode_AsUnicode(path); + path_wchar = _PyUnicode_AsUnicode(path); if (path_wchar == NULL) return NULL; @@ -7088,7 +7088,7 @@ win_readlink(PyObject *self, PyObject *args, PyObject *kwargs) )) return NULL; - path = PyUnicode_AsUnicode(po); + path = _PyUnicode_AsUnicode(po); if (path == NULL) return NULL; @@ -8881,6 +8881,7 @@ os_putenv_impl(PyObject *module, PyObject *name, PyObject *value) /*[clinic end generated code: output=d29a567d6b2327d2 input=ba586581c2e6105f]*/ { const wchar_t *env; + Py_ssize_t size; /* Search from index 1 because on Windows starting '=' is allowed for defining hidden environment variables. 
*/ @@ -8894,16 +8895,21 @@ os_putenv_impl(PyObject *module, PyObject *name, PyObject *value) if (unicode == NULL) { return NULL; } - if (_MAX_ENV < PyUnicode_GET_LENGTH(unicode)) { + + env = PyUnicode_AsUnicodeAndSize(unicode, &size); + if (env == NULL) + goto error; + if (size > _MAX_ENV) { PyErr_Format(PyExc_ValueError, "the environment variable is longer than %u characters", _MAX_ENV); goto error; } - - env = PyUnicode_AsUnicode(unicode); - if (env == NULL) + if (wcslen(env) != (size_t)size) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); goto error; + } + if (_wputenv(env)) { posix_error(); goto error; diff --git a/Modules/pwdmodule.c b/Modules/pwdmodule.c index 784e9d0..bbef2de 100644 --- a/Modules/pwdmodule.c +++ b/Modules/pwdmodule.c @@ -158,6 +158,7 @@ pwd_getpwnam_impl(PyObject *module, PyObject *arg) if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL) return NULL; + /* check for embedded null bytes */ if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1) goto out; if ((p = getpwnam(name)) == NULL) { diff --git a/Modules/spwdmodule.c b/Modules/spwdmodule.c index 556a715..1601ec0 100644 --- a/Modules/spwdmodule.c +++ b/Modules/spwdmodule.c @@ -134,6 +134,7 @@ spwd_getspnam_impl(PyObject *module, PyObject *arg) if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL) return NULL; + /* check for embedded null bytes */ if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1) goto out; if ((p = getspnam(name)) == NULL) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3767064..494cdbd 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4164,6 +4164,20 @@ PyUnicode_AsUnicode(PyObject *unicode) return PyUnicode_AsUnicodeAndSize(unicode, NULL); } +const Py_UNICODE * +_PyUnicode_AsUnicode(PyObject *unicode) +{ + Py_ssize_t size; + const Py_UNICODE *wstr; + + wstr = PyUnicode_AsUnicodeAndSize(unicode, &size); + if (wstr && wcslen(wstr) != (size_t)size) { + PyErr_SetString(PyExc_ValueError, "embedded null 
character"); + return NULL; + } + return wstr; +} + Py_ssize_t PyUnicode_GetSize(PyObject *unicode) @@ -600,8 +600,12 @@ summary_setproperty(msiobj* si, PyObject *args) return NULL; if (PyUnicode_Check(data)) { + const WCHAR *value = _PyUnicode_AsUnicode(data); + if (value == NULL) { + return NULL; + } status = MsiSummaryInfoSetPropertyW(si->h, field, VT_LPSTR, - 0, NULL, PyUnicode_AsUnicode(data)); + 0, NULL, value); } else if (PyLong_CheckExact(data)) { long value = PyLong_AsLong(data); if (value == -1 && PyErr_Occurred()) { diff --git a/Python/dynload_win.c b/Python/dynload_win.c index 05050cf..0fdf77f 100644 --- a/Python/dynload_win.c +++ b/Python/dynload_win.c @@ -190,13 +190,13 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix, { dl_funcptr p; char funcname[258], *import_python; - wchar_t *wpathname; + const wchar_t *wpathname; #ifndef _DEBUG _Py_CheckPython3(); #endif - wpathname = PyUnicode_AsUnicode(pathname); + wpathname = _PyUnicode_AsUnicode(pathname); if (wpathname == NULL) return NULL; diff --git a/Python/fileutils.c b/Python/fileutils.c index f3764e4..97505e5 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -711,21 +711,32 @@ _Py_stat(PyObject *path, struct stat *statbuf) #ifdef MS_WINDOWS int err; struct _stat wstatbuf; - wchar_t *wpath; + const wchar_t *wpath; - wpath = PyUnicode_AsUnicode(path); + wpath = _PyUnicode_AsUnicode(path); if (wpath == NULL) return -2; + err = _wstat(wpath, &wstatbuf); if (!err) statbuf->st_mode = wstatbuf.st_mode; return err; #else int ret; - PyObject *bytes = PyUnicode_EncodeFSDefault(path); + PyObject *bytes; + char *cpath; + + bytes = PyUnicode_EncodeFSDefault(path); if (bytes == NULL) return -2; - ret = stat(PyBytes_AS_STRING(bytes), statbuf); + + /* check for embedded null bytes */ + if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) { + Py_DECREF(bytes); + return -2; + } + + ret = stat(cpath, statbuf); Py_DECREF(bytes); return ret; #endif @@ -1080,7 +1091,7 @@ 
_Py_fopen_obj(PyObject *path, const char *mode) FILE *f; int async_err = 0; #ifdef MS_WINDOWS - wchar_t *wpath; + const wchar_t *wpath; wchar_t wmode[10]; int usize; @@ -1094,7 +1105,7 @@ _Py_fopen_obj(PyObject *path, const char *mode) Py_TYPE(path)); return NULL; } - wpath = PyUnicode_AsUnicode(path); + wpath = _PyUnicode_AsUnicode(path); if (wpath == NULL) return NULL; |