summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2017-06-28 06:27:35 (GMT)
committer GitHub <noreply@github.com> 2017-06-28 06:27:35 (GMT)
commit 0834905d9b61291b1fc5e05a1ffbc69de9c9379f (patch)
tree 456e79426ec816ba7e0a0bef7e94a6f8423b2786
parent 413c0a92bcc92efe92849fe5e711163da453410b (diff)
download cpython-0834905d9b61291b1fc5e05a1ffbc69de9c9379f.zip
cpython-0834905d9b61291b1fc5e05a1ffbc69de9c9379f.tar.gz
cpython-0834905d9b61291b1fc5e05a1ffbc69de9c9379f.tar.bz2
[3.6] bpo-13617: Reject embedded null characters in wchar* strings. (GH-2302) (#2462)
Based on patch by Victor Stinner. Add private C API function _PyUnicode_AsUnicode() which is similar to PyUnicode_AsUnicode(), but checks for null characters. (cherry picked from commit f7eae0adfcd4c50034281b2c69f461b43b68db84)
-rw-r--r--Include/unicodeobject.h10
-rw-r--r--Lib/ctypes/test/test_loading.py2
-rw-r--r--Lib/test/test_builtin.py6
-rw-r--r--Lib/test/test_curses.py11
-rw-r--r--Lib/test/test_grp.py2
-rw-r--r--Lib/test/test_imp.py4
-rw-r--r--Lib/test/test_locale.py5
-rw-r--r--Lib/test/test_time.py4
-rw-r--r--Lib/test/test_winsound.py2
-rw-r--r--Modules/_ctypes/callproc.c7
-rw-r--r--Modules/_cursesmodule.c9
-rw-r--r--Modules/_io/fileio.c3
-rw-r--r--Modules/_localemodule.c5
-rw-r--r--Modules/grpmodule.c1
-rw-r--r--Modules/nismodule.c1
-rw-r--r--Modules/posixmodule.c18
-rw-r--r--Modules/pwdmodule.c1
-rw-r--r--Modules/spwdmodule.c1
-rw-r--r--Objects/unicodeobject.c14
-rw-r--r--PC/_msi.c6
-rw-r--r--Python/dynload_win.c4
-rw-r--r--Python/fileutils.c23
22 files changed, 115 insertions, 24 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index cec2b7f..f498873 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -752,23 +752,27 @@ PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
#endif
+#ifndef Py_LIMITED_API
/* Return a read-only pointer to the Unicode object's internal
Py_UNICODE buffer.
If the wchar_t/Py_UNICODE representation is not yet available, this
function will calculate it. */
-#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
PyObject *unicode /* Unicode object */
);
-#endif
+
+/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
+ contains null characters. */
+PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
+ PyObject *unicode /* Unicode object */
+ );
/* Return a read-only pointer to the Unicode object's internal
Py_UNICODE buffer and save the length at size.
If the wchar_t/Py_UNICODE representation is not yet available, this
function will calculate it. */
-#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
PyObject *unicode, /* Unicode object */
Py_ssize_t *size /* location where to save the length */
diff --git a/Lib/ctypes/test/test_loading.py b/Lib/ctypes/test/test_loading.py
index 45571f3..f3b65b9 100644
--- a/Lib/ctypes/test/test_loading.py
+++ b/Lib/ctypes/test/test_loading.py
@@ -62,6 +62,8 @@ class LoaderTest(unittest.TestCase):
windll["kernel32"].GetModuleHandleW
windll.LoadLibrary("kernel32").GetModuleHandleW
WinDLL("kernel32").GetModuleHandleW
+ # embedded null character
+ self.assertRaises(ValueError, windll.LoadLibrary, "kernel32\0")
@unittest.skipUnless(os.name == "nt",
'test specific to Windows')
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index 416316c..7a4b7eb 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -151,6 +151,8 @@ class BuiltinTest(unittest.TestCase):
self.assertRaises(TypeError, __import__, 1, 2, 3, 4)
self.assertRaises(ValueError, __import__, '')
self.assertRaises(TypeError, __import__, 'sys', name='sys')
+ # embedded null character
+ self.assertRaises(ModuleNotFoundError, __import__, 'string\x00')
def test_abs(self):
# int
@@ -1002,6 +1004,10 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(fp.read(300), 'XXX'*100)
self.assertEqual(fp.read(1000), 'YYY'*100)
+ # embedded null bytes and characters
+ self.assertRaises(ValueError, open, 'a\x00b')
+ self.assertRaises(ValueError, open, b'a\x00b')
+
def test_open_default_encoding(self):
old_environ = dict(os.environ)
try:
diff --git a/Lib/test/test_curses.py b/Lib/test/test_curses.py
index 3d8c50b..0d0b160 100644
--- a/Lib/test/test_curses.py
+++ b/Lib/test/test_curses.py
@@ -81,7 +81,7 @@ class TestCurses(unittest.TestCase):
win2 = curses.newwin(15,15, 5,5)
for meth in [stdscr.addch, stdscr.addstr]:
- for args in [('a'), ('a', curses.A_BOLD),
+ for args in [('a',), ('a', curses.A_BOLD),
(4,4, 'a'), (5,5, 'a', curses.A_BOLD)]:
with self.subTest(meth=meth.__qualname__, args=args):
meth(*args)
@@ -194,6 +194,15 @@ class TestCurses(unittest.TestCase):
self.assertRaises(ValueError, stdscr.instr, -2)
self.assertRaises(ValueError, stdscr.instr, 2, 3, -2)
+    def test_embedded_null_chars(self):
+        """addstr/addnstr/insstr/insnstr reject embedded nulls in str and bytes."""
+        stdscr = self.stdscr
+        # The loop value must be the argument actually passed to the window
+        # method; otherwise the bytes case is never exercised and every
+        # subTest repeats the same str check.
+        for arg in ['a\0', b'a\0']:
+            with self.subTest(arg=arg):
+                self.assertRaises(ValueError, stdscr.addstr, arg)
+                self.assertRaises(ValueError, stdscr.addnstr, arg, 1)
+                self.assertRaises(ValueError, stdscr.insstr, arg)
+                self.assertRaises(ValueError, stdscr.insnstr, arg, 1)
def test_module_funcs(self):
"Test module-level functions"
diff --git a/Lib/test/test_grp.py b/Lib/test/test_grp.py
index 69095a3..e511947 100644
--- a/Lib/test/test_grp.py
+++ b/Lib/test/test_grp.py
@@ -50,6 +50,8 @@ class GroupDatabaseTestCase(unittest.TestCase):
self.assertRaises(TypeError, grp.getgrgid)
self.assertRaises(TypeError, grp.getgrnam)
self.assertRaises(TypeError, grp.getgrall, 42)
+ # embedded null character
+ self.assertRaises(ValueError, grp.getgrnam, 'a\x00b')
# try to get some errors
bynames = {}
diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py
index 4ece365..6f35f49 100644
--- a/Lib/test/test_imp.py
+++ b/Lib/test/test_imp.py
@@ -314,6 +314,10 @@ class ImportTests(unittest.TestCase):
loader.get_data(imp.__file__) # File should be closed
loader.get_data(imp.__file__) # Will need to create a newly opened file
+ def test_load_source(self):
+ # A source path ending in a null character must be rejected with a
+ # ValueError whose message matches 'embedded null'.
+ with self.assertRaisesRegex(ValueError, 'embedded null'):
+ imp.load_source(__name__, __file__ + "\0")
+
class ReloadTests(unittest.TestCase):
diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py
index 99fab58..650d737 100644
--- a/Lib/test/test_locale.py
+++ b/Lib/test/test_locale.py
@@ -339,9 +339,14 @@ class TestCollation(unittest.TestCase):
self.assertLess(locale.strcoll('a', 'b'), 0)
self.assertEqual(locale.strcoll('a', 'a'), 0)
self.assertGreater(locale.strcoll('b', 'a'), 0)
+ # embedded null character
+ self.assertRaises(ValueError, locale.strcoll, 'a\0', 'a')
+ self.assertRaises(ValueError, locale.strcoll, 'a', 'a\0')
def test_strxfrm(self):
self.assertLess(locale.strxfrm('a'), locale.strxfrm('b'))
+ # embedded null character
+ self.assertRaises(ValueError, locale.strxfrm, 'a\0')
class TestEnUSCollation(BaseLocalizedTest, TestCollation):
diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py
index f224212..810ec37 100644
--- a/Lib/test/test_time.py
+++ b/Lib/test/test_time.py
@@ -126,6 +126,10 @@ class TimeTestCase(unittest.TestCase):
except ValueError:
self.fail('conversion specifier: %r failed.' % format)
+ self.assertRaises(TypeError, time.strftime, b'%S', tt)
+ # embedded null character
+ self.assertRaises(ValueError, time.strftime, '%S\0', tt)
+
def _bounds_checking(self, func):
# Make sure that strftime() checks the bounds of the various parts
# of the time tuple (0 is valid for *all* values).
diff --git a/Lib/test/test_winsound.py b/Lib/test/test_winsound.py
index 179e069..21437ef 100644
--- a/Lib/test/test_winsound.py
+++ b/Lib/test/test_winsound.py
@@ -98,6 +98,8 @@ class PlaySoundTest(unittest.TestCase):
self.assertRaises(TypeError, winsound.PlaySound, "bad",
winsound.SND_MEMORY)
self.assertRaises(TypeError, winsound.PlaySound, 1, 0)
+ # embedded null character
+ self.assertRaises(ValueError, winsound.PlaySound, 'bad\0', 0)
def test_keyword_args(self):
safe_PlaySound(flags=winsound.SND_ALIAS, sound="SystemExit")
diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c
index 358f287..ff95dff 100644
--- a/Modules/_ctypes/callproc.c
+++ b/Modules/_ctypes/callproc.c
@@ -1231,14 +1231,15 @@ The handle may be used to locate exported functions in this\n\
module.\n";
static PyObject *load_library(PyObject *self, PyObject *args)
{
- WCHAR *name;
+ const WCHAR *name;
PyObject *nameobj;
PyObject *ignored;
HMODULE hMod;
- if (!PyArg_ParseTuple(args, "O|O:LoadLibrary", &nameobj, &ignored))
+
+ if (!PyArg_ParseTuple(args, "U|O:LoadLibrary", &nameobj, &ignored))
return NULL;
- name = PyUnicode_AsUnicode(nameobj);
+ name = _PyUnicode_AsUnicode(nameobj);
if (!name)
return NULL;
diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c
index 41b831e..7a70951 100644
--- a/Modules/_cursesmodule.c
+++ b/Modules/_cursesmodule.c
@@ -342,6 +342,7 @@ static int
PyCurses_ConvertToString(PyCursesWindowObject *win, PyObject *obj,
PyObject **bytes, wchar_t **wstr)
{
+ char *str;
if (PyUnicode_Check(obj)) {
#ifdef HAVE_NCURSESW
assert (wstr != NULL);
@@ -354,12 +355,20 @@ PyCurses_ConvertToString(PyCursesWindowObject *win, PyObject *obj,
*bytes = PyUnicode_AsEncodedString(obj, win->encoding, NULL);
if (*bytes == NULL)
return 0;
+ /* check for embedded null bytes */
+ if (PyBytes_AsStringAndSize(*bytes, &str, NULL) < 0) {
+ return 0;
+ }
return 1;
#endif
}
else if (PyBytes_Check(obj)) {
Py_INCREF(obj);
*bytes = obj;
+ /* check for embedded null bytes */
+ if (PyBytes_AsStringAndSize(*bytes, &str, NULL) < 0) {
+ return 0;
+ }
return 1;
}
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index 833ea8e..918fa57 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -280,11 +280,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
if (fd < 0) {
#ifdef MS_WINDOWS
- Py_ssize_t length;
if (!PyUnicode_FSDecoder(nameobj, &stringobj)) {
return -1;
}
- widename = PyUnicode_AsUnicodeAndSize(stringobj, &length);
+ widename = PyUnicode_AsUnicode(stringobj);
if (widename == NULL)
return -1;
#else
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
index 0c7c3cd..71c9146 100644
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@@ -252,6 +252,11 @@ PyLocale_strxfrm(PyObject* self, PyObject* args)
s = PyUnicode_AsWideCharString(str, &n1);
if (s == NULL)
goto exit;
+ if (wcslen(s) != (size_t)n1) {
+ PyErr_SetString(PyExc_ValueError,
+ "embedded null character");
+ goto exit;
+ }
/* assume no change in size, first */
n1 = n1 + 1;
diff --git a/Modules/grpmodule.c b/Modules/grpmodule.c
index 9437ae7..f577fd3 100644
--- a/Modules/grpmodule.c
+++ b/Modules/grpmodule.c
@@ -151,6 +151,7 @@ grp_getgrnam_impl(PyObject *module, PyObject *name)
if ((bytes = PyUnicode_EncodeFSDefault(name)) == NULL)
return NULL;
+ /* check for embedded null bytes */
if (PyBytes_AsStringAndSize(bytes, &name_chars, NULL) == -1)
goto out;
diff --git a/Modules/nismodule.c b/Modules/nismodule.c
index b6a855c..a9028bb 100644
--- a/Modules/nismodule.c
+++ b/Modules/nismodule.c
@@ -169,6 +169,7 @@ nis_match (PyObject *self, PyObject *args, PyObject *kwdict)
return NULL;
if ((bkey = PyUnicode_EncodeFSDefault(ukey)) == NULL)
return NULL;
+ /* check for embedded null bytes */
if (PyBytes_AsStringAndSize(bkey, &key, &keylen) == -1) {
Py_DECREF(bkey);
return NULL;
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index bf82543..4607b18 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -3680,7 +3680,7 @@ os__getfinalpathname_impl(PyObject *module, PyObject *path)
PyObject *result;
const wchar_t *path_wchar;
- path_wchar = PyUnicode_AsUnicode(path);
+ path_wchar = _PyUnicode_AsUnicode(path);
if (path_wchar == NULL)
return NULL;
@@ -7088,7 +7088,7 @@ win_readlink(PyObject *self, PyObject *args, PyObject *kwargs)
))
return NULL;
- path = PyUnicode_AsUnicode(po);
+ path = _PyUnicode_AsUnicode(po);
if (path == NULL)
return NULL;
@@ -8881,6 +8881,7 @@ os_putenv_impl(PyObject *module, PyObject *name, PyObject *value)
/*[clinic end generated code: output=d29a567d6b2327d2 input=ba586581c2e6105f]*/
{
const wchar_t *env;
+ Py_ssize_t size;
/* Search from index 1 because on Windows starting '=' is allowed for
defining hidden environment variables. */
@@ -8894,16 +8895,21 @@ os_putenv_impl(PyObject *module, PyObject *name, PyObject *value)
if (unicode == NULL) {
return NULL;
}
- if (_MAX_ENV < PyUnicode_GET_LENGTH(unicode)) {
+
+ env = PyUnicode_AsUnicodeAndSize(unicode, &size);
+ if (env == NULL)
+ goto error;
+ if (size > _MAX_ENV) {
PyErr_Format(PyExc_ValueError,
"the environment variable is longer than %u characters",
_MAX_ENV);
goto error;
}
-
- env = PyUnicode_AsUnicode(unicode);
- if (env == NULL)
+ if (wcslen(env) != (size_t)size) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
goto error;
+ }
+
if (_wputenv(env)) {
posix_error();
goto error;
diff --git a/Modules/pwdmodule.c b/Modules/pwdmodule.c
index 784e9d0..bbef2de 100644
--- a/Modules/pwdmodule.c
+++ b/Modules/pwdmodule.c
@@ -158,6 +158,7 @@ pwd_getpwnam_impl(PyObject *module, PyObject *arg)
if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL)
return NULL;
+ /* check for embedded null bytes */
if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1)
goto out;
if ((p = getpwnam(name)) == NULL) {
diff --git a/Modules/spwdmodule.c b/Modules/spwdmodule.c
index 556a715..1601ec0 100644
--- a/Modules/spwdmodule.c
+++ b/Modules/spwdmodule.c
@@ -134,6 +134,7 @@ spwd_getspnam_impl(PyObject *module, PyObject *arg)
if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL)
return NULL;
+ /* check for embedded null bytes */
if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1)
goto out;
if ((p = getspnam(name)) == NULL) {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 3767064..494cdbd 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4164,6 +4164,20 @@ PyUnicode_AsUnicode(PyObject *unicode)
return PyUnicode_AsUnicodeAndSize(unicode, NULL);
}
+/* Variant of PyUnicode_AsUnicode() that rejects embedded null characters.
+   Obtains the buffer and its length from PyUnicode_AsUnicodeAndSize(); if
+   wcslen() of the buffer differs from that length, sets ValueError
+   ("embedded null character") and returns NULL.  A NULL result from
+   PyUnicode_AsUnicodeAndSize() is returned unchanged. */
+const Py_UNICODE *
+_PyUnicode_AsUnicode(PyObject *unicode)
+{
+ Py_ssize_t size;
+ const Py_UNICODE *wstr;
+
+ wstr = PyUnicode_AsUnicodeAndSize(unicode, &size);
+ /* wcslen() stops at the first null, so a mismatch with the reported
+    size means a null occurs before the end of the string. */
+ if (wstr && wcslen(wstr) != (size_t)size) {
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ return NULL;
+ }
+ return wstr;
+}
+
Py_ssize_t
PyUnicode_GetSize(PyObject *unicode)
diff --git a/PC/_msi.c b/PC/_msi.c
index 789b04f..15aa7d8 100644
--- a/PC/_msi.c
+++ b/PC/_msi.c
@@ -600,8 +600,12 @@ summary_setproperty(msiobj* si, PyObject *args)
return NULL;
if (PyUnicode_Check(data)) {
+ const WCHAR *value = _PyUnicode_AsUnicode(data);
+ if (value == NULL) {
+ return NULL;
+ }
status = MsiSummaryInfoSetPropertyW(si->h, field, VT_LPSTR,
- 0, NULL, PyUnicode_AsUnicode(data));
+ 0, NULL, value);
} else if (PyLong_CheckExact(data)) {
long value = PyLong_AsLong(data);
if (value == -1 && PyErr_Occurred()) {
diff --git a/Python/dynload_win.c b/Python/dynload_win.c
index 05050cf..0fdf77f 100644
--- a/Python/dynload_win.c
+++ b/Python/dynload_win.c
@@ -190,13 +190,13 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix,
{
dl_funcptr p;
char funcname[258], *import_python;
- wchar_t *wpathname;
+ const wchar_t *wpathname;
#ifndef _DEBUG
_Py_CheckPython3();
#endif
- wpathname = PyUnicode_AsUnicode(pathname);
+ wpathname = _PyUnicode_AsUnicode(pathname);
if (wpathname == NULL)
return NULL;
diff --git a/Python/fileutils.c b/Python/fileutils.c
index f3764e4..97505e5 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -711,21 +711,32 @@ _Py_stat(PyObject *path, struct stat *statbuf)
#ifdef MS_WINDOWS
int err;
struct _stat wstatbuf;
- wchar_t *wpath;
+ const wchar_t *wpath;
- wpath = PyUnicode_AsUnicode(path);
+ wpath = _PyUnicode_AsUnicode(path);
if (wpath == NULL)
return -2;
+
err = _wstat(wpath, &wstatbuf);
if (!err)
statbuf->st_mode = wstatbuf.st_mode;
return err;
#else
int ret;
- PyObject *bytes = PyUnicode_EncodeFSDefault(path);
+ PyObject *bytes;
+ char *cpath;
+
+ bytes = PyUnicode_EncodeFSDefault(path);
if (bytes == NULL)
return -2;
- ret = stat(PyBytes_AS_STRING(bytes), statbuf);
+
+ /* check for embedded null bytes */
+ if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
+ Py_DECREF(bytes);
+ return -2;
+ }
+
+ ret = stat(cpath, statbuf);
Py_DECREF(bytes);
return ret;
#endif
@@ -1080,7 +1091,7 @@ _Py_fopen_obj(PyObject *path, const char *mode)
FILE *f;
int async_err = 0;
#ifdef MS_WINDOWS
- wchar_t *wpath;
+ const wchar_t *wpath;
wchar_t wmode[10];
int usize;
@@ -1094,7 +1105,7 @@ _Py_fopen_obj(PyObject *path, const char *mode)
Py_TYPE(path));
return NULL;
}
- wpath = PyUnicode_AsUnicode(path);
+ wpath = _PyUnicode_AsUnicode(path);
if (wpath == NULL)
return NULL;