diff options
author | Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com> | 2023-07-15 02:17:09 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-15 02:17:09 (GMT) |
commit | d488970ae6e4b0de1212e202602be686de49ab7a (patch) | |
tree | da5b982a63c3abd4e84fc0591b8a33150535b8e6 /Modules | |
parent | 2186212191f2ef149d73073b4583d74a49d176e2 (diff) | |
download | cpython-d488970ae6e4b0de1212e202602be686de49ab7a.zip cpython-d488970ae6e4b0de1212e202602be686de49ab7a.tar.gz cpython-d488970ae6e4b0de1212e202602be686de49ab7a.tar.bz2 |
[3.11] gh-105235: Prevent reading outside buffer during mmap.find() (… (#106710)
[3.11] gh-105235: Prevent reading outside buffer during mmap.find() (GH-105252)
* Add a special case for s[-m:] == p in _PyBytes_Find
* Add tests for _PyBytes_Find
* Make sure that start <= end in mmap.find.
(cherry picked from commit ab86426a3472ab68747815299d390b213793c3d1)
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_testinternalcapi.c | 114 | ||||
-rw-r--r-- | Modules/mmapmodule.c | 7 |
2 files changed, 120 insertions, 1 deletions
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 238de74..4a8c436 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -14,6 +14,7 @@ #include "Python.h" #include "pycore_atomic_funcs.h" // _Py_atomic_int_get() #include "pycore_bitutils.h" // _Py_bswap32() +#include "pycore_bytesobject.h" // _PyBytes_Find() #include "pycore_fileutils.h" // _Py_normpath #include "pycore_frame.h" // _PyInterpreterFrame #include "pycore_gc.h" // PyGC_Head @@ -380,6 +381,118 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args)) } +static int +check_bytes_find(const char *haystack0, const char *needle0, + int offset, Py_ssize_t expected) +{ + Py_ssize_t len_haystack = strlen(haystack0); + Py_ssize_t len_needle = strlen(needle0); + Py_ssize_t result_1 = _PyBytes_Find(haystack0, len_haystack, + needle0, len_needle, offset); + if (result_1 != expected) { + PyErr_Format(PyExc_AssertionError, + "Incorrect result_1: '%s' in '%s' (offset=%zd)", + needle0, haystack0, offset); + return -1; + } + // Allocate new buffer with no NULL terminator. + char *haystack = PyMem_Malloc(len_haystack); + if (haystack == NULL) { + PyErr_NoMemory(); + return -1; + } + char *needle = PyMem_Malloc(len_needle); + if (needle == NULL) { + PyMem_Free(haystack); + PyErr_NoMemory(); + return -1; + } + memcpy(haystack, haystack0, len_haystack); + memcpy(needle, needle0, len_needle); + Py_ssize_t result_2 = _PyBytes_Find(haystack, len_haystack, + needle, len_needle, offset); + PyMem_Free(haystack); + PyMem_Free(needle); + if (result_2 != expected) { + PyErr_Format(PyExc_AssertionError, + "Incorrect result_2: '%s' in '%s' (offset=%zd)", + needle0, haystack0, offset); + return -1; + } + return 0; +} + +static int +check_bytes_find_large(Py_ssize_t len_haystack, Py_ssize_t len_needle, + const char *needle) +{ + char *zeros = PyMem_RawCalloc(len_haystack, 1); + if (zeros == NULL) { + PyErr_NoMemory(); + return -1; + } + Py_ssize_t res = _PyBytes_Find(zeros, len_haystack, needle, len_needle, 0); + PyMem_RawFree(zeros); + if (res != -1) { + PyErr_Format(PyExc_AssertionError, + "check_bytes_find_large(%zd, %zd) found %zd", + len_haystack, len_needle, res); + return -1; + } + return 0; +} + +static PyObject * +test_bytes_find(PyObject *self, PyObject *Py_UNUSED(args)) +{ + #define CHECK(H, N, O, E) do { \ + if (check_bytes_find(H, N, O, E) < 0) { \ + return NULL; \ + } \ + } while (0) + + CHECK("", "", 0, 0); + CHECK("Python", "", 0, 0); + CHECK("Python", "", 3, 3); + CHECK("Python", "", 6, 6); + CHECK("Python", "yth", 0, 1); + CHECK("ython", "yth", 1, 1); + CHECK("thon", "yth", 2, -1); + CHECK("Python", "thon", 0, 2); + CHECK("ython", "thon", 1, 2); + CHECK("thon", "thon", 2, 2); + CHECK("hon", "thon", 3, -1); + CHECK("Pytho", "zz", 0, -1); + CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "ab", 0, -1); + CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "ba", 0, -1); + CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "bb", 0, -1); + CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "ab", 0, 30); + CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaba", "ba", 0, 30); + CHECK("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaabb", "bb", 0, 30); + #undef CHECK + + // Hunt for segfaults + // n, m chosen here so that (n - m) % (m + 1) == 0 + // This would make default_find in fastsearch.h access haystack[n]. + if (check_bytes_find_large(2048, 2, "ab") < 0) { + return NULL; + } + if (check_bytes_find_large(4096, 16, "0123456789abcdef") < 0) { + return NULL; + } + if (check_bytes_find_large(8192, 2, "ab") < 0) { + return NULL; + } + if (check_bytes_find_large(16384, 4, "abcd") < 0) { + return NULL; + } + if (check_bytes_find_large(32768, 2, "ab") < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + static PyObject * normalize_path(PyObject *self, PyObject *filename) { @@ -537,6 +650,7 @@ static PyMethodDef TestMethods[] = { {"reset_path_config", test_reset_path_config, METH_NOARGS}, {"test_atomic_funcs", test_atomic_funcs, METH_NOARGS}, {"test_edit_cost", test_edit_cost, METH_NOARGS}, + {"test_bytes_find", test_bytes_find, METH_NOARGS}, {"normalize_path", normalize_path, METH_O, NULL}, {"get_getpath_codeobject", get_getpath_codeobject, METH_NOARGS, NULL}, {"EncodeLocaleEx", encode_locale_ex, METH_VARARGS}, diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 4b0c1e2..8ff63f5 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -351,12 +351,17 @@ mmap_gfind(mmap_object *self, Py_ssize_t res; CHECK_VALID_OR_RELEASE(NULL, view); - if (reverse) { + if (end < start) { + res = -1; + } + else if (reverse) { + assert(0 <= start && start <= end && end <= self->size); res = _PyBytes_ReverseFind( self->data + start, end - start, view.buf, view.len, start); } else { + assert(0 <= start && start <= end && end <= self->size); res = _PyBytes_Find( self->data + start, end - start, view.buf, view.len, start); |