diff options
author | Mark Dickinson <dickinsm@gmail.com> | 2022-09-25 09:55:33 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-25 09:55:33 (GMT) |
commit | f5f047aa628caeca680745c55e24519f06aa6724 (patch) | |
tree | 55a32b33e0c24c2f1b6da99c5ebca8b687a9f884 /Modules | |
parent | 817fa28f81eed43539fad2c8e696df954afa6ad7 (diff) | |
download | cpython-f5f047aa628caeca680745c55e24519f06aa6724.zip cpython-f5f047aa628caeca680745c55e24519f06aa6724.tar.gz cpython-f5f047aa628caeca680745c55e24519f06aa6724.tar.bz2 |
gh-96735: Fix undefined behaviour in struct unpacking functions (#96739)
This PR fixes undefined behaviour in the struct module unpacking support functions `bu_longlong`, `lu_longlong`, `bu_int` and `lu_int`; thanks to @kumaraditya303 for finding these.
The fix is to accumulate the bytes in an unsigned integer type instead of a signed integer type, then to convert to the appropriate signed type. In cases where the width matches, that conversion will typically be compiled away to a no-op.
(Evidence from Godbolt: https://godbolt.org/z/5zvxodj64 .)
To make the conversions efficient, I've specialised the relevant functions for their output size: for `bu_longlong` and `lu_longlong`, this only entails checking that the output size is indeed `8`. But `bu_int` and `lu_int` were used for format sizes `2` and `4` - I've split those into two separate functions each.
No tests, because all of the affected cases are already exercised by the test suite.
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_struct.c | 96 |
1 files changed, 70 insertions, 26 deletions
diff --git a/Modules/_struct.c b/Modules/_struct.c index 9d66568..09f52a9 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -806,18 +806,37 @@ static const formatdef native_table[] = { /* Big-endian routines. *****************************************************/ static PyObject * +bu_short(_structmodulestate *state, const char *p, const formatdef *f) +{ + unsigned long x = 0; + + /* This function is only ever used in the case f->size == 2. */ + assert(f->size == 2); + Py_ssize_t i = 2; + const unsigned char *bytes = (const unsigned char *)p; + do { + x = (x<<8) | *bytes++; + } while (--i > 0); + /* Extend sign, avoiding implementation-defined or undefined behaviour. */ + x = (x ^ 0x8000U) - 0x8000U; + return PyLong_FromLong(x & 0x8000U ? -1 - (long)(~x) : (long)x); +} + +static PyObject * bu_int(_structmodulestate *state, const char *p, const formatdef *f) { - long x = 0; - Py_ssize_t i = f->size; + unsigned long x = 0; + + /* This function is only ever used in the case f->size == 4. */ + assert(f->size == 4); + Py_ssize_t i = 4; const unsigned char *bytes = (const unsigned char *)p; do { x = (x<<8) | *bytes++; } while (--i > 0); - /* Extend the sign bit. */ - if (SIZEOF_LONG > f->size) - x |= -(x & (1L << ((8 * f->size) - 1))); - return PyLong_FromLong(x); + /* Extend sign, avoiding implementation-defined or undefined behaviour. */ + x = (x ^ 0x80000000U) - 0x80000000U; + return PyLong_FromLong(x & 0x80000000U ? -1 - (long)(~x) : (long)x); } static PyObject * @@ -835,16 +854,19 @@ bu_uint(_structmodulestate *state, const char *p, const formatdef *f) static PyObject * bu_longlong(_structmodulestate *state, const char *p, const formatdef *f) { - long long x = 0; - Py_ssize_t i = f->size; + unsigned long long x = 0; + + /* This function is only ever used in the case f->size == 8. */ + assert(f->size == 8); + Py_ssize_t i = 8; const unsigned char *bytes = (const unsigned char *)p; do { x = (x<<8) | *bytes++; } while (--i > 0); - /* Extend the sign bit. */ - if (SIZEOF_LONG_LONG > f->size) - x |= -(x & ((long long)1 << ((8 * f->size) - 1))); - return PyLong_FromLongLong(x); + /* Extend sign, avoiding implementation-defined or undefined behaviour. */ + x = (x ^ 0x8000000000000000U) - 0x8000000000000000U; + return PyLong_FromLongLong( + x & 0x8000000000000000U ? -1 - (long long)(~x) : (long long)x); } static PyObject * @@ -1009,7 +1031,7 @@ static formatdef bigendian_table[] = { {'c', 1, 0, nu_char, np_char}, {'s', 1, 0, NULL}, {'p', 1, 0, NULL}, - {'h', 2, 0, bu_int, bp_int}, + {'h', 2, 0, bu_short, bp_int}, {'H', 2, 0, bu_uint, bp_uint}, {'i', 4, 0, bu_int, bp_int}, {'I', 4, 0, bu_uint, bp_uint}, @@ -1027,18 +1049,37 @@ static formatdef bigendian_table[] = { /* Little-endian routines. *****************************************************/ static PyObject * +lu_short(_structmodulestate *state, const char *p, const formatdef *f) +{ + unsigned long x = 0; + + /* This function is only ever used in the case f->size == 2. */ + assert(f->size == 2); + Py_ssize_t i = 2; + const unsigned char *bytes = (const unsigned char *)p; + do { + x = (x<<8) | bytes[--i]; + } while (i > 0); + /* Extend sign, avoiding implementation-defined or undefined behaviour. */ + x = (x ^ 0x8000U) - 0x8000U; + return PyLong_FromLong(x & 0x8000U ? -1 - (long)(~x) : (long)x); +} + +static PyObject * lu_int(_structmodulestate *state, const char *p, const formatdef *f) { - long x = 0; - Py_ssize_t i = f->size; + unsigned long x = 0; + + /* This function is only ever used in the case f->size == 4. */ + assert(f->size == 4); + Py_ssize_t i = 4; const unsigned char *bytes = (const unsigned char *)p; do { x = (x<<8) | bytes[--i]; } while (i > 0); - /* Extend the sign bit. */ - if (SIZEOF_LONG > f->size) - x |= -(x & (1L << ((8 * f->size) - 1))); - return PyLong_FromLong(x); + /* Extend sign, avoiding implementation-defined or undefined behaviour. */ + x = (x ^ 0x80000000U) - 0x80000000U; + return PyLong_FromLong(x & 0x80000000U ? -1 - (long)(~x) : (long)x); } static PyObject * @@ -1056,16 +1097,19 @@ lu_uint(_structmodulestate *state, const char *p, const formatdef *f) static PyObject * lu_longlong(_structmodulestate *state, const char *p, const formatdef *f) { - long long x = 0; - Py_ssize_t i = f->size; + unsigned long long x = 0; + + /* This function is only ever used in the case f->size == 8. */ + assert(f->size == 8); + Py_ssize_t i = 8; const unsigned char *bytes = (const unsigned char *)p; do { x = (x<<8) | bytes[--i]; } while (i > 0); - /* Extend the sign bit. */ - if (SIZEOF_LONG_LONG > f->size) - x |= -(x & ((long long)1 << ((8 * f->size) - 1))); - return PyLong_FromLongLong(x); + /* Extend sign, avoiding implementation-defined or undefined behaviour. */ + x = (x ^ 0x8000000000000000U) - 0x8000000000000000U; + return PyLong_FromLongLong( + x & 0x8000000000000000U ? -1 - (long long)(~x) : (long long)x); } static PyObject * @@ -1213,7 +1257,7 @@ static formatdef lilendian_table[] = { {'c', 1, 0, nu_char, np_char}, {'s', 1, 0, NULL}, {'p', 1, 0, NULL}, - {'h', 2, 0, lu_int, lp_int}, + {'h', 2, 0, lu_short, lp_int}, {'H', 2, 0, lu_uint, lp_uint}, {'i', 4, 0, lu_int, lp_int}, {'I', 4, 0, lu_uint, lp_uint}, |