From 36c6178d372b075e9c74b786cfb5e47702976b1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 6 Dec 2024 15:31:30 +0100 Subject: gh-126024: fix UBSan failure in `unicodeobject.c:find_first_nonascii` (GH-127566) --- Objects/unicodeobject.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 463da06..33c4747 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5083,12 +5083,9 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T); #if PY_LITTLE_ENDIAN && HAVE_CTZ if (p < p2) { -#if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) - // x86 and amd64 are little endian and can load unaligned memory. - size_t u = *(const size_t*)p & ASCII_CHAR_MASK; -#else - size_t u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK; -#endif + size_t u; + memcpy(&u, p, sizeof(size_t)); + u &= ASCII_CHAR_MASK; if (u) { return (ctz(u) - 7) / 8; } -- cgit v0.12