bpo-43179: Generalise alignment for optimised string routines (GH-24624)

* Remove m68k-specific hack from ascii_decode

  On m68k, the alignment of primitives is more relaxed, with 4-byte and 8-byte types only requiring 2-byte alignment, thus using sizeof(size_t) does not work. Instead, use the portable alternative. Note that this is a minimal fix that only relaxes the assertion; the condition for when to use the optimised version remains overly strict. Such issues will be fixed tree-wide in the next commit.

  NB: In C11 we could use _Alignof(size_t) instead, but for compatibility we use autoconf.

* Optimise string routines for architectures with non-natural alignment

  C only requires that sizeof(x) is a multiple of alignof(x), not that the two are equal. Thus, anywhere we optimise based on alignment, we should be using alignof(x), not sizeof(x). This is more annoying than it would be in C11, where we could just use _Alignof(x) (and alignof(x) in C++11), but since we still require only C99 we must plumb the information all the way from autoconf through the various typedefs and defines.
author: Jessica Clarke <jrtc27@jrtc27.com> 2021-03-31 10:12:39 (GMT)
committer: GitHub <noreply@github.com> 2021-03-31 10:12:39 (GMT)
commit: dec075754960dd85972ce5170df76e862f966132 (patch)
tree: d03d976a2f077f0f07ebfd9ed35f2c91490de89d /Objects
parent: cfa176685a5e788bafc7749d7a93f43ea3e4de9f (diff)
download: cpython-dec075754960dd85972ce5170df76e862f966132.zip
cpython-dec075754960dd85972ce5170df76e862f966132.tar.gz
cpython-dec075754960dd85972ce5170df76e862f966132.tar.bz2
4 files changed, 14 insertions, 30 deletions
diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c
index 1512086..994fb8a 100644
--- a/Objects/bytes_methods.c
+++ b/Objects/bytes_methods.c
@@ -115,15 +115,14 @@ _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
 {
     const char *p = cptr;
     const char *end = p + len;
-    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
 
     while (p < end) {
         /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
            for an explanation. */
-        if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
+        if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
             /* Help allocation */
             const char *_p = p;
-            while (_p < aligned_end) {
+            while (_p + SIZEOF_SIZE_T <= end) {
                 size_t value = *(const size_t *) _p;
                 if (value & ASCII_CHAR_MASK) {
                     Py_RETURN_FALSE;
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index b6ca404..b17cda1 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -26,7 +26,6 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
 {
     Py_UCS4 ch;
     const char *s = *inptr;
-    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
     STRINGLIB_CHAR *p = dest + *outpos;
 
     while (s < end) {
@@ -40,11 +39,11 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
                First, check if we can do an aligned read, as most CPUs have
                a penalty for unaligned reads.
             */
-            if (_Py_IS_ALIGNED(s, SIZEOF_SIZE_T)) {
+            if (_Py_IS_ALIGNED(s, ALIGNOF_SIZE_T)) {
                 /* Help register allocation */
                 const char *_s = s;
                 STRINGLIB_CHAR *_p = p;
-                while (_s < aligned_end) {
+                while (_s + SIZEOF_SIZE_T <= end) {
                     /* Read a whole size_t at a time (either 4 or 8 bytes),
                        and do a fast unrolled copy if it only contains ASCII
                        characters. */
@@ -496,8 +495,6 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
                         int native_ordering)
 {
     Py_UCS4 ch;
-    const unsigned char *aligned_end =
-            (const unsigned char *) _Py_ALIGN_DOWN(e, SIZEOF_LONG);
     const unsigned char *q = *inptr;
     STRINGLIB_CHAR *p = dest + *outpos;
     /* Offsets from q for retrieving byte pairs in the right order. */
@@ -512,10 +509,10 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
         Py_UCS4 ch2;
         /* First check for possible aligned read of a C 'long'. Unaligned
            reads are more expensive, better to defer to another iteration. */
-        if (_Py_IS_ALIGNED(q, SIZEOF_LONG)) {
+        if (_Py_IS_ALIGNED(q, ALIGNOF_LONG)) {
             /* Fast path for runs of in-range non-surrogate chars. */
             const unsigned char *_q = q;
-            while (_q < aligned_end) {
+            while (_q + SIZEOF_LONG <= e) {
                 unsigned long block = * (const unsigned long *) _q;
                 if (native_ordering) {
                     /* Can use buffer directly */
diff --git a/Objects/stringlib/find_max_char.h b/Objects/stringlib/find_max_char.h
index 3319a46..b9ffdfc 100644
--- a/Objects/stringlib/find_max_char.h
+++ b/Objects/stringlib/find_max_char.h
@@ -20,14 +20,12 @@ Py_LOCAL_INLINE(Py_UCS4)
 STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
 {
     const unsigned char *p = (const unsigned char *) begin;
-    const unsigned char *aligned_end =
-            (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
 
     while (p < end) {
-        if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
+        if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
             /* Help register allocation */
             const unsigned char *_p = p;
-            while (_p < aligned_end) {
+            while (_p + SIZEOF_SIZE_T <= end) {
                 size_t value = *(const size_t *) _p;
                 if (value & UCS1_ASCII_CHAR_MASK)
                     return 255;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index a7a3151..f6bf505 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5070,25 +5070,16 @@ static Py_ssize_t
 ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
 {
     const char *p = start;
-    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
-
-    /*
-     * Issue #17237: m68k is a bit different from most architectures in
-     * that objects do not use "natural alignment" - for example, int and
-     * long are only aligned at 2-byte boundaries.  Therefore the assert()
-     * won't work; also, tests have shown that skipping the "optimised
-     * version" will even speed up m68k.
-     */
-#if !defined(__m68k__)
+
 #if SIZEOF_SIZE_T <= SIZEOF_VOID_P
-    assert(_Py_IS_ALIGNED(dest, SIZEOF_SIZE_T));
-    if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
+    assert(_Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T));
+    if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
         /* Fast path, see in STRINGLIB(utf8_decode) for
            an explanation. */
         /* Help allocation */
         const char *_p = p;
         Py_UCS1 * q = dest;
-        while (_p < aligned_end) {
+        while (_p + SIZEOF_SIZE_T <= end) {
             size_t value = *(const size_t *) _p;
             if (value & ASCII_CHAR_MASK)
                 break;
@@ -5105,14 +5096,13 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
         return p - start;
     }
 #endif
-#endif
     while (p < end) {
         /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
            for an explanation. */
-        if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
+        if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
             /* Help allocation */
             const char *_p = p;
-            while (_p < aligned_end) {
+            while (_p + SIZEOF_SIZE_T <= end) {
                 size_t value = *(const size_t *) _p;
                 if (value & ASCII_CHAR_MASK)
                     break;
author	Jessica Clarke <jrtc27@jrtc27.com>	2021-03-31 10:12:39 (GMT)
committer	GitHub <noreply@github.com>	2021-03-31 10:12:39 (GMT)
commit	dec075754960dd85972ce5170df76e862f966132 (patch)
tree	d03d976a2f077f0f07ebfd9ed35f2c91490de89d /Objects
parent	cfa176685a5e788bafc7749d7a93f43ea3e4de9f (diff)
download	cpython-dec075754960dd85972ce5170df76e862f966132.zip cpython-dec075754960dd85972ce5170df76e862f966132.tar.gz cpython-dec075754960dd85972ce5170df76e862f966132.tar.bz2