summary | refs | log | tree | commit | diff | stats
path: root/Include
diff options
context:
space:
mode:
author: Brandt Bucher <brandtbucher@microsoft.com>, 2022-09-15 00:05:04 (GMT)
committer: GitHub <noreply@github.com>, 2022-09-15 00:05:04 (GMT)
commit: a83fdf2563aad794f0b0a78e534313bbe050d1eb (patch)
tree: 296b4a2c187723c5c87d12a3af2513a70020f651 /Include
parent: 9f1814723f5596115a794a8bec0d053f25dbf32f (diff)
download: cpython-a83fdf2563aad794f0b0a78e534313bbe050d1eb.zip
cpython-a83fdf2563aad794f0b0a78e534313bbe050d1eb.tar.gz
cpython-a83fdf2563aad794f0b0a78e534313bbe050d1eb.tar.bz2
GH-90997: Improve inline cache performance for MSVC (GH-96781)
Diffstat (limited to 'Include')
-rw-r--r-- Include/internal/pycore_code.h | 93
1 files changed, 19 insertions, 74 deletions
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 7d5fe94..bf59454 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -285,110 +285,55 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
#endif // !Py_STATS
-// Cache values are only valid in memory, so use native endianness.
-#ifdef WORDS_BIGENDIAN
+// Utility functions for reading/writing 32/64-bit values in the inline caches.
+// Great care should be taken to ensure that these functions remain correct and
+// performant! They should compile to just "move" instructions on all supported
+// compilers and platforms.
+
+// We use memcpy to let the C compiler handle unaligned accesses and endianness
+// issues for us. It also seems to produce better code than manual copying for
+// most compilers (see https://blog.regehr.org/archives/959 for more info).
static inline void
write_u32(uint16_t *p, uint32_t val)
{
- p[0] = (uint16_t)(val >> 16);
- p[1] = (uint16_t)(val >> 0);
+ memcpy(p, &val, sizeof(val));
}
static inline void
write_u64(uint16_t *p, uint64_t val)
{
- p[0] = (uint16_t)(val >> 48);
- p[1] = (uint16_t)(val >> 32);
- p[2] = (uint16_t)(val >> 16);
- p[3] = (uint16_t)(val >> 0);
-}
-
-static inline uint32_t
-read_u32(uint16_t *p)
-{
- uint32_t val = 0;
- val |= (uint32_t)p[0] << 16;
- val |= (uint32_t)p[1] << 0;
- return val;
-}
-
-static inline uint64_t
-read_u64(uint16_t *p)
-{
- uint64_t val = 0;
- val |= (uint64_t)p[0] << 48;
- val |= (uint64_t)p[1] << 32;
- val |= (uint64_t)p[2] << 16;
- val |= (uint64_t)p[3] << 0;
- return val;
-}
-
-#else
-
-static inline void
-write_u32(uint16_t *p, uint32_t val)
-{
- p[0] = (uint16_t)(val >> 0);
- p[1] = (uint16_t)(val >> 16);
+ memcpy(p, &val, sizeof(val));
}
static inline void
-write_u64(uint16_t *p, uint64_t val)
+write_obj(uint16_t *p, PyObject *val)
{
- p[0] = (uint16_t)(val >> 0);
- p[1] = (uint16_t)(val >> 16);
- p[2] = (uint16_t)(val >> 32);
- p[3] = (uint16_t)(val >> 48);
+ memcpy(p, &val, sizeof(val));
}
static inline uint32_t
read_u32(uint16_t *p)
{
- uint32_t val = 0;
- val |= (uint32_t)p[0] << 0;
- val |= (uint32_t)p[1] << 16;
+ uint32_t val;
+ memcpy(&val, p, sizeof(val));
return val;
}
static inline uint64_t
read_u64(uint16_t *p)
{
- uint64_t val = 0;
- val |= (uint64_t)p[0] << 0;
- val |= (uint64_t)p[1] << 16;
- val |= (uint64_t)p[2] << 32;
- val |= (uint64_t)p[3] << 48;
+ uint64_t val;
+ memcpy(&val, p, sizeof(val));
return val;
}
-#endif
-
-static inline void
-write_obj(uint16_t *p, PyObject *obj)
-{
- uintptr_t val = (uintptr_t)obj;
-#if SIZEOF_VOID_P == 8
- write_u64(p, val);
-#elif SIZEOF_VOID_P == 4
- write_u32(p, val);
-#else
- #error "SIZEOF_VOID_P must be 4 or 8"
-#endif
-}
-
static inline PyObject *
read_obj(uint16_t *p)
{
- uintptr_t val;
-#if SIZEOF_VOID_P == 8
- val = read_u64(p);
-#elif SIZEOF_VOID_P == 4
- val = read_u32(p);
-#else
- #error "SIZEOF_VOID_P must be 4 or 8"
-#endif
- return (PyObject *)val;
+ PyObject *val;
+ memcpy(&val, p, sizeof(val));
+ return val;
}
/* See Objects/exception_handling_notes.txt for details.