needforspeed: added Py_MEMCPY macro (currently tuned for Visual C only),

and used it for string copy operations. This gives a 20% speedup on some string benchmarks.
author: Fredrik Lundh <fredrik@pythonware.com> 2006-05-28 12:06:46 (GMT)
committer: Fredrik Lundh <fredrik@pythonware.com> 2006-05-28 12:06:46 (GMT)
commit: 80f8e80c15a784a84f77f4895318d13b831b017e (patch)
tree: 407701c5c11658698cb8ddde6c2a6ef27405bc08 /Include
parent: 5e9d6cfbda8f968a849d5235b75b32e7175ad8fd (diff)
download: cpython-80f8e80c15a784a84f77f4895318d13b831b017e.zip
cpython-80f8e80c15a784a84f77f4895318d13b831b017e.tar.gz
cpython-80f8e80c15a784a84f77f4895318d13b831b017e.tar.bz2
2 files changed, 23 insertions, 9 deletions
diff --git a/Include/pyport.h b/Include/pyport.h
index 74ce993..47b9f70 100644
--- a/Include/pyport.h
+++ b/Include/pyport.h
@@ -174,6 +174,27 @@ typedef Py_intptr_t	Py_ssize_t;
 #define Py_LOCAL_INLINE(type) static type
 #endif
 
+/* Py_MEMCPY(target, source, length) copies length bytes like memcpy, for
+ * cases where the copied blocks are often very short and memcpy's setup
+ * cost is often high.  With Visual C it is a statement (no result value) that
+ * copies fewer than 16 bytes with an inline loop; elsewhere it is memcpy.
+ */
+
+#if defined(_MSC_VER)
+#define Py_MEMCPY(target, source, length) do {				\
+		size_t i_, n_ = (length);				\
+		char *t_ = (void*) (target);				\
+		const char *s_ = (void*) (source);			\
+		if (n_ >= 16)						\
+			memcpy(t_, s_, n_);				\
+		else							\
+			for (i_ = 0; i_ < n_; i_++)			\
+				t_[i_] = s_[i_];			\
+	} while (0)
+#else
+#define Py_MEMCPY memcpy
+#endif
+
 #include <stdlib.h>
 
 #include <math.h> /* Moved here from the math section, before extern "C" */
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 0531aed..8c39cfe 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -357,15 +357,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
         Py_UNICODE_ISDIGIT(ch) || \
         Py_UNICODE_ISNUMERIC(ch))
 
-/* memcpy has a considerable setup overhead on many platforms; use a
-   loop for short strings (the "16" below is pretty arbitary) */
-#define Py_UNICODE_COPY(target, source, length) do\
-    {Py_ssize_t i_; Py_UNICODE *t_ = (target); const Py_UNICODE *s_ = (source);\
-      if (length > 16)\
-        memcpy(t_, s_, (length)*sizeof(Py_UNICODE));\
-      else\
-        for (i_ = 0; i_ < (length); i_++) t_[i_] = s_[i_];\
-    } while (0)
+#define Py_UNICODE_COPY(target, source, length)				\
+	Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE)) /* length counts Py_UNICODE elements, not bytes */
 
 #define Py_UNICODE_FILL(target, value, length) do\
     {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
author	Fredrik Lundh <fredrik@pythonware.com>	2006-05-28 12:06:46 (GMT)
committer	Fredrik Lundh <fredrik@pythonware.com>	2006-05-28 12:06:46 (GMT)
commit	80f8e80c15a784a84f77f4895318d13b831b017e (patch)
tree	407701c5c11658698cb8ddde6c2a6ef27405bc08 /Include
parent	5e9d6cfbda8f968a849d5235b75b32e7175ad8fd (diff)
download	cpython-80f8e80c15a784a84f77f4895318d13b831b017e.zip cpython-80f8e80c15a784a84f77f4895318d13b831b017e.tar.gz cpython-80f8e80c15a784a84f77f4895318d13b831b017e.tar.bz2