summaryrefslogtreecommitdiffstats
path: root/Objects/stringlib
diff options
context:
space:
mode:
author Antoine Pitrou <solipsis@pitrou.net> 2012-06-15 20:15:23 (GMT)
committer Antoine Pitrou <solipsis@pitrou.net> 2012-06-15 20:15:23 (GMT)
commit 27f6a3b0bff9f100331707a4a461446ffc18baae (patch)
tree a90dd1ba1c9d07e8d612202e236ace147c433cd6 /Objects/stringlib
parent 3049f1243ec85590f64962994f055da66c85a15e (diff)
download cpython-27f6a3b0bff9f100331707a4a461446ffc18baae.zip
cpython-27f6a3b0bff9f100331707a4a461446ffc18baae.tar.gz
cpython-27f6a3b0bff9f100331707a4a461446ffc18baae.tar.bz2
Issue #15026: utf-16 encoding is now significantly faster (up to 10x).
Patch by Serhiy Storchaka.
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- Objects/stringlib/codecs.h 64
1 files changed, 64 insertions, 0 deletions
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index 07627d6..fb35493 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -562,4 +562,68 @@ IllegalSurrogate:
#undef STRIPPED_MASK
#undef SWAB
#undef LONG_PTR_MASK
+/* Encode `len` characters read from `in` as UTF-16 code units stored through `out`. */
+/* No bounds check on `out` here: one unit per char, two for any char >= 0x10000. */
+Py_LOCAL_INLINE(void)
+STRINGLIB(utf16_encode)(unsigned short *out, /* destination code units; advanced locally as units are written */
+ const STRINGLIB_CHAR *in, /* source characters */
+ Py_ssize_t len, /* number of source characters */
+ int native_ordering) /* nonzero: store units as-is; zero: byte-swap each unit with SWAB2 */
+{
+ const STRINGLIB_CHAR *end = in + len; /* one past the last source character */
+#if STRINGLIB_SIZEOF_CHAR == 1
+# define SWAB2(CH) ((CH) << 8) /* 1-byte char: the upper byte is zero, so the swap is just a shift */
+#else
+# define SWAB2(CH) (((CH) << 8) | ((CH) >> 8)) /* swap both bytes; bits above 16 are dropped on store (assumes 16-bit unsigned short) */
+#endif
+#if STRINGLIB_MAX_CHAR < 0x10000 /* no char can need a surrogate pair: exactly one unit per char */
+ if (native_ordering) {
+# if STRINGLIB_SIZEOF_CHAR == 2
+ Py_MEMCPY(out, in, 2 * len); /* chars are already 2 bytes wide: copy 2*len bytes directly */
+# else
+ _PyUnicode_CONVERT_BYTES(STRINGLIB_CHAR, unsigned short, in, end, out); /* presumably widens each char to unsigned short; macro is defined elsewhere -- confirm */
+# endif
+ } else { /* non-native order: byte-swap every unit */
+ const STRINGLIB_CHAR *unrolled_end = in + (len & ~ (Py_ssize_t) 3); /* len rounded down to a multiple of 4 */
+ while (in < unrolled_end) { /* manually unrolled: 4 chars per iteration */
+ out[0] = SWAB2(in[0]);
+ out[1] = SWAB2(in[1]);
+ out[2] = SWAB2(in[2]);
+ out[3] = SWAB2(in[3]);
+ in += 4; out += 4;
+ }
+ while (in < end) { /* leftover 0-3 chars */
+ *out++ = SWAB2(*in);
+ ++in;
+ }
+ }
+#else /* STRINGLIB_MAX_CHAR >= 0x10000: a char may need two units */
+ if (native_ordering) {
+ while (in < end) {
+ Py_UCS4 ch = *in++;
+ if (ch < 0x10000) /* fits in a single unit */
+ *out++ = ch;
+ else { /* split into a high/low surrogate pair */
+ out[0] = Py_UNICODE_HIGH_SURROGATE(ch);
+ out[1] = Py_UNICODE_LOW_SURROGATE(ch);
+ out += 2;
+ }
+ }
+ } else { /* same as above, but every unit is byte-swapped before it is stored */
+ while (in < end) {
+ Py_UCS4 ch = *in++;
+ if (ch < 0x10000)
+ *out++ = SWAB2((Py_UCS2)ch); /* ch < 0x10000 here, so the cast loses nothing */
+ else {
+ Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch); /* narrowed to Py_UCS2 before being passed to SWAB2 */
+ Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);
+ out[0] = SWAB2(ch1);
+ out[1] = SWAB2(ch2);
+ out += 2;
+ }
+ }
+ }
+#endif
+#undef SWAB2 /* SWAB2 is only needed inside this function */
+}
#endif /* STRINGLIB_IS_UNICODE */