diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2012-06-15 20:15:23 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2012-06-15 20:15:23 (GMT) |
commit | 27f6a3b0bff9f100331707a4a461446ffc18baae (patch) | |
tree | a90dd1ba1c9d07e8d612202e236ace147c433cd6 /Objects/stringlib | |
parent | 3049f1243ec85590f64962994f055da66c85a15e (diff) | |
download | cpython-27f6a3b0bff9f100331707a4a461446ffc18baae.zip cpython-27f6a3b0bff9f100331707a4a461446ffc18baae.tar.gz cpython-27f6a3b0bff9f100331707a4a461446ffc18baae.tar.bz2 |
Issue #15026: utf-16 encoding is now significantly faster (up to 10x).
Patch by Serhiy Storchaka.
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- | Objects/stringlib/codecs.h | 64 |
1 files changed, 64 insertions, 0 deletions
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index 07627d6..fb35493 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -562,4 +562,68 @@ IllegalSurrogate: #undef STRIPPED_MASK #undef SWAB #undef LONG_PTR_MASK + + +Py_LOCAL_INLINE(void) +STRINGLIB(utf16_encode)(unsigned short *out, + const STRINGLIB_CHAR *in, + Py_ssize_t len, + int native_ordering) +{ + const STRINGLIB_CHAR *end = in + len; +#if STRINGLIB_SIZEOF_CHAR == 1 +# define SWAB2(CH) ((CH) << 8) +#else +# define SWAB2(CH) (((CH) << 8) | ((CH) >> 8)) +#endif +#if STRINGLIB_MAX_CHAR < 0x10000 + if (native_ordering) { +# if STRINGLIB_SIZEOF_CHAR == 2 + Py_MEMCPY(out, in, 2 * len); +# else + _PyUnicode_CONVERT_BYTES(STRINGLIB_CHAR, unsigned short, in, end, out); +# endif + } else { + const STRINGLIB_CHAR *unrolled_end = in + (len & ~ (Py_ssize_t) 3); + while (in < unrolled_end) { + out[0] = SWAB2(in[0]); + out[1] = SWAB2(in[1]); + out[2] = SWAB2(in[2]); + out[3] = SWAB2(in[3]); + in += 4; out += 4; + } + while (in < end) { + *out++ = SWAB2(*in); + ++in; + } + } +#else + if (native_ordering) { + while (in < end) { + Py_UCS4 ch = *in++; + if (ch < 0x10000) + *out++ = ch; + else { + out[0] = Py_UNICODE_HIGH_SURROGATE(ch); + out[1] = Py_UNICODE_LOW_SURROGATE(ch); + out += 2; + } + } + } else { + while (in < end) { + Py_UCS4 ch = *in++; + if (ch < 0x10000) + *out++ = SWAB2((Py_UCS2)ch); + else { + Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch); + Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch); + out[0] = SWAB2(ch1); + out[1] = SWAB2(ch2); + out += 2; + } + } + } +#endif +#undef SWAB2 +} #endif /* STRINGLIB_IS_UNICODE */ |