summaryrefslogtreecommitdiffstats
path: root/Objects/stringlib/codecs.h
diff options
context:
space:
mode:
authorAntoine Pitrou <solipsis@pitrou.net>2012-05-15 21:48:04 (GMT)
committerAntoine Pitrou <solipsis@pitrou.net>2012-05-15 21:48:04 (GMT)
commit63065d761e6c545216b9621982d16dd459abb1f8 (patch)
tree7bfe6f72dcf2ec9604b0a76ca52fca9aa723ce02 /Objects/stringlib/codecs.h
parent12ea86adcea986c5572c597b72253d839f4d303a (diff)
downloadcpython-63065d761e6c545216b9621982d16dd459abb1f8.zip
cpython-63065d761e6c545216b9621982d16dd459abb1f8.tar.gz
cpython-63065d761e6c545216b9621982d16dd459abb1f8.tar.bz2
Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs.
Patch by Serhiy Storchaka.
Diffstat (limited to 'Objects/stringlib/codecs.h')
-rw-r--r--Objects/stringlib/codecs.h149
1 files changed, 148 insertions, 1 deletions
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index 366011c..07627d6 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -215,7 +215,6 @@ InvalidContinuation:
goto Return;
}
-#undef LONG_PTR_MASK
#undef ASCII_CHAR_MASK
@@ -415,4 +414,152 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
#undef MAX_SHORT_UNICHARS
}
+/* The pattern for constructing UCS2-repeated masks. */
+#if SIZEOF_LONG == 8
+# define UCS2_REPEAT_MASK 0x0001000100010001ul
+#elif SIZEOF_LONG == 4
+# define UCS2_REPEAT_MASK 0x00010001ul
+#else
+# error C 'long' size should be either 4 or 8!
+#endif
+
+/* The mask for fast checking. */
+#if STRINGLIB_SIZEOF_CHAR == 1
+/* The mask for fast checking of whether a C 'long' contains a
+ non-ASCII or non-Latin1 UTF16-encoded characters. */
+# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR))
+#else
+/* The mask for fast checking of whether a C 'long' may contain
+ UTF16-encoded surrogate characters. This is an efficient heuristic,
+ assuming that non-surrogate characters with a code point >= 0x8000 are
+ rare in most input.
+*/
+# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * 0x8000u)
+#endif
+/* The mask for fast byte-swapping. */
+#define STRIPPED_MASK (UCS2_REPEAT_MASK * 0x00FFu)
+/* Swap bytes. */
+#define SWAB(value) ((((value) >> 8) & STRIPPED_MASK) | \
+ (((value) & STRIPPED_MASK) << 8))
+
+Py_LOCAL_INLINE(Py_UCS4)
+STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
+ STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
+ int native_ordering)
+{
+ Py_UCS4 ch;
+ const unsigned char *aligned_end =
+ (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
+ const unsigned char *q = *inptr;
+ STRINGLIB_CHAR *p = dest + *outpos;
+ /* Offsets from q for retrieving byte pairs in the right order. */
+#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+ int ihi = !!native_ordering, ilo = !native_ordering;
+#else
+ int ihi = !native_ordering, ilo = !!native_ordering;
+#endif
+ --e;
+
+ while (q < e) {
+ Py_UCS4 ch2;
+ /* First check for possible aligned read of a C 'long'. Unaligned
+ reads are more expensive, better to defer to another iteration. */
+ if (!((size_t) q & LONG_PTR_MASK)) {
+ /* Fast path for runs of in-range non-surrogate chars. */
+ register const unsigned char *_q = q;
+ while (_q < aligned_end) {
+ unsigned long block = * (unsigned long *) _q;
+ if (native_ordering) {
+ /* Can use buffer directly */
+ if (block & FAST_CHAR_MASK)
+ break;
+ }
+ else {
+ /* Need to byte-swap */
+ if (block & SWAB(FAST_CHAR_MASK))
+ break;
+#if STRINGLIB_SIZEOF_CHAR == 1
+ block >>= 8;
+#else
+ block = SWAB(block);
+#endif
+ }
+#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+# if SIZEOF_LONG == 4
+ p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+ p[1] = (STRINGLIB_CHAR)(block >> 16);
+# elif SIZEOF_LONG == 8
+ p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+ p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
+ p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
+ p[3] = (STRINGLIB_CHAR)(block >> 48);
+# endif
+#else
+# if SIZEOF_LONG == 4
+ p[0] = (STRINGLIB_CHAR)(block >> 16);
+ p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+# elif SIZEOF_LONG == 8
+ p[0] = (STRINGLIB_CHAR)(block >> 48);
+ p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
+ p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
+ p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+# endif
+#endif
+ _q += SIZEOF_LONG;
+ p += SIZEOF_LONG / 2;
+ }
+ q = _q;
+ if (q >= e)
+ break;
+ }
+
+ ch = (q[ihi] << 8) | q[ilo];
+ q += 2;
+ if (!Py_UNICODE_IS_SURROGATE(ch)) {
+#if STRINGLIB_SIZEOF_CHAR < 2
+ if (ch > STRINGLIB_MAX_CHAR)
+ /* Out-of-range */
+ goto Return;
+#endif
+ *p++ = (STRINGLIB_CHAR)ch;
+ continue;
+ }
+
+ /* UTF-16 code pair: */
+ if (q >= e)
+ goto UnexpectedEnd;
+ if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))
+ goto IllegalEncoding;
+ ch2 = (q[ihi] << 8) | q[ilo];
+ q += 2;
+ if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))
+ goto IllegalSurrogate;
+ ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);
+#if STRINGLIB_SIZEOF_CHAR < 4
+ /* Out-of-range */
+ goto Return;
+#else
+ *p++ = (STRINGLIB_CHAR)ch;
+#endif
+ }
+ ch = 0;
+Return:
+ *inptr = q;
+ *outpos = p - dest;
+ return ch;
+UnexpectedEnd:
+ ch = 1;
+ goto Return;
+IllegalEncoding:
+ ch = 2;
+ goto Return;
+IllegalSurrogate:
+ ch = 3;
+ goto Return;
+}
+#undef UCS2_REPEAT_MASK
+#undef FAST_CHAR_MASK
+#undef STRIPPED_MASK
+#undef SWAB
+#undef LONG_PTR_MASK
#endif /* STRINGLIB_IS_UNICODE */