diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2012-05-15 21:48:04 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2012-05-15 21:48:04 (GMT) |
commit | 63065d761e6c545216b9621982d16dd459abb1f8 (patch) | |
tree | 7bfe6f72dcf2ec9604b0a76ca52fca9aa723ce02 /Objects/stringlib | |
parent | 12ea86adcea986c5572c597b72253d839f4d303a (diff) | |
download | cpython-63065d761e6c545216b9621982d16dd459abb1f8.zip cpython-63065d761e6c545216b9621982d16dd459abb1f8.tar.gz cpython-63065d761e6c545216b9621982d16dd459abb1f8.tar.bz2 |
Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs.
Patch by Serhiy Storchaka.
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- | Objects/stringlib/codecs.h | 149 |
1 files changed, 148 insertions, 1 deletions
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index 366011c..07627d6 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -215,7 +215,6 @@ InvalidContinuation: goto Return; } -#undef LONG_PTR_MASK #undef ASCII_CHAR_MASK @@ -415,4 +414,152 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, #undef MAX_SHORT_UNICHARS } +/* The pattern for constructing UCS2-repeated masks. */ +#if SIZEOF_LONG == 8 +# define UCS2_REPEAT_MASK 0x0001000100010001ul +#elif SIZEOF_LONG == 4 +# define UCS2_REPEAT_MASK 0x00010001ul +#else +# error C 'long' size should be either 4 or 8! +#endif + +/* The mask for fast checking. */ +#if STRINGLIB_SIZEOF_CHAR == 1 +/* The mask for fast checking of whether a C 'long' contains a + non-ASCII or non-Latin1 UTF16-encoded characters. */ +# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR)) +#else +/* The mask for fast checking of whether a C 'long' may contain + UTF16-encoded surrogate characters. This is an efficient heuristic, + assuming that non-surrogate characters with a code point >= 0x8000 are + rare in most input. +*/ +# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * 0x8000u) +#endif +/* The mask for fast byte-swapping. */ +#define STRIPPED_MASK (UCS2_REPEAT_MASK * 0x00FFu) +/* Swap bytes. */ +#define SWAB(value) ((((value) >> 8) & STRIPPED_MASK) | \ + (((value) & STRIPPED_MASK) << 8)) + +Py_LOCAL_INLINE(Py_UCS4) +STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e, + STRINGLIB_CHAR *dest, Py_ssize_t *outpos, + int native_ordering) +{ + Py_UCS4 ch; + const unsigned char *aligned_end = + (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK); + const unsigned char *q = *inptr; + STRINGLIB_CHAR *p = dest + *outpos; + /* Offsets from q for retrieving byte pairs in the right order. */ +#ifdef BYTEORDER_IS_LITTLE_ENDIAN + int ihi = !!native_ordering, ilo = !native_ordering; +#else + int ihi = !native_ordering, ilo = !!native_ordering; +#endif + --e; + + while (q < e) { + Py_UCS4 ch2; + /* First check for possible aligned read of a C 'long'. Unaligned + reads are more expensive, better to defer to another iteration. */ + if (!((size_t) q & LONG_PTR_MASK)) { + /* Fast path for runs of in-range non-surrogate chars. */ + register const unsigned char *_q = q; + while (_q < aligned_end) { + unsigned long block = * (unsigned long *) _q; + if (native_ordering) { + /* Can use buffer directly */ + if (block & FAST_CHAR_MASK) + break; + } + else { + /* Need to byte-swap */ + if (block & SWAB(FAST_CHAR_MASK)) + break; +#if STRINGLIB_SIZEOF_CHAR == 1 + block >>= 8; +#else + block = SWAB(block); +#endif + } +#ifdef BYTEORDER_IS_LITTLE_ENDIAN +# if SIZEOF_LONG == 4 + p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu); + p[1] = (STRINGLIB_CHAR)(block >> 16); +# elif SIZEOF_LONG == 8 + p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu); + p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu); + p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu); + p[3] = (STRINGLIB_CHAR)(block >> 48); +# endif +#else +# if SIZEOF_LONG == 4 + p[0] = (STRINGLIB_CHAR)(block >> 16); + p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu); +# elif SIZEOF_LONG == 8 + p[0] = (STRINGLIB_CHAR)(block >> 48); + p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu); + p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu); + p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu); +# endif +#endif + _q += SIZEOF_LONG; + p += SIZEOF_LONG / 2; + } + q = _q; + if (q >= e) + break; + } + + ch = (q[ihi] << 8) | q[ilo]; + q += 2; + if (!Py_UNICODE_IS_SURROGATE(ch)) { +#if STRINGLIB_SIZEOF_CHAR < 2 + if (ch > STRINGLIB_MAX_CHAR) + /* Out-of-range */ + goto Return; +#endif + *p++ = (STRINGLIB_CHAR)ch; + continue; + } + + /* UTF-16 code pair: */ + if (q >= e) + goto UnexpectedEnd; + if (!Py_UNICODE_IS_HIGH_SURROGATE(ch)) + goto IllegalEncoding; + ch2 = (q[ihi] << 8) | q[ilo]; + q += 2; + if (!Py_UNICODE_IS_LOW_SURROGATE(ch2)) + goto IllegalSurrogate; + ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2); +#if STRINGLIB_SIZEOF_CHAR < 4 + /* Out-of-range */ + goto Return; +#else + *p++ = (STRINGLIB_CHAR)ch; +#endif + } + ch = 0; +Return: + *inptr = q; + *outpos = p - dest; + return ch; +UnexpectedEnd: + ch = 1; + goto Return; +IllegalEncoding: + ch = 2; + goto Return; +IllegalSurrogate: + ch = 3; + goto Return; +} +#undef UCS2_REPEAT_MASK +#undef FAST_CHAR_MASK +#undef STRIPPED_MASK +#undef SWAB +#undef LONG_PTR_MASK #endif /* STRINGLIB_IS_UNICODE */ |