diff options
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- | Objects/stringlib/codecs.h | 87 | ||||
-rw-r--r-- | Objects/stringlib/fastsearch.h | 4 | ||||
-rw-r--r-- | Objects/stringlib/find.h | 23 | ||||
-rw-r--r-- | Objects/stringlib/transmogrify.h | 2 |
4 files changed, 92 insertions, 24 deletions
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index ee1bf2b..0fc6b58 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -718,6 +718,93 @@ STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in, return len - (end - in + 1); #endif } + +#if STRINGLIB_SIZEOF_CHAR == 1 +# define SWAB4(CH, tmp) ((CH) << 24) /* high bytes are zero */ +#elif STRINGLIB_SIZEOF_CHAR == 2 +# define SWAB4(CH, tmp) (tmp = (CH), \ + ((tmp & 0x00FFu) << 24) + ((tmp & 0xFF00u) << 8)) + /* high bytes are zero */ +#else +# define SWAB4(CH, tmp) (tmp = (CH), \ + tmp = ((tmp & 0x00FF00FFu) << 8) + ((tmp >> 8) & 0x00FF00FFu), \ + ((tmp & 0x0000FFFFu) << 16) + ((tmp >> 16) & 0x0000FFFFu)) +#endif +Py_LOCAL_INLINE(Py_ssize_t) +STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in, + Py_ssize_t len, + PY_UINT32_T **outptr, + int native_ordering) +{ + PY_UINT32_T *out = *outptr; + const STRINGLIB_CHAR *end = in + len; + if (native_ordering) { + const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); + while (in < unrolled_end) { +#if STRINGLIB_SIZEOF_CHAR > 1 + /* check if any character is a surrogate character */ + if (((in[0] ^ 0xd800) & + (in[1] ^ 0xd800) & + (in[2] ^ 0xd800) & + (in[3] ^ 0xd800) & 0xf800) == 0) + break; +#endif + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + in += 4; out += 4; + } + while (in < end) { + Py_UCS4 ch; + ch = *in++; +#if STRINGLIB_SIZEOF_CHAR > 1 + if (Py_UNICODE_IS_SURROGATE(ch)) { + /* reject surrogate characters (U+D800-U+DFFF) */ + goto fail; + } +#endif + *out++ = ch; + } + } else { + const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); + while (in < unrolled_end) { +#if STRINGLIB_SIZEOF_CHAR > 1 + Py_UCS4 ch1, ch2, ch3, ch4; + /* check if any character is a surrogate character */ + if (((in[0] ^ 0xd800) & + (in[1] ^ 0xd800) & + (in[2] ^ 0xd800) & + (in[3] ^ 0xd800) & 0xf800) == 0) + break; +#endif + out[0] = SWAB4(in[0], ch1); + out[1] = SWAB4(in[1], ch2); + out[2] = SWAB4(in[2], ch3); + out[3] = SWAB4(in[3], ch4); + in += 4; out += 4; + } + while (in < end) { + Py_UCS4 ch = *in++; +#if STRINGLIB_SIZEOF_CHAR > 1 + if (Py_UNICODE_IS_SURROGATE(ch)) { + /* reject surrogate characters (U+D800-U+DFFF) */ + goto fail; + } +#endif + *out++ = SWAB4(ch, ch); + } + } + *outptr = out; + return len; +#if STRINGLIB_SIZEOF_CHAR > 1 + fail: + *outptr = out; + return len - (end - in + 1); +#endif +} +#undef SWAB4 + #endif #endif /* STRINGLIB_IS_UNICODE */ diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index cd7cac4..cda68e7 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -36,7 +36,7 @@ Py_LOCAL_INLINE(Py_ssize_t) STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch, unsigned char needle, - Py_ssize_t maxcount, int mode) + int mode) { if (mode == FAST_SEARCH) { const STRINGLIB_CHAR *ptr = s; @@ -115,7 +115,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, if (needle != 0) #endif return STRINGLIB(fastsearch_memchr_1char) - (s, n, p[0], needle, maxcount, mode); + (s, n, p[0], needle, mode); } if (mode == FAST_COUNT) { for (i = 0; i < n; i++) diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h index 518e012..14815f6 100644 --- a/Objects/stringlib/find.h +++ b/Objects/stringlib/find.h @@ -11,8 +11,7 @@ STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, { Py_ssize_t pos; - if (str_len < 0) - return -1; + assert(str_len >= 0); if (sub_len == 0) return offset; @@ -31,8 +30,7 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, { Py_ssize_t pos; - if (str_len < 0) - return -1; + assert(str_len >= 0); if (sub_len == 0) return str_len + offset; @@ -44,27 +42,11 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, return pos; } -/* helper macro to fixup start/end slice values */ -#define ADJUST_INDICES(start, end, len) \ - if (end > len) \ - end = len; \ - else if (end < 0) { \ - end += len; \ - if (end < 0) \ - end = 0; \ - } \ - if (start < 0) { \ - start += len; \ - if (start < 0) \ - start = 0; \ - } - Py_LOCAL_INLINE(Py_ssize_t) STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, Py_ssize_t start, Py_ssize_t end) { - ADJUST_INDICES(start, end, str_len); return STRINGLIB(find)(str + start, end - start, sub, sub_len, start); } @@ -73,7 +55,6 @@ STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, Py_ssize_t start, Py_ssize_t end) { - ADJUST_INDICES(start, end, str_len); return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start); } diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h index cae6ea1..b559b53 100644 --- a/Objects/stringlib/transmogrify.h +++ b/Objects/stringlib/transmogrify.h @@ -59,7 +59,7 @@ stringlib_expandtabs(PyObject *self, PyObject *args, PyObject *kwds) j = 0; q = STRINGLIB_STR(u); - + for (p = STRINGLIB_STR(self); p < e; p++) { if (*p == '\t') { if (tabsize > 0) { |