summary | refs | log | tree | commit | diff | stats
path: root/Objects/stringlib
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- Objects/stringlib/codecs.h 87
-rw-r--r-- Objects/stringlib/fastsearch.h 4
-rw-r--r-- Objects/stringlib/find.h 23
-rw-r--r-- Objects/stringlib/transmogrify.h 2
4 files changed, 92 insertions, 24 deletions
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index ee1bf2b..0fc6b58 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -718,6 +718,93 @@ STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in,
return len - (end - in + 1);
#endif
}
+
+#if STRINGLIB_SIZEOF_CHAR == 1
+# define SWAB4(CH, tmp) ((CH) << 24) /* high bytes are zero */
+#elif STRINGLIB_SIZEOF_CHAR == 2
+# define SWAB4(CH, tmp) (tmp = (CH), \
+ ((tmp & 0x00FFu) << 24) + ((tmp & 0xFF00u) << 8))
+ /* high bytes are zero */
+#else
+# define SWAB4(CH, tmp) (tmp = (CH), \
+ tmp = ((tmp & 0x00FF00FFu) << 8) + ((tmp >> 8) & 0x00FF00FFu), \
+ ((tmp & 0x0000FFFFu) << 16) + ((tmp >> 16) & 0x0000FFFFu))
+#endif
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in,
+ Py_ssize_t len,
+ PY_UINT32_T **outptr,
+ int native_ordering)
+{
+ PY_UINT32_T *out = *outptr;
+ const STRINGLIB_CHAR *end = in + len;
+ if (native_ordering) {
+ const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
+ while (in < unrolled_end) {
+#if STRINGLIB_SIZEOF_CHAR > 1
+ /* check if any character is a surrogate character */
+ if (((in[0] ^ 0xd800) &
+ (in[1] ^ 0xd800) &
+ (in[2] ^ 0xd800) &
+ (in[3] ^ 0xd800) & 0xf800) == 0)
+ break;
+#endif
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+ in += 4; out += 4;
+ }
+ while (in < end) {
+ Py_UCS4 ch;
+ ch = *in++;
+#if STRINGLIB_SIZEOF_CHAR > 1
+ if (Py_UNICODE_IS_SURROGATE(ch)) {
+ /* reject surrogate characters (U+D800-U+DFFF) */
+ goto fail;
+ }
+#endif
+ *out++ = ch;
+ }
+ } else {
+ const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
+ while (in < unrolled_end) {
+#if STRINGLIB_SIZEOF_CHAR > 1
+ Py_UCS4 ch1, ch2, ch3, ch4;
+ /* check if any character is a surrogate character */
+ if (((in[0] ^ 0xd800) &
+ (in[1] ^ 0xd800) &
+ (in[2] ^ 0xd800) &
+ (in[3] ^ 0xd800) & 0xf800) == 0)
+ break;
+#endif
+ out[0] = SWAB4(in[0], ch1);
+ out[1] = SWAB4(in[1], ch2);
+ out[2] = SWAB4(in[2], ch3);
+ out[3] = SWAB4(in[3], ch4);
+ in += 4; out += 4;
+ }
+ while (in < end) {
+ Py_UCS4 ch = *in++;
+#if STRINGLIB_SIZEOF_CHAR > 1
+ if (Py_UNICODE_IS_SURROGATE(ch)) {
+ /* reject surrogate characters (U+D800-U+DFFF) */
+ goto fail;
+ }
+#endif
+ *out++ = SWAB4(ch, ch);
+ }
+ }
+ *outptr = out;
+ return len;
+#if STRINGLIB_SIZEOF_CHAR > 1
+ fail:
+ *outptr = out;
+ return len - (end - in + 1);
+#endif
+}
+#undef SWAB4
+
#endif
#endif /* STRINGLIB_IS_UNICODE */
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index cd7cac4..cda68e7 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -36,7 +36,7 @@
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n,
STRINGLIB_CHAR ch, unsigned char needle,
- Py_ssize_t maxcount, int mode)
+ int mode)
{
if (mode == FAST_SEARCH) {
const STRINGLIB_CHAR *ptr = s;
@@ -115,7 +115,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (needle != 0)
#endif
return STRINGLIB(fastsearch_memchr_1char)
- (s, n, p[0], needle, maxcount, mode);
+ (s, n, p[0], needle, mode);
}
if (mode == FAST_COUNT) {
for (i = 0; i < n; i++)
diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h
index 518e012..14815f6 100644
--- a/Objects/stringlib/find.h
+++ b/Objects/stringlib/find.h
@@ -11,8 +11,7 @@ STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
{
Py_ssize_t pos;
- if (str_len < 0)
- return -1;
+ assert(str_len >= 0);
if (sub_len == 0)
return offset;
@@ -31,8 +30,7 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
{
Py_ssize_t pos;
- if (str_len < 0)
- return -1;
+ assert(str_len >= 0);
if (sub_len == 0)
return str_len + offset;
@@ -44,27 +42,11 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return pos;
}
-/* helper macro to fixup start/end slice values */
-#define ADJUST_INDICES(start, end, len) \
- if (end > len) \
- end = len; \
- else if (end < 0) { \
- end += len; \
- if (end < 0) \
- end = 0; \
- } \
- if (start < 0) { \
- start += len; \
- if (start < 0) \
- start = 0; \
- }
-
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end)
{
- ADJUST_INDICES(start, end, str_len);
return STRINGLIB(find)(str + start, end - start, sub, sub_len, start);
}
@@ -73,7 +55,6 @@ STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end)
{
- ADJUST_INDICES(start, end, str_len);
return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start);
}
diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h
index cae6ea1..b559b53 100644
--- a/Objects/stringlib/transmogrify.h
+++ b/Objects/stringlib/transmogrify.h
@@ -59,7 +59,7 @@ stringlib_expandtabs(PyObject *self, PyObject *args, PyObject *kwds)
j = 0;
q = STRINGLIB_STR(u);
-
+
for (p = STRINGLIB_STR(self); p < e; p++) {
if (*p == '\t') {
if (tabsize > 0) {