12 files changed, 636 insertions, 317 deletions
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt
index 60d919e..ab506d6 100644
--- a/Objects/stringlib/README.txt
+++ b/Objects/stringlib/README.txt
@@ -16,10 +16,6 @@ STRINGLIB_EMPTY
     a PyObject representing the empty string, only to be used if
     STRINGLIB_MUTABLE is 0
 
-int STRINGLIB_CMP(STRINGLIB_CHAR*, STRINGLIB_CHAR*, Py_ssize_t)
-
-    compares two strings. returns 0 if they match, and non-zero if not.
-
 Py_ssize_t STRINGLIB_LEN(PyObject*)
 
     returns the length of the given string object (which must be of the
diff --git a/Objects/stringlib/count.h b/Objects/stringlib/count.h
index eba37e9..de34f96 100644
--- a/Objects/stringlib/count.h
+++ b/Objects/stringlib/count.h
@@ -9,28 +9,22 @@
 
 Py_LOCAL_INLINE(Py_ssize_t)
 stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
-                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len)
+                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
+                Py_ssize_t maxcount)
 {
     Py_ssize_t count;
 
     if (str_len < 0)
         return 0; /* start > len(str) */
     if (sub_len == 0)
-        return str_len + 1;
+        return (str_len < maxcount) ? str_len + 1 : maxcount;
 
-    count = fastsearch(str, str_len, sub, sub_len, FAST_COUNT);
+    count = fastsearch(str, str_len, sub, sub_len, maxcount, FAST_COUNT);
 
     if (count < 0)
-        count = 0; /* no match */
+        return 0; /* no match */
 
     return count;
 }
 
 #endif
-
-/*
-Local variables:
-c-basic-offset: 4
-indent-tabs-mode: nil
-End:
-*/
diff --git a/Objects/stringlib/ctype.h b/Objects/stringlib/ctype.h
index 8951276..739cf3d 100644
--- a/Objects/stringlib/ctype.h
+++ b/Objects/stringlib/ctype.h
@@ -107,4 +107,3 @@ stringlib_swapcase(PyObject *self)
                     STRINGLIB_LEN(self));
     return newobj;
 }
-
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index 23bccfb..e231c58 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -5,7 +5,7 @@
 
 /* fast search/count implementation, based on a mix between boyer-
    moore and horspool, with a few more bells and whistles on the top.
-   for some more background, see: http://effbot.org/stringlib.htm */
+   for some more background, see: http://effbot.org/zone/stringlib.htm */
 
 /* note: fastsearch may access s[n], which isn't a problem when using
    Python's ordinary string types, but may cause problems if you're
@@ -16,19 +16,35 @@
 
 #define FAST_COUNT 0
 #define FAST_SEARCH 1
+#define FAST_RSEARCH 2
+
+#if LONG_BIT >= 128
+#define STRINGLIB_BLOOM_WIDTH 128
+#elif LONG_BIT >= 64
+#define STRINGLIB_BLOOM_WIDTH 64
+#elif LONG_BIT >= 32
+#define STRINGLIB_BLOOM_WIDTH 32
+#else
+#error "LONG_BIT is smaller than 32"
+#endif
+
+#define STRINGLIB_BLOOM_ADD(mask, ch) \
+    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
+#define STRINGLIB_BLOOM(mask, ch)     \
+    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
 
 Py_LOCAL_INLINE(Py_ssize_t)
 fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
            const STRINGLIB_CHAR* p, Py_ssize_t m,
-           int mode)
+           Py_ssize_t maxcount, int mode)
 {
-    long mask;
+    unsigned long mask;
     Py_ssize_t skip, count = 0;
     Py_ssize_t i, j, mlast, w;
 
     w = n - m;
 
-    if (w < 0)
+    if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
         return -1;
 
     /* look for special cases */
@@ -38,54 +54,101 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
         /* use special case for 1-character strings */
         if (mode == FAST_COUNT) {
             for (i = 0; i < n; i++)
-                if (s[i] == p[0])
+                if (s[i] == p[0]) {
                     count++;
+                    if (count == maxcount)
+                        return maxcount;
+                }
             return count;
-        } else {
+        } else if (mode == FAST_SEARCH) {
             for (i = 0; i < n; i++)
                 if (s[i] == p[0])
                     return i;
+        } else {    /* FAST_RSEARCH */
+            for (i = n - 1; i > -1; i--)
+                if (s[i] == p[0])
+                    return i;
         }
         return -1;
     }
 
     mlast = m - 1;
-
-    /* create compressed boyer-moore delta 1 table */
     skip = mlast - 1;
-    /* process pattern[:-1] */
-    for (mask = i = 0; i < mlast; i++) {
-        mask |= (1 << (p[i] & 0x1F));
-        if (p[i] == p[mlast])
-            skip = mlast - i - 1;
-    }
-    /* process pattern[-1] outside the loop */
-    mask |= (1 << (p[mlast] & 0x1F));
-
-    for (i = 0; i <= w; i++) {
-        /* note: using mlast in the skip path slows things down on x86 */
-        if (s[i+m-1] == p[m-1]) {
-            /* candidate match */
-            for (j = 0; j < mlast; j++)
-                if (s[i+j] != p[j])
-                    break;
-            if (j == mlast) {
-                /* got a match! */
-                if (mode != FAST_COUNT)
+    mask = 0;
+
+    if (mode != FAST_RSEARCH) {
+
+        /* create compressed boyer-moore delta 1 table */
+
+        /* process pattern[:-1] */
+        for (i = 0; i < mlast; i++) {
+            STRINGLIB_BLOOM_ADD(mask, p[i]);
+            if (p[i] == p[mlast])
+                skip = mlast - i - 1;
+        }
+        /* process pattern[-1] outside the loop */
+        STRINGLIB_BLOOM_ADD(mask, p[mlast]);
+
+        for (i = 0; i <= w; i++) {
+            /* note: using mlast in the skip path slows things down on x86 */
+            if (s[i+m-1] == p[m-1]) {
+                /* candidate match */
+                for (j = 0; j < mlast; j++)
+                    if (s[i+j] != p[j])
+                        break;
+                if (j == mlast) {
+                    /* got a match! */
+                    if (mode != FAST_COUNT)
+                        return i;
+                    count++;
+                    if (count == maxcount)
+                        return maxcount;
+                    i = i + mlast;
+                    continue;
+                }
+                /* miss: check if next character is part of pattern */
+                if (!STRINGLIB_BLOOM(mask, s[i+m]))
+                    i = i + m;
+                else
+                    i = i + skip;
+            } else {
+                /* skip: check if next character is part of pattern */
+                if (!STRINGLIB_BLOOM(mask, s[i+m]))
+                    i = i + m;
+            }
+        }
+    } else {    /* FAST_RSEARCH */
+
+        /* create compressed boyer-moore delta 1 table */
+
+        /* process pattern[0] outside the loop */
+        STRINGLIB_BLOOM_ADD(mask, p[0]);
+        /* process pattern[:0:-1] */
+        for (i = mlast; i > 0; i--) {
+            STRINGLIB_BLOOM_ADD(mask, p[i]);
+            if (p[i] == p[0])
+                skip = i - 1;
+        }
+
+        for (i = w; i >= 0; i--) {
+            if (s[i] == p[0]) {
+                /* candidate match */
+                for (j = mlast; j > 0; j--)
+                    if (s[i+j] != p[j])
+                        break;
+                if (j == 0)
+                    /* got a match! */
                     return i;
-                count++;
-                i = i + mlast;
-                continue;
+                /* miss: check if previous character is part of pattern */
+                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
+                    i = i - m;
+                else
+                    i = i - skip;
+            } else {
+                /* skip: check if previous character is part of pattern */
+                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
+                    i = i - m;
             }
-            /* miss: check if next character is part of pattern */
-            if (!(mask & (1 << (s[i+m] & 0x1F))))
-                i = i + m;
-            else
-                i = i + skip;
-        } else {
-            /* skip: check if next character is part of pattern */
-            if (!(mask & (1 << (s[i+m] & 0x1F))))
-                i = i + m;
         }
     }
 
@@ -95,10 +158,3 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
 }
 
 #endif
-
-/*
-Local variables:
-c-basic-offset: 4
-indent-tabs-mode: nil
-End:
-*/
diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h
index 4407d71..ce615dc 100644
--- a/Objects/stringlib/find.h
+++ b/Objects/stringlib/find.h
@@ -19,7 +19,7 @@ stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
     if (sub_len == 0)
         return offset;
 
-    pos = fastsearch(str, str_len, sub, sub_len, FAST_SEARCH);
+    pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_SEARCH);
 
     if (pos >= 0)
         pos += offset;
@@ -32,42 +32,43 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                 const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
                 Py_ssize_t offset)
 {
-    /* XXX - create reversefastsearch helper! */
-    if (sub_len == 0) {
-        if (str_len < 0)
-            return -1;
-	return str_len + offset;
-    } else {
-	Py_ssize_t j, pos = -1;
-	for (j = str_len - sub_len; j >= 0; --j)
-            if (STRINGLIB_CMP(str+j, sub, sub_len) == 0) {
-                pos = j + offset;
-                break;
-            }
-        return pos;
-    }
+    Py_ssize_t pos;
+
+    if (str_len < 0)
+        return -1;
+    if (sub_len == 0)
+        return str_len + offset;
+
+    pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
+
+    if (pos >= 0)
+        pos += offset;
+
+    return pos;
 }
 
+/* helper macro to fixup start/end slice values */
+#define ADJUST_INDICES(start, end, len)         \
+    if (end > len)                              \
+        end = len;                              \
+    else if (end < 0) {                         \
+        end += len;                             \
+        if (end < 0)                            \
+            end = 0;                            \
+    }                                           \
+    if (start < 0) {                            \
+        start += len;                           \
+        if (start < 0)                          \
+            start = 0;                          \
+    }
+
 Py_LOCAL_INLINE(Py_ssize_t)
 stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                      const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
                      Py_ssize_t start, Py_ssize_t end)
 {
-    if (start < 0)
-        start += str_len;
-    if (start < 0)
-        start = 0;
-    if (end > str_len)
-        end = str_len;
-    if (end < 0)
-        end += str_len;
-    if (end < 0)
-        end = 0;
-
-    return stringlib_find(
-        str + start, end - start,
-        sub, sub_len, start
-        );
+    ADJUST_INDICES(start, end, str_len);
+    return stringlib_find(str + start, end - start, sub, sub_len, start);
 }
 
 Py_LOCAL_INLINE(Py_ssize_t)
@@ -75,17 +76,7 @@ stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                       const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
                       Py_ssize_t start, Py_ssize_t end)
 {
-    if (start < 0)
-        start += str_len;
-    if (start < 0)
-        start = 0;
-    if (end > str_len)
-        end = str_len;
-    if (end < 0)
-        end += str_len;
-    if (end < 0)
-        end = 0;
-
+    ADJUST_INDICES(start, end, str_len);
     return stringlib_rfind(str + start, end - start, sub, sub_len, start);
 }
 
@@ -100,7 +91,7 @@ stringlib_contains_obj(PyObject* str, PyObject* sub)
         ) != -1;
 }
 
-#endif /* STRINGLIB_STR */
+#endif /* STRINGLIB_WANT_CONTAINS_OBJ */
 
 /*
 This function is a helper for the "find" family (find, rfind, index,
@@ -149,7 +140,7 @@ stringlib_parse_args_finds(const char * function_name, PyObject *args,
 
 #undef FORMAT_BUFFER_SIZE
 
-#ifdef FROM_UNICODE
+#if STRINGLIB_IS_UNICODE
 
 /*
 Wraps stringlib_parse_args_finds() and additionally ensures that the
@@ -179,13 +170,6 @@ stringlib_parse_args_finds_unicode(const char * function_name, PyObject *args,
     return 0;
 }
 
-#endif /* FROM_UNICODE */
+#endif /* STRINGLIB_IS_UNICODE */
 
 #endif /* STRINGLIB_FIND_H */
-
-/*
-Local variables:
-c-basic-offset: 4
-indent-tabs-mode: nil
-End:
-*/
diff --git a/Objects/stringlib/formatter.h b/Objects/stringlib/formatter.h
index 4770863..4fdc62d 100644
--- a/Objects/stringlib/formatter.h
+++ b/Objects/stringlib/formatter.h
@@ -32,7 +32,7 @@ unknown_presentation_type(STRINGLIB_CHAR presentation_type,
         PyErr_Format(PyExc_ValueError,
                      "Unknown format code '%c' "
                      "for object of type '%.200s'",
-                     presentation_type,
+                     (char)presentation_type,
                      type_name);
 #if STRINGLIB_IS_UNICODE
     else
@@ -44,6 +44,24 @@ unknown_presentation_type(STRINGLIB_CHAR presentation_type,
 #endif
 }
 
+static void
+invalid_comma_type(STRINGLIB_CHAR presentation_type)
+{
+#if STRINGLIB_IS_UNICODE
+    /* See comment in unknown_presentation_type */
+    if (presentation_type > 32 && presentation_type < 128)
+#endif
+        PyErr_Format(PyExc_ValueError,
+                     "Cannot specify ',' with '%c'.",
+                     (char)presentation_type);
+#if STRINGLIB_IS_UNICODE
+    else
+        PyErr_Format(PyExc_ValueError,
+                     "Cannot specify ',' with '\\x%x'.",
+                     (unsigned int)presentation_type);
+#endif
+}
+
 /*
     get_integer consumes 0 or more decimal digit characters from an
     input string, updates *result with the corresponding positive
@@ -277,8 +295,7 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
             /* These are allowed. See PEP 378.*/
             break;
         default:
-            PyErr_Format(PyExc_ValueError,
-                         "Cannot specify ',' with '%c'.", format->type);
+            invalid_comma_type(format->type);
             return 0;
         }
     }
@@ -632,8 +649,8 @@ get_locale_info(int type, LocaleInfo *locale_info)
     case LT_DEFAULT_LOCALE:
         locale_info->decimal_point = ".";
         locale_info->thousands_sep = ",";
-        locale_info->grouping = "\3"; /* Group every 3 characters,
-                                         trailing 0 means repeat
+        locale_info->grouping = "\3"; /* Group every 3 characters.  The
+                                         (implicit) trailing 0 means repeat
                                          infinitely. */
         break;
     case LT_NO_LOCALE:
@@ -759,14 +776,6 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
             goto done;
         }
 
-        /* Error to specify a comma. */
-        if (format->thousands_separators) {
-            PyErr_SetString(PyExc_ValueError,
-                            "Thousands separators not allowed with integer"
-                            " format specifier 'c'");
-            goto done;
-        }
-
         /* taken from unicodeobject.c formatchar() */
         /* Integer input truncated to a character */
 /* XXX: won't work for int */
@@ -932,20 +941,16 @@ format_float_internal(PyObject *value,
        from a hard-code pseudo-locale */
     LocaleInfo locale;
 
-    /* Alternate is not allowed on floats. */
-    if (format->alternate) {
-        PyErr_SetString(PyExc_ValueError,
-                        "Alternate form (#) not allowed in float format "
-                        "specifier");
-        goto done;
-    }
+    if (format->alternate)
+        flags |= Py_DTSF_ALT;
 
     if (type == '\0') {
-        /* Omitted type specifier. This is like 'g' but with at least one
-           digit after the decimal point, and different default precision.*/
-        type = 'g';
-        default_precision = PyFloat_STR_PRECISION;
+        /* Omitted type specifier.  Behaves in the same way as repr(x)
+           and str(x) if no precision is given, else like 'g', but with
+           at least one digit after the decimal point. */
         flags |= Py_DTSF_ADD_DOT_0;
+        type = 'r';
+        default_precision = 0;
     }
 
     if (type == 'n')
@@ -953,13 +958,6 @@ format_float_internal(PyObject *value,
            format the result. We take care of that later. */
         type = 'g';
 
-#if PY_VERSION_HEX < 0x0301000
-    /* 'F' is the same as 'f', per the PEP */
-    /* This is no longer the case in 3.x */
-    if (type == 'F')
-        type = 'f';
-#endif
-
     val = PyFloat_AsDouble(value);
     if (val == -1.0 && PyErr_Occurred())
         goto done;
@@ -972,12 +970,8 @@ format_float_internal(PyObject *value,
 
     if (precision < 0)
         precision = default_precision;
-
-#if PY_VERSION_HEX < 0x03010000
-    /* 3.1 no longer converts large 'f' to 'g'. */
-    if ((type == 'f' || type == 'F') && fabs(val) >= 1e50)
+    else if (type == 'r')
         type = 'g';
-#endif
 
     /* Cast "type", because if we're in unicode we need to pass a
        8-bit char. This is safe, because we've restricted what "type"
@@ -1105,15 +1099,7 @@ format_complex_internal(PyObject *value,
        from a hard-code pseudo-locale */
     LocaleInfo locale;
 
-    /* Alternate is not allowed on complex. */
-    if (format->alternate) {
-        PyErr_SetString(PyExc_ValueError,
-                        "Alternate form (#) not allowed in complex format "
-                        "specifier");
-        goto done;
-    }
-
-    /* Neither is zero pading. */
+    /* Zero padding is not allowed. */
     if (format->fill_char == '0') {
         PyErr_SetString(PyExc_ValueError,
                         "Zero padding is not allowed in complex format "
@@ -1136,10 +1122,13 @@ format_complex_internal(PyObject *value,
     if (im == -1.0 && PyErr_Occurred())
         goto done;
 
+    if (format->alternate)
+        flags |= Py_DTSF_ALT;
+
     if (type == '\0') {
         /* Omitted type specifier. Should be like str(self). */
-        type = 'g';
-        default_precision = PyFloat_STR_PRECISION;
+        type = 'r';
+        default_precision = 0;
         if (re == 0.0 && copysign(1.0, re) == 1.0)
             skip_re = 1;
         else
@@ -1151,15 +1140,10 @@ format_complex_internal(PyObject *value,
            format the result. We take care of that later. */
         type = 'g';
 
-#if PY_VERSION_HEX < 0x03010000
-    /* This is no longer the case in 3.x */
-    /* 'F' is the same as 'f', per the PEP */
-    if (type == 'F')
-        type = 'f';
-#endif
-
     if (precision < 0)
         precision = default_precision;
+    else if (type == 'r')
+        type = 'g';
 
     /* Cast "type", because if we're in unicode we need to pass a
        8-bit char. This is safe, because we've restricted what "type"
diff --git a/Objects/stringlib/partition.h b/Objects/stringlib/partition.h
index 20c7507..0170bdd 100644
--- a/Objects/stringlib/partition.h
+++ b/Objects/stringlib/partition.h
@@ -8,10 +8,10 @@
 #endif
 
 Py_LOCAL_INLINE(PyObject*)
-stringlib_partition(
-    PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
-    PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len
-    )
+stringlib_partition(PyObject* str_obj,
+                    const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                    PyObject* sep_obj,
+                    const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
 {
     PyObject* out;
     Py_ssize_t pos;
@@ -25,15 +25,21 @@ stringlib_partition(
     if (!out)
         return NULL;
 
-    pos = fastsearch(str, str_len, sep, sep_len, FAST_SEARCH);
+    pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_SEARCH);
 
     if (pos < 0) {
+#if STRINGLIB_MUTABLE
+        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len));
+        PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
+        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0));
+#else
         Py_INCREF(str_obj);
         PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
         Py_INCREF(STRINGLIB_EMPTY);
         PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
         Py_INCREF(STRINGLIB_EMPTY);
         PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
+#endif
         return out;
     }
 
@@ -52,13 +58,13 @@ stringlib_partition(
 }
 
 Py_LOCAL_INLINE(PyObject*)
-stringlib_rpartition(
-    PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
-    PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len
-    )
+stringlib_rpartition(PyObject* str_obj,
+                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                     PyObject* sep_obj,
+                     const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
 {
     PyObject* out;
-    Py_ssize_t pos, j;
+    Py_ssize_t pos;
 
     if (sep_len == 0) {
         PyErr_SetString(PyExc_ValueError, "empty separator");
@@ -69,21 +75,21 @@ stringlib_rpartition(
     if (!out)
         return NULL;
 
-    /* XXX - create reversefastsearch helper! */
-        pos = -1;
-        for (j = str_len - sep_len; j >= 0; --j)
-            if (STRINGLIB_CMP(str+j, sep, sep_len) == 0) {
-                pos = j;
-                break;
-            }
+    pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_RSEARCH);
 
     if (pos < 0) {
+#if STRINGLIB_MUTABLE
+        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0));
+        PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
+        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len));
+#else
         Py_INCREF(STRINGLIB_EMPTY);
         PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
         Py_INCREF(STRINGLIB_EMPTY);
         PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
         Py_INCREF(str_obj);
         PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);
+#endif
         return out;
     }
 
@@ -102,10 +108,3 @@ stringlib_rpartition(
 }
 
 #endif
-
-/*
-Local variables:
-c-basic-offset: 4
-indent-tabs-mode: nil
-End:
-*/
diff --git a/Objects/stringlib/split.h b/Objects/stringlib/split.h
new file mode 100644
index 0000000..60e7767
--- /dev/null
+++ b/Objects/stringlib/split.h
@@ -0,0 +1,394 @@
+/* stringlib: split implementation */
+
+#ifndef STRINGLIB_SPLIT_H
+#define STRINGLIB_SPLIT_H
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+/* Overallocate the initial list to reduce the number of reallocs for small
+   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
+   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
+   text (roughly 11 words per line) and field delimited data (usually 1-10
+   fields).  For large strings the split algorithms are bandwidth limited
+   so increasing the preallocation likely will not improve things.*/
+
+#define MAX_PREALLOC 12
+
+/* 5 splits gives 6 elements */
+#define PREALLOC_SIZE(maxsplit) \
+    (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
+
+#define SPLIT_APPEND(data, left, right)         \
+    sub = STRINGLIB_NEW((data) + (left),        \
+                        (right) - (left));      \
+    if (sub == NULL)                            \
+        goto onError;                           \
+    if (PyList_Append(list, sub)) {             \
+        Py_DECREF(sub);                         \
+        goto onError;                           \
+    }                                           \
+    else                                        \
+        Py_DECREF(sub);
+
+#define SPLIT_ADD(data, left, right) {          \
+    sub = STRINGLIB_NEW((data) + (left),        \
+                        (right) - (left));      \
+    if (sub == NULL)                            \
+        goto onError;                           \
+    if (count < MAX_PREALLOC) {                 \
+        PyList_SET_ITEM(list, count, sub);      \
+    } else {                                    \
+        if (PyList_Append(list, sub)) {         \
+            Py_DECREF(sub);                     \
+            goto onError;                       \
+        }                                       \
+        else                                    \
+            Py_DECREF(sub);                     \
+    }                                           \
+    count++; }
+
+
+/* Always force the list to the expected size. */
+#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
+
+Py_LOCAL_INLINE(PyObject *)
+stringlib_split_whitespace(PyObject* str_obj,
+                           const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                           Py_ssize_t maxcount)
+{
+    Py_ssize_t i, j, count=0;
+    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
+    PyObject *sub;
+
+    if (list == NULL)
+        return NULL;
+
+    i = j = 0;
+    while (maxcount-- > 0) {
+        while (i < str_len && STRINGLIB_ISSPACE(str[i]))
+            i++;
+        if (i == str_len) break;
+        j = i; i++;
+        while (i < str_len && !STRINGLIB_ISSPACE(str[i]))
+            i++;
+#ifndef STRINGLIB_MUTABLE
+        if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
+            /* No whitespace in str_obj, so just use it as list[0] */
+            Py_INCREF(str_obj);
+            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
+            count++;
+            break;
+        }
+#endif
+        SPLIT_ADD(str, j, i);
+    }
+
+    if (i < str_len) {
+        /* Only occurs when maxcount was reached */
+        /* Skip any remaining whitespace and copy to end of string */
+        while (i < str_len && STRINGLIB_ISSPACE(str[i]))
+            i++;
+        if (i != str_len)
+            SPLIT_ADD(str, i, str_len);
+    }
+    FIX_PREALLOC_SIZE(list);
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+Py_LOCAL_INLINE(PyObject *)
+stringlib_split_char(PyObject* str_obj,
+                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                     const STRINGLIB_CHAR ch,
+                     Py_ssize_t maxcount)
+{
+    Py_ssize_t i, j, count=0;
+    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
+    PyObject *sub;
+
+    if (list == NULL)
+        return NULL;
+
+    i = j = 0;
+    while ((j < str_len) && (maxcount-- > 0)) {
+        for(; j < str_len; j++) {
+            /* I found that using memchr makes no difference */
+            if (str[j] == ch) {
+                SPLIT_ADD(str, i, j);
+                i = j = j + 1;
+                break;
+            }
+        }
+    }
+#ifndef STRINGLIB_MUTABLE
+    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
+        /* ch not in str_obj, so just use str_obj as list[0] */
+        Py_INCREF(str_obj);
+        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
+        count++;
+    } else
+#endif
+    if (i <= str_len) {
+        SPLIT_ADD(str, i, str_len);
+    }
+    FIX_PREALLOC_SIZE(list);
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+Py_LOCAL_INLINE(PyObject *)
+stringlib_split(PyObject* str_obj,
+                const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
+                Py_ssize_t maxcount)
+{
+    Py_ssize_t i, j, pos, count=0;
+    PyObject *list, *sub;
+
+    if (sep_len == 0) {
+        PyErr_SetString(PyExc_ValueError, "empty separator");
+        return NULL;
+    }
+    else if (sep_len == 1)
+        return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount);
+
+    list = PyList_New(PREALLOC_SIZE(maxcount));
+    if (list == NULL)
+        return NULL;
+
+    i = j = 0;
+    while (maxcount-- > 0) {
+        pos = fastsearch(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);
+        if (pos < 0)
+            break;
+        j = i + pos;
+        SPLIT_ADD(str, i, j);
+        i = j + sep_len;
+    }
+#ifndef STRINGLIB_MUTABLE
+    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
+        /* No match in str_obj, so just use it as list[0] */
+        Py_INCREF(str_obj);
+        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
+        count++;
+    } else
+#endif
+    {
+        SPLIT_ADD(str, i, str_len);
+    }
+    FIX_PREALLOC_SIZE(list);
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+Py_LOCAL_INLINE(PyObject *)
+stringlib_rsplit_whitespace(PyObject* str_obj,
+                            const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                            Py_ssize_t maxcount)
+{
+    Py_ssize_t i, j, count=0;
+    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
+    PyObject *sub;
+
+    if (list == NULL)
+        return NULL;
+
+    i = j = str_len - 1;
+    while (maxcount-- > 0) {
+        while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
+            i--;
+        if (i < 0) break;
+        j = i; i--;
+        while (i >= 0 && !STRINGLIB_ISSPACE(str[i]))
+            i--;
+#ifndef STRINGLIB_MUTABLE
+        if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
+            /* No whitespace in str_obj, so just use it as list[0] */
+            Py_INCREF(str_obj);
+            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
+            count++;
+            break;
+        }
+#endif
+        SPLIT_ADD(str, i + 1, j + 1);
+    }
+
+    if (i >= 0) {
+        /* Only occurs when maxcount was reached */
+        /* Skip any remaining whitespace and copy to beginning of string */
+        while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
+            i--;
+        if (i >= 0)
+            SPLIT_ADD(str, 0, i + 1);
+    }
+    FIX_PREALLOC_SIZE(list);
+    if (PyList_Reverse(list) < 0)
+        goto onError;
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+Py_LOCAL_INLINE(PyObject *)
+stringlib_rsplit_char(PyObject* str_obj,
+                      const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                      const STRINGLIB_CHAR ch,
+                      Py_ssize_t maxcount)
+{
+    Py_ssize_t i, j, count=0;
+    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
+    PyObject *sub;
+
+    if (list == NULL)
+        return NULL;
+
+    i = j = str_len - 1;
+    while ((i >= 0) && (maxcount-- > 0)) {
+        for(; i >= 0; i--) {
+            if (str[i] == ch) {
+                SPLIT_ADD(str, i + 1, j + 1);
+                j = i = i - 1;
+                break;
+            }
+        }
+    }
+#ifndef STRINGLIB_MUTABLE
+    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
+        /* ch not in str_obj, so just use str_obj as list[0] */
+        Py_INCREF(str_obj);
+        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
+        count++;
+    } else
+#endif
+    if (j >= -1) {
+        SPLIT_ADD(str, 0, j + 1);
+    }
+    FIX_PREALLOC_SIZE(list);
+    if (PyList_Reverse(list) < 0)
+        goto onError;
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+Py_LOCAL_INLINE(PyObject *)
+stringlib_rsplit(PyObject* str_obj,
+                 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
+                 Py_ssize_t maxcount)
+{
+    Py_ssize_t j, pos, count=0;
+    PyObject *list, *sub;
+
+    if (sep_len == 0) {
+        PyErr_SetString(PyExc_ValueError, "empty separator");
+        return NULL;
+    }
+    else if (sep_len == 1)
+        return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount);
+
+    list = PyList_New(PREALLOC_SIZE(maxcount));
+    if (list == NULL)
+        return NULL;
+
+    j = str_len;
+    while (maxcount-- > 0) {
+        pos = fastsearch(str, j, sep, sep_len, -1, FAST_RSEARCH);
+        if (pos < 0)
+            break;
+        SPLIT_ADD(str, pos + sep_len, j);
+        j = pos;
+    }
+#ifndef STRINGLIB_MUTABLE
+    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
+        /* No match in str_obj, so just use it as list[0] */
+        Py_INCREF(str_obj);
+        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
+        count++;
+    } else
+#endif
+    {
+        SPLIT_ADD(str, 0, j);
+    }
+    FIX_PREALLOC_SIZE(list);
+    if (PyList_Reverse(list) < 0)
+        goto onError;
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+Py_LOCAL_INLINE(PyObject *)
+stringlib_splitlines(PyObject* str_obj,
+                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                     int keepends)
+{
+    /* This does not use the preallocated list because splitlines is
+       usually run with hundreds of newlines.  The overhead of
+       switching between PyList_SET_ITEM and append causes about a
+       2-3% slowdown for that common case.  A smarter implementation
+       could move the if check out, so the SET_ITEMs are done first
+       and the appends only done when the prealloc buffer is full.
+       That's too much work for little gain.*/
+
+    register Py_ssize_t i;
+    register Py_ssize_t j;
+    PyObject *list = PyList_New(0);
+    PyObject *sub;
+
+    if (list == NULL)
+        return NULL;
+
+    for (i = j = 0; i < str_len; ) {
+        Py_ssize_t eol;
+
+        /* Find a line and append it */
+        while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i]))
+            i++;
+
+        /* Skip the line break reading CRLF as one line break */
+        eol = i;
+        if (i < str_len) {
+            if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n')
+                i += 2;
+            else
+                i++;
+            if (keepends)
+                eol = i;
+        }
+#ifndef STRINGLIB_MUTABLE
+        if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
+            /* No linebreak in str_obj, so just use it as list[0] */
+            if (PyList_Append(list, str_obj))
+                goto onError;
+            break;
+        }
+#endif
+        SPLIT_APPEND(str, j, eol);
+        j = i;
+    }
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+#endif
diff --git a/Objects/stringlib/string_format.h b/Objects/stringlib/string_format.h
index b2095fd..6f10727 100644
--- a/Objects/stringlib/string_format.h
+++ b/Objects/stringlib/string_format.h
@@ -499,13 +499,16 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
         PyObject *key = SubString_new_object(&first);
         if (key == NULL)
             goto error;
-        if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
+
+        /* Use PyObject_GetItem instead of PyDict_GetItem because this
+           code is no longer just used with kwargs. It might be passed
+           a non-dict when called through format_map. */
+        if ((kwargs == NULL) || (obj = PyObject_GetItem(kwargs, key)) == NULL) {
             PyErr_SetObject(PyExc_KeyError, key);
             Py_DECREF(key);
             goto error;
         }
         Py_DECREF(key);
-        Py_INCREF(obj);
     }
     else {
         /* look up in args */
@@ -1039,6 +1042,11 @@ do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
     return build_string(&input, args, kwargs, recursion_depth, &auto_number);
 }
 
+static PyObject *
+do_string_format_map(PyObject *self, PyObject *obj)
+{
+    return do_string_format(self, NULL, obj);
+}
 
 
 /************************************************************************/
@@ -1180,10 +1188,15 @@ static PyTypeObject PyFormatterIter_Type = {
    describing the parsed elements.  It's a wrapper around
    stringlib/string_format.h's MarkupIterator */
 static PyObject *
-formatter_parser(STRINGLIB_OBJECT *self)
+formatter_parser(PyObject *ignored, STRINGLIB_OBJECT *self)
 {
     formatteriterobject *it;
 
+    if (!PyUnicode_Check(self)) {
+        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
+        return NULL;
+    }
+
     it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
     if (it == NULL)
         return NULL;
@@ -1315,7 +1328,7 @@ static PyTypeObject PyFieldNameIter_Type = {
    field_name_split.  The iterator it returns is a
    FieldNameIterator */
 static PyObject *
-formatter_field_name_split(STRINGLIB_OBJECT *self)
+formatter_field_name_split(PyObject *ignored, STRINGLIB_OBJECT *self)
 {
     SubString first;
     Py_ssize_t first_idx;
@@ -1324,6 +1337,11 @@ formatter_field_name_split(STRINGLIB_OBJECT *self)
     PyObject *first_obj = NULL;
     PyObject *result = NULL;
 
+    if (!PyUnicode_Check(self)) {
+        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
+        return NULL;
+    }
+
     it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
     if (it == NULL)
         return NULL;
diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h
index a5672c7..1c49426 100644
--- a/Objects/stringlib/stringdefs.h
+++ b/Objects/stringlib/stringdefs.h
@@ -11,6 +11,8 @@
 #define STRINGLIB_TYPE_NAME      "string"
 #define STRINGLIB_PARSE_CODE     "S"
 #define STRINGLIB_EMPTY          nullstring
+#define STRINGLIB_ISSPACE        Py_ISSPACE
+#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
 #define STRINGLIB_ISDECIMAL(x)   ((x >= '0') && (x <= '9'))
 #define STRINGLIB_TODECIMAL(x)   (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)
 #define STRINGLIB_TOUPPER        Py_TOUPPER
@@ -21,7 +23,7 @@
 #define STRINGLIB_NEW            PyBytes_FromStringAndSize
 #define STRINGLIB_RESIZE         _PyBytes_Resize
 #define STRINGLIB_CHECK          PyBytes_Check
-#define STRINGLIB_CMP            memcmp
+#define STRINGLIB_CHECK_EXACT    PyBytes_CheckExact
 #define STRINGLIB_TOSTR          PyObject_Str
 #define STRINGLIB_GROUPING       _PyBytes_InsertThousandsGrouping
 #define STRINGLIB_GROUPING_LOCALE _PyBytes_InsertThousandsGroupingLocale
diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h
index 4390e22..1e132e5 100644
--- a/Objects/stringlib/transmogrify.h
+++ b/Objects/stringlib/transmogrify.h
@@ -1,13 +1,6 @@
 /* NOTE: this API is -ONLY- for use with single byte character strings. */
 /* Do not use it with Unicode. */
 
-#include "bytes_methods.h"
-
-#ifndef STRINGLIB_MUTABLE
-#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
-#define STRINGLIB_MUTABLE 0
-#endif
-
 /* the more complicated methods.  parts of these should be pulled out into the
    shared code in bytes_methods.c to cut down on duplicate code bloat.  */
 
@@ -25,10 +18,10 @@ stringlib_expandtabs(PyObject *self, PyObject *args)
     size_t i, j;
     PyObject *u;
     int tabsize = 8;
-
+    
     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
         return NULL;
-
+    
     /* First pass: determine size of output string */
     i = j = 0;
     e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
@@ -55,20 +48,20 @@ stringlib_expandtabs(PyObject *self, PyObject *args)
                 }
             }
         }
-
+    
     if ((i + j) > PY_SSIZE_T_MAX) {
         PyErr_SetString(PyExc_OverflowError, "result is too long");
         return NULL;
     }
-
+    
     /* Second pass: create output string and fill it */
     u = STRINGLIB_NEW(NULL, i + j);
     if (!u)
         return NULL;
-
+    
     j = 0;
     q = STRINGLIB_STR(u);
-
+    
     for (p = STRINGLIB_STR(self); p < e; p++)
         if (*p == '\t') {
             if (tabsize > 0) {
@@ -84,7 +77,7 @@ stringlib_expandtabs(PyObject *self, PyObject *args)
             if (*p == '\n' || *p == '\r')
                 j = 0;
         }
-
+    
     return u;
 }
 
@@ -110,16 +103,16 @@ pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
     }
 
     u = STRINGLIB_NEW(NULL,
-                                   left + STRINGLIB_LEN(self) + right);
+				   left + STRINGLIB_LEN(self) + right);
     if (u) {
         if (left)
             memset(STRINGLIB_STR(u), fill, left);
         Py_MEMCPY(STRINGLIB_STR(u) + left,
-               STRINGLIB_STR(self),
-               STRINGLIB_LEN(self));
+	       STRINGLIB_STR(self),
+	       STRINGLIB_LEN(self));
         if (right)
             memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
-                   fill, right);
+		   fill, right);
     }
 
     return u;
@@ -269,87 +262,3 @@ stringlib_zfill(PyObject *self, PyObject *args)
 
     return (PyObject*) s;
 }
-
-
-#define _STRINGLIB_SPLIT_APPEND(data, left, right)              \
-        str = STRINGLIB_NEW((data) + (left),                    \
-                                         (right) - (left));     \
-        if (str == NULL)                                        \
-                goto onError;                                   \
-        if (PyList_Append(list, str)) {                         \
-                Py_DECREF(str);                                 \
-                goto onError;                                   \
-        }                                                       \
-        else                                                    \
-                Py_DECREF(str);
-
-PyDoc_STRVAR(splitlines__doc__,
-"B.splitlines([keepends]) -> list of lines\n\
-\n\
-Return a list of the lines in B, breaking at line boundaries.\n\
-Line breaks are not included in the resulting list unless keepends\n\
-is given and true.");
-
-static PyObject*
-stringlib_splitlines(PyObject *self, PyObject *args)
-{
-    register Py_ssize_t i;
-    register Py_ssize_t j;
-    Py_ssize_t len;
-    int keepends = 0;
-    PyObject *list;
-    PyObject *str;
-    char *data;
-
-    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
-        return NULL;
-
-    data = STRINGLIB_STR(self);
-    len = STRINGLIB_LEN(self);
-
-    /* This does not use the preallocated list because splitlines is
-       usually run with hundreds of newlines.  The overhead of
-       switching between PyList_SET_ITEM and append causes about a
-       2-3% slowdown for that common case.  A smarter implementation
-       could move the if check out, so the SET_ITEMs are done first
-       and the appends only done when the prealloc buffer is full.
-       That's too much work for little gain.*/
-
-    list = PyList_New(0);
-    if (!list)
-        goto onError;
-
-    for (i = j = 0; i < len; ) {
-        Py_ssize_t eol;
-
-        /* Find a line and append it */
-        while (i < len && data[i] != '\n' && data[i] != '\r')
-            i++;
-
-        /* Skip the line break reading CRLF as one line break */
-        eol = i;
-        if (i < len) {
-            if (data[i] == '\r' && i + 1 < len &&
-                data[i+1] == '\n')
-                i += 2;
-            else
-                i++;
-            if (keepends)
-                eol = i;
-        }
-        _STRINGLIB_SPLIT_APPEND(data, j, eol);
-        j = i;
-    }
-    if (j < len) {
-        _STRINGLIB_SPLIT_APPEND(data, j, len);
-    }
-
-    return list;
-
- onError:
-    Py_XDECREF(list);
-    return NULL;
-}
-
-#undef _STRINGLIB_SPLIT_APPEND
-
diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h
index 366acfe..09dae6d 100644
--- a/Objects/stringlib/unicodedefs.h
+++ b/Objects/stringlib/unicodedefs.h
@@ -11,6 +11,8 @@
 #define STRINGLIB_TYPE_NAME      "unicode"
 #define STRINGLIB_PARSE_CODE     "U"
 #define STRINGLIB_EMPTY          unicode_empty
+#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
+#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
 #define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
 #define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL
 #define STRINGLIB_TOUPPER        Py_UNICODE_TOUPPER
@@ -21,6 +23,7 @@
 #define STRINGLIB_NEW            PyUnicode_FromUnicode
 #define STRINGLIB_RESIZE         PyUnicode_Resize
 #define STRINGLIB_CHECK          PyUnicode_Check
+#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
 #define STRINGLIB_GROUPING       _PyUnicode_InsertThousandsGrouping
 #define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale
 
@@ -34,23 +37,4 @@
 
 #define STRINGLIB_WANT_CONTAINS_OBJ 1
 
-/* STRINGLIB_CMP was defined as:
-
-Py_LOCAL_INLINE(int)
-STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len)
-{
-    if (str[0] != other[0])
-        return 1;
-    return memcmp((void*) str, (void*) other, len * sizeof(Py_UNICODE));
-}
-
-but unfortunately that gives a error if the function isn't used in a file that
-includes this file.  So, reluctantly convert it to a macro instead. */
-
-#define STRINGLIB_CMP(str, other, len) \
-    (((str)[0] != (other)[0]) ? \
-     1 : \
-     memcmp((void*) (str), (void*) (other), (len) * sizeof(Py_UNICODE)))
-
-
 #endif /* !STRINGLIB_UNICODEDEFS_H */