4 files changed, 153 insertions, 49 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 0fd752f..6fdcd7b 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -1851,6 +1851,7 @@ PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buff
    see Objects/stringlib/localeutil.h */
 #ifndef Py_LIMITED_API
 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
+    PyObject *unicode,
     int kind,
     void *buffer,
     Py_ssize_t n_buffer,
diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h
new file mode 100644
index 0000000..935a9c7
--- /dev/null
+++ b/Objects/stringlib/asciilib.h
@@ -0,0 +1,34 @@
+/* this is sort of a hack.  there's at least one place (formatting
+   floats) where some stringlib code takes a different path if it's
+   compiled as unicode. */
+#define STRINGLIB_IS_UNICODE     1
+
+#define FASTSEARCH               asciilib_fastsearch
+#define STRINGLIB(F)             asciilib_##F
+#define STRINGLIB_OBJECT         PyUnicodeObject
+#define STRINGLIB_CHAR           Py_UCS1
+#define STRINGLIB_TYPE_NAME      "unicode"
+#define STRINGLIB_PARSE_CODE     "U"
+#define STRINGLIB_EMPTY          unicode_empty
+#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
+#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
+#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
+#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL
+#define STRINGLIB_TOUPPER        Py_UNICODE_TOUPPER
+#define STRINGLIB_TOLOWER        Py_UNICODE_TOLOWER
+#define STRINGLIB_FILL           Py_UNICODE_FILL
+#define STRINGLIB_STR            PyUnicode_1BYTE_DATA
+#define STRINGLIB_LEN            PyUnicode_GET_LENGTH
+#define STRINGLIB_NEW            unicode_fromascii
+#define STRINGLIB_RESIZE         not_supported
+#define STRINGLIB_CHECK          PyUnicode_Check
+#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
+#define STRINGLIB_GROUPING       _PyUnicode_InsertThousandsGrouping
+#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale
+
+#define STRINGLIB_TOSTR          PyObject_Str
+#define STRINGLIB_TOASCII        PyObject_ASCII
+
+#define _Py_InsertThousandsGrouping _PyUnicode_ascii_InsertThousandsGrouping
+#define _Py_InsertThousandsGroupingLocale _PyUnicode_ascii_InsertThousandsGroupingLocale
+
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index f89a5f9..134ae29 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8331,6 +8331,15 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
 
 /* --- Helpers ------------------------------------------------------------ */
 
+#include "stringlib/asciilib.h"
+#include "stringlib/fastsearch.h"
+#include "stringlib/partition.h"
+#include "stringlib/split.h"
+#include "stringlib/count.h"
+#include "stringlib/find.h"
+#include "stringlib/localeutil.h"
+#include "stringlib/undef.h"
+
 #include "stringlib/ucs1lib.h"
 #include "stringlib/fastsearch.h"
 #include "stringlib/partition.h"
@@ -8359,7 +8368,10 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
 #include "stringlib/undef.h"
 
 static Py_ssize_t
-any_find_slice(Py_ssize_t Py_LOCAL_CALLBACK(ucs1)(const Py_UCS1*, Py_ssize_t,
+any_find_slice(Py_ssize_t Py_LOCAL_CALLBACK(ascii)(const Py_UCS1*, Py_ssize_t,
+                                  const Py_UCS1*, Py_ssize_t,
+                                  Py_ssize_t, Py_ssize_t),
+               Py_ssize_t Py_LOCAL_CALLBACK(ucs1)(const Py_UCS1*, Py_ssize_t,
                                   const Py_UCS1*, Py_ssize_t,
                                   Py_ssize_t, Py_ssize_t),
                Py_ssize_t Py_LOCAL_CALLBACK(ucs2)(const Py_UCS2*, Py_ssize_t,
@@ -8396,7 +8408,10 @@ any_find_slice(Py_ssize_t Py_LOCAL_CALLBACK(ucs1)(const Py_UCS1*, Py_ssize_t,
 
     switch(kind) {
     case PyUnicode_1BYTE_KIND:
-        result = ucs1(buf1, len1, buf2, len2, start, end);
+        if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2))
+            result = ascii(buf1, len1, buf2, len2, start, end);
+        else
+            result = ucs1(buf1, len1, buf2, len2, start, end);
         break;
     case PyUnicode_2BYTE_KIND:
         result = ucs2(buf1, len1, buf2, len2, start, end);
@@ -8417,7 +8432,7 @@ any_find_slice(Py_ssize_t Py_LOCAL_CALLBACK(ucs1)(const Py_UCS1*, Py_ssize_t,
 }
 
 Py_ssize_t
-_PyUnicode_InsertThousandsGrouping(int kind, void *data,
+_PyUnicode_InsertThousandsGrouping(PyObject *unicode, int kind, void *data,
                                    Py_ssize_t n_buffer,
                                    void *digits, Py_ssize_t n_digits,
                                    Py_ssize_t min_width,
@@ -8426,9 +8441,14 @@ _PyUnicode_InsertThousandsGrouping(int kind, void *data,
 {
     switch(kind) {
     case PyUnicode_1BYTE_KIND:
-        return _PyUnicode_ucs1_InsertThousandsGrouping(
-            (Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits,
-            min_width, grouping, thousands_sep);
+        if (unicode != NULL && PyUnicode_IS_ASCII(unicode))
+            return _PyUnicode_ascii_InsertThousandsGrouping(
+                (Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits,
+                min_width, grouping, thousands_sep);
+        else
+            return _PyUnicode_ucs1_InsertThousandsGrouping(
+                (Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits,
+                min_width, grouping, thousands_sep);
     case PyUnicode_2BYTE_KIND:
         return _PyUnicode_ucs2_InsertThousandsGrouping(
             (Py_UCS2*)data, n_buffer, (Py_UCS2*)digits, n_digits,
@@ -8505,10 +8525,16 @@ PyUnicode_Count(PyObject *str,
     ADJUST_INDICES(start, end, len1);
     switch(kind) {
     case PyUnicode_1BYTE_KIND:
-        result = ucs1lib_count(
-            ((Py_UCS1*)buf1) + start, end - start,
-            buf2, len2, PY_SSIZE_T_MAX
-            );
+        if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sub_obj))
+            result = asciilib_count(
+                ((Py_UCS1*)buf1) + start, end - start,
+                buf2, len2, PY_SSIZE_T_MAX
+                );
+        else
+            result = ucs1lib_count(
+                ((Py_UCS1*)buf1) + start, end - start,
+                buf2, len2, PY_SSIZE_T_MAX
+                );
         break;
     case PyUnicode_2BYTE_KIND:
         result = ucs2lib_count(
@@ -8565,12 +8591,14 @@ PyUnicode_Find(PyObject *str,
 
     if (direction > 0)
         result = any_find_slice(
-            ucs1lib_find_slice, ucs2lib_find_slice, ucs4lib_find_slice,
+            asciilib_find_slice, ucs1lib_find_slice,
+            ucs2lib_find_slice, ucs4lib_find_slice,
             str, sub, start, end
             );
     else
         result = any_find_slice(
-            ucs1lib_rfind_slice, ucs2lib_rfind_slice, ucs4lib_rfind_slice,
+            asciilib_find_slice, ucs1lib_rfind_slice,
+            ucs2lib_rfind_slice, ucs4lib_rfind_slice,
             str, sub, start, end
             );
 
@@ -9200,9 +9228,14 @@ PyUnicode_Splitlines(PyObject *string, int keepends)
 
     switch(PyUnicode_KIND(string)) {
     case PyUnicode_1BYTE_KIND:
-        list = ucs1lib_splitlines(
-            (PyObject*) string, PyUnicode_1BYTE_DATA(string),
-            PyUnicode_GET_LENGTH(string), keepends);
+        if (PyUnicode_IS_ASCII(string))
+            list = asciilib_splitlines(
+                (PyObject*) string, PyUnicode_1BYTE_DATA(string),
+                PyUnicode_GET_LENGTH(string), keepends);
+        else
+            list = ucs1lib_splitlines(
+                (PyObject*) string, PyUnicode_1BYTE_DATA(string),
+                PyUnicode_GET_LENGTH(string), keepends);
         break;
     case PyUnicode_2BYTE_KIND:
         list = ucs2lib_splitlines(
@@ -9241,10 +9274,16 @@ split(PyObject *self,
     if (substring == NULL)
         switch(PyUnicode_KIND(self)) {
         case PyUnicode_1BYTE_KIND:
-            return ucs1lib_split_whitespace(
-                (PyObject*) self,  PyUnicode_1BYTE_DATA(self),
-                PyUnicode_GET_LENGTH(self), maxcount
-                );
+            if (PyUnicode_IS_ASCII(self))
+                return asciilib_split_whitespace(
+                    (PyObject*) self,  PyUnicode_1BYTE_DATA(self),
+                    PyUnicode_GET_LENGTH(self), maxcount
+                    );
+            else
+                return ucs1lib_split_whitespace(
+                    (PyObject*) self,  PyUnicode_1BYTE_DATA(self),
+                    PyUnicode_GET_LENGTH(self), maxcount
+                    );
         case PyUnicode_2BYTE_KIND:
             return ucs2lib_split_whitespace(
                 (PyObject*) self,  PyUnicode_2BYTE_DATA(self),
@@ -9283,8 +9322,12 @@ split(PyObject *self,
 
     switch(kind) {
     case PyUnicode_1BYTE_KIND:
-        out = ucs1lib_split(
-            (PyObject*) self,  buf1, len1, buf2, len2, maxcount);
+        if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring))
+            out = asciilib_split(
+                (PyObject*) self,  buf1, len1, buf2, len2, maxcount);
+        else
+            out = ucs1lib_split(
+                (PyObject*) self,  buf1, len1, buf2, len2, maxcount);
         break;
     case PyUnicode_2BYTE_KIND:
         out = ucs2lib_split(
@@ -9323,10 +9366,16 @@ rsplit(PyObject *self,
     if (substring == NULL)
         switch(PyUnicode_KIND(self)) {
         case PyUnicode_1BYTE_KIND:
-            return ucs1lib_rsplit_whitespace(
-                (PyObject*) self,  PyUnicode_1BYTE_DATA(self),
-                PyUnicode_GET_LENGTH(self), maxcount
-                );
+            if (PyUnicode_IS_ASCII(self))
+                return asciilib_rsplit_whitespace(
+                    (PyObject*) self,  PyUnicode_1BYTE_DATA(self),
+                    PyUnicode_GET_LENGTH(self), maxcount
+                    );
+            else
+                return ucs1lib_rsplit_whitespace(
+                    (PyObject*) self,  PyUnicode_1BYTE_DATA(self),
+                    PyUnicode_GET_LENGTH(self), maxcount
+                    );
         case PyUnicode_2BYTE_KIND:
             return ucs2lib_rsplit_whitespace(
                 (PyObject*) self,  PyUnicode_2BYTE_DATA(self),
@@ -9365,8 +9414,12 @@ rsplit(PyObject *self,
 
     switch(kind) {
     case PyUnicode_1BYTE_KIND:
-        out = ucs1lib_rsplit(
-            (PyObject*) self,  buf1, len1, buf2, len2, maxcount);
+        if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring))
+            out = asciilib_rsplit(
+                (PyObject*) self,  buf1, len1, buf2, len2, maxcount);
+        else
+            out = ucs1lib_rsplit(
+                (PyObject*) self,  buf1, len1, buf2, len2, maxcount);
         break;
     case PyUnicode_2BYTE_KIND:
         out = ucs2lib_rsplit(
@@ -9387,12 +9440,15 @@ rsplit(PyObject *self,
 }
 
 static Py_ssize_t
-anylib_find(int kind, void *buf1, Py_ssize_t len1,
-            void *buf2, Py_ssize_t len2, Py_ssize_t offset)
+anylib_find(int kind, PyObject *str1, void *buf1, Py_ssize_t len1,
+            PyObject *str2, void *buf2, Py_ssize_t len2, Py_ssize_t offset)
 {
     switch(kind) {
     case PyUnicode_1BYTE_KIND:
-        return ucs1lib_find(buf1, len1, buf2, len2, offset);
+        if (PyUnicode_IS_ASCII(str1) && PyUnicode_IS_ASCII(str2))
+            return asciilib_find(buf1, len1, buf2, len2, offset);
+        else
+            return ucs1lib_find(buf1, len1, buf2, len2, offset);
     case PyUnicode_2BYTE_KIND:
         return ucs2lib_find(buf1, len1, buf2, len2, offset);
     case PyUnicode_4BYTE_KIND:
@@ -9403,12 +9459,15 @@ anylib_find(int kind, void *buf1, Py_ssize_t len1,
 }
 
 static Py_ssize_t
-anylib_count(int kind, void* sbuf, Py_ssize_t slen,
-             void *buf1, Py_ssize_t len1, Py_ssize_t maxcount)
+anylib_count(int kind, PyObject *sstr, void* sbuf, Py_ssize_t slen,
+             PyObject *str1, void *buf1, Py_ssize_t len1, Py_ssize_t maxcount)
 {
         switch(kind) {
         case PyUnicode_1BYTE_KIND:
-            return ucs1lib_count(sbuf, slen, buf1, len1, maxcount);
+            if (PyUnicode_IS_ASCII(sstr) && PyUnicode_IS_ASCII(str1))
+                return asciilib_count(sbuf, slen, buf1, len1, maxcount);
+            else
+                return ucs1lib_count(sbuf, slen, buf1, len1, maxcount);
         case PyUnicode_2BYTE_KIND:
             return ucs2lib_count(sbuf, slen, buf1, len1, maxcount);
         case PyUnicode_4BYTE_KIND:
@@ -9497,7 +9556,7 @@ replace(PyObject *self, PyObject *str1,
                 if (!buf1) goto error;
                 release1 = 1;
             }
-            i = anylib_find(rkind, sbuf, slen, buf1, len1, 0);
+            i = anylib_find(rkind, self, sbuf, slen, str1, buf1, len1, 0);
             if (i < 0)
                 goto nothing;
             if (rkind > kind2) {
@@ -9530,9 +9589,9 @@ replace(PyObject *self, PyObject *str1,
             i += len1;
 
             while ( --maxcount > 0) {
-                i = anylib_find(rkind, sbuf+PyUnicode_KIND_SIZE(rkind, i),
-                                slen-i,
-                                buf1, len1, i);
+                i = anylib_find(rkind, self,
+                                sbuf+PyUnicode_KIND_SIZE(rkind, i), slen-i,
+                                str1, buf1, len1, i);
                 if (i == -1)
                     break;
                 memcpy(res + PyUnicode_KIND_SIZE(rkind, i),
@@ -9557,7 +9616,7 @@ replace(PyObject *self, PyObject *str1,
             if (!buf1) goto error;
             release1 = 1;
         }
-        n = anylib_count(rkind, sbuf, slen, buf1, len1, maxcount);
+        n = anylib_count(rkind, self, sbuf, slen, str1, buf1, len1, maxcount);
         if (n == 0)
             goto nothing;
         if (kind2 < rkind) {
@@ -9596,9 +9655,9 @@ replace(PyObject *self, PyObject *str1,
         if (len1 > 0) {
             while (n-- > 0) {
                 /* look for next match */
-                j = anylib_find(rkind,
-                                sbuf + PyUnicode_KIND_SIZE(rkind, i),
-                                slen-i, buf1, len1, i);
+                j = anylib_find(rkind, self,
+                                sbuf + PyUnicode_KIND_SIZE(rkind, i), slen-i,
+                                str1, buf1, len1, i);
                 if (j == -1)
                     break;
                 else if (j > i) {
@@ -10443,7 +10502,8 @@ unicode_find(PyObject *self, PyObject *args)
         return NULL;
 
     result = any_find_slice(
-        ucs1lib_find_slice, ucs2lib_find_slice, ucs4lib_find_slice,
+        asciilib_find_slice, ucs1lib_find_slice,
+        ucs2lib_find_slice, ucs4lib_find_slice,
         self, (PyObject*)substring, start, end
         );
 
@@ -10536,7 +10596,8 @@ unicode_index(PyObject *self, PyObject *args)
         return NULL;
 
     result = any_find_slice(
-        ucs1lib_find_slice, ucs2lib_find_slice, ucs4lib_find_slice,
+        asciilib_find_slice, ucs1lib_find_slice,
+        ucs2lib_find_slice, ucs4lib_find_slice,
         self, (PyObject*)substring, start, end
         );
 
@@ -11548,7 +11609,8 @@ unicode_rfind(PyObject *self, PyObject *args)
         return NULL;
 
     result = any_find_slice(
-        ucs1lib_rfind_slice, ucs2lib_rfind_slice, ucs4lib_rfind_slice,
+        asciilib_rfind_slice, ucs1lib_rfind_slice,
+        ucs2lib_rfind_slice, ucs4lib_rfind_slice,
         self, (PyObject*)substring, start, end
         );
 
@@ -11583,7 +11645,8 @@ unicode_rindex(PyObject *self, PyObject *args)
         return NULL;
 
     result = any_find_slice(
-        ucs1lib_rfind_slice, ucs2lib_rfind_slice, ucs4lib_rfind_slice,
+        asciilib_rfind_slice, ucs1lib_rfind_slice,
+        ucs2lib_rfind_slice, ucs4lib_rfind_slice,
         self, (PyObject*)substring, start, end
         );
 
@@ -11712,7 +11775,10 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
 
     switch(PyUnicode_KIND(str_obj)) {
     case PyUnicode_1BYTE_KIND:
-        out = ucs1lib_partition(str_obj, buf1, len1, sep_obj, buf2, len2);
+        if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj))
+            out = asciilib_partition(str_obj, buf1, len1, sep_obj, buf2, len2);
+        else
+            out = ucs1lib_partition(str_obj, buf1, len1, sep_obj, buf2, len2);
         break;
     case PyUnicode_2BYTE_KIND:
         out = ucs2lib_partition(str_obj, buf1, len1, sep_obj, buf2, len2);
@@ -11781,7 +11847,10 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
 
     switch(PyUnicode_KIND(str_in)) {
     case PyUnicode_1BYTE_KIND:
-        out = ucs1lib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2);
+        if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj))
+            out = asciilib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2);
+        else
+            out = ucs1lib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2);
         break;
     case PyUnicode_2BYTE_KIND:
         out = ucs2lib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2);
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c
index 609df64..c989d83 100644
--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
@@ -501,7 +501,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
         spec->n_grouped_digits = 0;
     else
         spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
-            PyUnicode_1BYTE_KIND, NULL, 0, NULL,
+            NULL, PyUnicode_1BYTE_KIND, NULL, 0, NULL,
             spec->n_digits, spec->n_min_width,
             locale->grouping, locale->thousands_sep);
 
@@ -603,7 +603,7 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
         r =
 #endif
             _PyUnicode_InsertThousandsGrouping(
-                kind,
+                out, kind,
                 (char*)data + PyUnicode_KIND_SIZE(kind, pos),
                 spec->n_grouped_digits,
                 pdigits + PyUnicode_KIND_SIZE(kind, d_pos),