Issue #19424: Optimize PyUnicode_CompareWithASCIIString()

Use the fast memcmp() function instead of a loop built on the slow PyUnicode_READ() macro. strlen() is still necessary to correctly handle Unicode strings that contain null characters.
author: Victor Stinner <victor.stinner@gmail.com> 2013-10-29 22:31:50 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2013-10-29 22:31:50 (GMT)
commit: 602f7cf0b9afc1c7e0ab859bcfb219d20158a786 (patch)
tree: 29523b171dac7721bee7b985a868863bcc2e6353
parent: ab457a2193a612610e996dd4b838437815ad8671 (diff)
download: cpython-602f7cf0b9afc1c7e0ab859bcfb219d20158a786.zip
cpython-602f7cf0b9afc1c7e0ab859bcfb219d20158a786.tar.gz
cpython-602f7cf0b9afc1c7e0ab859bcfb219d20158a786.tar.bz2
1 file changed, 30 insertions, 13 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 208e5e3..e6195fe 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10573,25 +10573,42 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
 {
     Py_ssize_t i;
     int kind;
-    void *data;
     Py_UCS4 chr;
 
     assert(_PyUnicode_CHECK(uni));
     if (PyUnicode_READY(uni) == -1)
         return -1;
     kind = PyUnicode_KIND(uni);
-    data = PyUnicode_DATA(uni);
-    /* Compare Unicode string and source character set string */
-    for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
-        if (chr != str[i])
-            return (chr < (unsigned char)(str[i])) ? -1 : 1;
-    /* This check keeps Python strings that end in '\0' from comparing equal
-     to C strings identical up to that point. */
-    if (PyUnicode_GET_LENGTH(uni) != i || chr)
-        return 1; /* uni is longer */
-    if (str[i])
-        return -1; /* str is longer */
-    return 0;
+    if (kind == PyUnicode_1BYTE_KIND) {
+        char *data = PyUnicode_1BYTE_DATA(uni);
+        Py_ssize_t len1 = PyUnicode_GET_LENGTH(uni);
+        size_t len, len2 = strlen(str);
+        int cmp;
+
+        len = Py_MIN(len1, len2);
+        cmp = memcmp(data, str, len);
+        if (cmp != 0)
+            return cmp;
+        if (len1 > len2)
+            return 1; /* uni is longer */
+        if (len2 > len1)
+            return -1; /* str is longer */
+        return 0;
+    }
+    else {
+        void *data = PyUnicode_DATA(uni);
+        /* Compare Unicode string and source character set string */
+        for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
+            if (chr != str[i])
+                return (chr < (unsigned char)(str[i])) ? -1 : 1;
+        /* This check keeps Python strings that end in '\0' from comparing equal
+         to C strings identical up to that point. */
+        if (PyUnicode_GET_LENGTH(uni) != i || chr)
+            return 1; /* uni is longer */
+        if (str[i])
+            return -1; /* str is longer */
+        return 0;
+    }
 }
author	Victor Stinner <victor.stinner@gmail.com>	2013-10-29 22:31:50 (GMT)
committer	Victor Stinner <victor.stinner@gmail.com>	2013-10-29 22:31:50 (GMT)
commit	602f7cf0b9afc1c7e0ab859bcfb219d20158a786 (patch)
tree	29523b171dac7721bee7b985a868863bcc2e6353
parent	ab457a2193a612610e996dd4b838437815ad8671 (diff)
download	cpython-602f7cf0b9afc1c7e0ab859bcfb219d20158a786.zip cpython-602f7cf0b9afc1c7e0ab859bcfb219d20158a786.tar.gz cpython-602f7cf0b9afc1c7e0ab859bcfb219d20158a786.tar.bz2