summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2013-10-29 22:31:50 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2013-10-29 22:31:50 (GMT)
commit602f7cf0b9afc1c7e0ab859bcfb219d20158a786 (patch)
tree29523b171dac7721bee7b985a868863bcc2e6353
parentab457a2193a612610e996dd4b838437815ad8671 (diff)
downloadcpython-602f7cf0b9afc1c7e0ab859bcfb219d20158a786.zip
cpython-602f7cf0b9afc1c7e0ab859bcfb219d20158a786.tar.gz
cpython-602f7cf0b9afc1c7e0ab859bcfb219d20158a786.tar.bz2
Issue #19424: Optimize PyUnicode_CompareWithASCIIString()
Use the fast memcmp() instead of a loop using the slow PyUnicode_READ() macro. strlen() is still necessary to correctly handle Unicode strings that contain null characters.
-rw-r--r--Objects/unicodeobject.c43
1 files changed, 30 insertions, 13 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 208e5e3..e6195fe 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10573,25 +10573,42 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
{
Py_ssize_t i;
int kind;
- void *data;
Py_UCS4 chr;
assert(_PyUnicode_CHECK(uni));
if (PyUnicode_READY(uni) == -1)
return -1;
kind = PyUnicode_KIND(uni);
- data = PyUnicode_DATA(uni);
- /* Compare Unicode string and source character set string */
- for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
- if (chr != str[i])
- return (chr < (unsigned char)(str[i])) ? -1 : 1;
- /* This check keeps Python strings that end in '\0' from comparing equal
- to C strings identical up to that point. */
- if (PyUnicode_GET_LENGTH(uni) != i || chr)
- return 1; /* uni is longer */
- if (str[i])
- return -1; /* str is longer */
- return 0;
+ if (kind == PyUnicode_1BYTE_KIND) {
+ char *data = PyUnicode_1BYTE_DATA(uni);
+ Py_ssize_t len1 = PyUnicode_GET_LENGTH(uni);
+ size_t len, len2 = strlen(str);
+ int cmp;
+
+ len = Py_MIN(len1, len2);
+ cmp = memcmp(data, str, len);
+ if (cmp != 0)
+ return cmp;
+ if (len1 > len2)
+ return 1; /* uni is longer */
+ if (len2 > len1)
+ return -1; /* str is longer */
+ return 0;
+ }
+ else {
+ void *data = PyUnicode_DATA(uni);
+ /* Compare Unicode string and source character set string */
+ for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
+ if (chr != str[i])
+ return (chr < (unsigned char)(str[i])) ? -1 : 1;
+ /* This check keeps Python strings that end in '\0' from comparing equal
+ to C strings identical up to that point. */
+ if (PyUnicode_GET_LENGTH(uni) != i || chr)
+ return 1; /* uni is longer */
+ if (str[i])
+ return -1; /* str is longer */
+ return 0;
+ }
}