summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Victor Stinner <vstinner@wyplay.com> 2011-10-05 12:02:44 (GMT)
committer Victor Stinner <vstinner@wyplay.com> 2011-10-05 12:02:44 (GMT)
commit ae86485517cab27d6cef96d036e870888660a144 (patch)
tree bc06294d70c4cd4c4a56d678f269b9f6a3144fad
parent b9275c104e50361fe3a785126e5ecad24d319a7a (diff)
download cpython-ae86485517cab27d6cef96d036e870888660a144.zip
cpython-ae86485517cab27d6cef96d036e870888660a144.tar.gz
cpython-ae86485517cab27d6cef96d036e870888660a144.tar.bz2
Speed up find_maxchar_surrogates() for 32-bit wchar_t
If at least one character is in the range U+10000-U+10FFFF, we know that the string must use PyUnicode_4BYTE_KIND, so the scan can stop as soon as such a character is found.
-rw-r--r-- Objects/unicodeobject.c 14
1 file changed, 6 insertions, 8 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 028cada..93459a7 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1060,19 +1060,17 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
const wchar_t *iter;
assert(num_surrogates != NULL && maxchar != NULL);
- if (num_surrogates == NULL || maxchar == NULL) {
- PyErr_SetString(PyExc_SystemError,
- "unexpected NULL arguments to "
- "PyUnicode_FindMaxCharAndNumSurrogatePairs");
- return -1;
- }
-
*num_surrogates = 0;
*maxchar = 0;
for (iter = begin; iter < end; ) {
- if (*iter > *maxchar)
+ if (*iter > *maxchar) {
*maxchar = *iter;
+#if SIZEOF_WCHAR_T != 2
+ if (*maxchar >= 0x10000)
+ return 0;
+#endif
+ }
#if SIZEOF_WCHAR_T == 2
if (*iter >= 0xD800 && *iter <= 0xDBFF
&& (iter+1) < end && iter[1] >= 0xDC00 && iter[1] <= 0xDFFF)