From 03c3e35d42a2f9855fca4beb89e5cbbefe2d9c21 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Tue, 9 Apr 2013 21:53:09 +0200
Subject: Add fast-path in PyUnicode_DecodeCharmap() for pure 8 bit encodings:
 cp037, cp500 and iso8859_1 codecs

---
 Lib/encodings/cp037.py     |  1 -
 Lib/encodings/cp500.py     |  1 -
 Lib/encodings/iso8859_1.py |  1 -
 Objects/unicodeobject.c    | 27 ++++++++++++++++++++++++++-
 4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/Lib/encodings/cp037.py b/Lib/encodings/cp037.py
index bfe2c1e..4edd708 100644
--- a/Lib/encodings/cp037.py
+++ b/Lib/encodings/cp037.py
@@ -301,7 +301,6 @@ decoding_table = (
     '\xd9'     #  0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
     '\xda'     #  0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
     '\x9f'     #  0xFF -> CONTROL
-    '\ufffe'   ## Widen to UCS2 for optimization
 )
 
 ### Encoding table
diff --git a/Lib/encodings/cp500.py b/Lib/encodings/cp500.py
index a975be7..5f61535 100644
--- a/Lib/encodings/cp500.py
+++ b/Lib/encodings/cp500.py
@@ -301,7 +301,6 @@ decoding_table = (
     '\xd9'     #  0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
     '\xda'     #  0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
     '\x9f'     #  0xFF -> CONTROL
-    '\ufffe'   ## Widen to UCS2 for optimization
 )
 
 ### Encoding table
diff --git a/Lib/encodings/iso8859_1.py b/Lib/encodings/iso8859_1.py
index d9cc516..8cfc01f 100644
--- a/Lib/encodings/iso8859_1.py
+++ b/Lib/encodings/iso8859_1.py
@@ -301,7 +301,6 @@ decoding_table = (
     '\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
     '\xfe'     #  0xFE -> LATIN SMALL LETTER THORN (Icelandic)
     '\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
-    '\ufffe'   ## Widen to UCS2 for optimization
 )
 
 ### Encoding table
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e9153c0..88729c8 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7281,6 +7281,7 @@ PyUnicode_DecodeCharmap(const char *s,
         enum PyUnicode_Kind mapkind;
         void *mapdata;
         Py_UCS4 x;
+        unsigned char ch;
 
         if (PyUnicode_READY(mapping) == -1)
             return NULL;
@@ -7288,8 +7289,32 @@ PyUnicode_DecodeCharmap(const char *s,
         maplen = PyUnicode_GET_LENGTH(mapping);
         mapdata = PyUnicode_DATA(mapping);
         mapkind = PyUnicode_KIND(mapping);
+
+        if (mapkind == PyUnicode_1BYTE_KIND && maplen >= 256) {
+            /* fast-path for cp037, cp500 and iso8859_1 encodings. iso8859_1
+             * is disabled in encoding aliases, latin1 is preferred because
+             * its implementation is faster. */
+            Py_UCS1 *mapdata_ucs1 = (Py_UCS1 *)mapdata;
+            Py_UCS1 *outdata = (Py_UCS1 *)writer.data;
+            Py_UCS4 maxchar = writer.maxchar;
+
+            assert (writer.kind == PyUnicode_1BYTE_KIND);
+            while (s < e) {
+                ch = *s;
+                x = mapdata_ucs1[ch];
+                if (x > maxchar) {
+                    if (_PyUnicodeWriter_PrepareInternal(&writer, 1, 0xff) == -1)
+                        goto onError;
+                    maxchar = writer.maxchar;
+                    outdata = (Py_UCS1 *)writer.data;
+                }
+                outdata[writer.pos] = x;
+                writer.pos++;
+                ++s;
+            }
+        }
+
         while (s < e) {
-            unsigned char ch;
             if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
                 enum PyUnicode_Kind outkind = writer.kind;
                 void *outdata = writer.data;
-- 
cgit v0.12


From 69ed0f4c86e7004aa9867f3f104b76e858f26990 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Tue, 9 Apr 2013 21:48:24 +0200
Subject: Use PyUnicode_READ() instead of PyUnicode_READ_CHAR()

"PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it calls
PyUnicode_KIND() and might call it twice." according to its documentation.
---
 Objects/unicodeobject.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 88729c8..fde153e 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -468,7 +468,9 @@ unicode_result_ready(PyObject *unicode)
     }
 
     if (length == 1) {
-        Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
+        void *data = PyUnicode_DATA(unicode);
+        int kind = PyUnicode_KIND(unicode);
+        Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
         if (ch < 256) {
             PyObject *latin1_char = unicode_latin1[ch];
             if (latin1_char != NULL) {
@@ -2786,6 +2788,9 @@ PyObject *
 PyUnicode_FromOrdinal(int ordinal)
 {
     PyObject *v;
+    void *data;
+    int kind;
+
     if (ordinal < 0 || ordinal > MAX_UNICODE) {
         PyErr_SetString(PyExc_ValueError,
                         "chr() arg not in range(0x110000)");
@@ -2798,7 +2803,9 @@ PyUnicode_FromOrdinal(int ordinal)
     v = PyUnicode_New(1, ordinal);
     if (v == NULL)
         return NULL;
-    PyUnicode_WRITE(PyUnicode_KIND(v), PyUnicode_DATA(v), 0, ordinal);
+    kind = PyUnicode_KIND(v);
+    data = PyUnicode_DATA(v);
+    PyUnicode_WRITE(kind, data, 0, ordinal);
     assert(_PyUnicode_CheckConsistency(v, 1));
     return v;
 }
@@ -3840,6 +3847,9 @@ PyUnicode_GetLength(PyObject *unicode)
 Py_UCS4
 PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
 {
+    void *data;
+    int kind;
+
     if (!PyUnicode_Check(unicode) || PyUnicode_READY(unicode) == -1) {
         PyErr_BadArgument();
         return (Py_UCS4)-1;
@@ -3848,7 +3858,9 @@ PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
         PyErr_SetString(PyExc_IndexError, "string index out of range");
         return (Py_UCS4)-1;
     }
-    return PyUnicode_READ_CHAR(unicode, index);
+    data = PyUnicode_DATA(unicode);
+    kind = PyUnicode_KIND(unicode);
+    return PyUnicode_READ(kind, data, index);
 }
 
 int
@@ -7984,10 +7996,14 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
      * -1=not initialized, 0=unknown, 1=strict, 2=replace,
      * 3=ignore, 4=xmlcharrefreplace */
     int known_errorHandler = -1;
+    void *data;
+    int kind;
 
     if (PyUnicode_READY(unicode) == -1)
         return NULL;
     size = PyUnicode_GET_LENGTH(unicode);
+    data = PyUnicode_DATA(unicode);
+    kind = PyUnicode_KIND(unicode);
 
     /* Default to Latin-1 */
     if (mapping == NULL)
@@ -8002,7 +8018,7 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
         return res;
 
     while (inpos<size) {
-        Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, inpos);
+        Py_UCS4 ch = PyUnicode_READ(kind, data, inpos);
         /* try to encode it */
         charmapencode_result x = charmapencode_output(ch, mapping, &res, &respos);
         if (x==enc_EXCEPTION) /* error */
@@ -9930,11 +9946,11 @@ replace(PyObject *self, PyObject *str1,
             Py_ssize_t index, pos;
             char *src;
 
-            u1 = PyUnicode_READ_CHAR(str1, 0);
+            u1 = PyUnicode_READ(kind1, buf1, 0);
             pos = findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1);
             if (pos < 0)
                 goto nothing;
-            u2 = PyUnicode_READ_CHAR(str2, 0);
+            u2 = PyUnicode_READ(kind2, buf2, 0);
             u = PyUnicode_New(slen, maxchar);
             if (!u)
                 goto error;
-- 
cgit v0.12


From a85af502a4e11d910701f5b7fe16b44c400bf57b Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Tue, 9 Apr 2013 21:53:54 +0200
Subject: Optimize make_bloom_mask(), used by str.strip(), str.lstrip() and
 str.rstrip()

Write specialized functions per Unicode kind to avoid the expensive
PyUnicode_READ() macro.
---
 Objects/unicodeobject.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index fde153e..e0b507f 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -543,7 +543,6 @@ static OSVERSIONINFOEX winver;
 
 static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
 
-#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 #define BLOOM(mask, ch)     ((mask &  (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 
 #define BLOOM_LINEBREAK(ch)                                             \
@@ -553,16 +552,39 @@ static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
 Py_LOCAL_INLINE(BLOOM_MASK)
 make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
 {
+#define BLOOM_UPDATE(TYPE, MASK, PTR, LEN)             \
+    do {                                               \
+        TYPE *data = (TYPE *)PTR;                      \
+        TYPE *end = data + LEN;                        \
+        Py_UCS4 ch;                                    \
+        for (; data != end; data++) {                  \
+            ch = *data;                                \
+            MASK |= (1UL << (ch & (BLOOM_WIDTH - 1))); \
+        }                                              \
+        break;                                         \
+    } while (0)
+
     /* calculate simple bloom-style bitmask for a given unicode string */
 
     BLOOM_MASK mask;
-    Py_ssize_t i;
 
     mask = 0;
-    for (i = 0; i < len; i++)
-        BLOOM_ADD(mask, PyUnicode_READ(kind, ptr, i));
-
+    switch (kind) {
+    case PyUnicode_1BYTE_KIND:
+        BLOOM_UPDATE(Py_UCS1, mask, ptr, len);
+        break;
+    case PyUnicode_2BYTE_KIND:
+        BLOOM_UPDATE(Py_UCS2, mask, ptr, len);
+        break;
+    case PyUnicode_4BYTE_KIND:
+        BLOOM_UPDATE(Py_UCS4, mask, ptr, len);
+        break;
+    default:
+        assert(0);
+    }
     return mask;
+
+#undef BLOOM_UPDATE
 }
 
 #define BLOOM_MEMBER(mask, chr, str) \
-- 
cgit v0.12


From 63d5c1a14a644a794bff8d7f9ebc789a85925def Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Tue, 9 Apr 2013 22:13:33 +0200
Subject: Optimize PyUnicode_DecodeCharmap()

Avoid expensive PyUnicode_READ() and PyUnicode_WRITE(), manipulate pointers
instead.
---
 Objects/unicodeobject.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e0b507f..bf49ce5 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -7351,27 +7351,29 @@ PyUnicode_DecodeCharmap(const char *s,
         while (s < e) {
             if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
                 enum PyUnicode_Kind outkind = writer.kind;
-                void *outdata = writer.data;
+                Py_UCS2 *mapdata_ucs2 = (Py_UCS2 *)mapdata;
                 if (outkind == PyUnicode_1BYTE_KIND) {
+                    Py_UCS1 *outdata = (Py_UCS1 *)writer.data;
                     Py_UCS4 maxchar = writer.maxchar;
                     while (s < e) {
-                        unsigned char ch = *s;
-                        x = PyUnicode_READ(PyUnicode_2BYTE_KIND, mapdata, ch);
+                        ch = *s;
+                        x = mapdata_ucs2[ch];
                         if (x > maxchar)
                             goto Error;
-                        PyUnicode_WRITE(PyUnicode_1BYTE_KIND, outdata, writer.pos, x);
+                        outdata[writer.pos] = x;
                         writer.pos++;
                         ++s;
                     }
                     break;
                 }
                 else if (outkind == PyUnicode_2BYTE_KIND) {
+                    Py_UCS2 *outdata = (Py_UCS2 *)writer.data;
                     while (s < e) {
-                        unsigned char ch = *s;
-                        x = PyUnicode_READ(PyUnicode_2BYTE_KIND, mapdata, ch);
+                        ch = *s;
+                        x = mapdata_ucs2[ch];
                         if (x == 0xFFFE)
                             goto Error;
-                        PyUnicode_WRITE(PyUnicode_2BYTE_KIND, outdata, writer.pos, x);
+                        outdata[writer.pos] = x;
                         writer.pos++;
                         ++s;
                     }
-- 
cgit v0.12


From b3a601450416be068933e237506767e6b150a4a1 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Tue, 9 Apr 2013 22:19:21 +0200
Subject: Fix _PyUnicode_XStrip()

Inline BLOOM_MEMBER() so that PyUnicode_READ() is called only once per loop
iteration. Also store the length of the separator in a variable to avoid
repeated calls to PyUnicode_GET_LENGTH().
---
 Objects/unicodeobject.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index bf49ce5..ba72dba 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -587,10 +587,6 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
 #undef BLOOM_UPDATE
 }
 
-#define BLOOM_MEMBER(mask, chr, str) \
-    (BLOOM(mask, chr) \
-     && (PyUnicode_FindChar(str, chr, 0, PyUnicode_GET_LENGTH(str), 1) >= 0))
-
 /* Compilation of templated routines */
 
 #include "stringlib/asciilib.h"
@@ -11635,6 +11631,7 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
     int kind;
     Py_ssize_t i, j, len;
     BLOOM_MASK sepmask;
+    Py_ssize_t seplen;
 
     if (PyUnicode_READY(self) == -1 || PyUnicode_READY(sepobj) == -1)
         return NULL;
@@ -11642,24 +11639,35 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
     kind = PyUnicode_KIND(self);
     data = PyUnicode_DATA(self);
     len = PyUnicode_GET_LENGTH(self);
+    seplen = PyUnicode_GET_LENGTH(sepobj);
     sepmask = make_bloom_mask(PyUnicode_KIND(sepobj),
                               PyUnicode_DATA(sepobj),
-                              PyUnicode_GET_LENGTH(sepobj));
+                              seplen);
 
     i = 0;
     if (striptype != RIGHTSTRIP) {
-        while (i < len &&
-               BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, i), sepobj)) {
+        while (i < len) {
+            Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+            if (!BLOOM(sepmask, ch))
+                break;
+            if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0)
+                break;
             i++;
         }
     }
 
     j = len;
     if (striptype != LEFTSTRIP) {
-        do {
+        j--;
+        while (j >= i) {
+            Py_UCS4 ch = PyUnicode_READ(kind, data, j);
+            if (!BLOOM(sepmask, ch))
+                break;
+            if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0)
+                break;
             j--;
-        } while (j >= i &&
-                 BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, j), sepobj));
+        }
+
         j++;
     }
 
-- 
cgit v0.12


From 9c79e41fc5f5cb76b89af040b1675896d57051d9 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Tue, 9 Apr 2013 22:21:08 +0200
Subject: Fix do_strip(): don't pass PyUnicode_READ() to Py_UNICODE_ISSPACE(),
 so that it is not called twice

---
 Objects/unicodeobject.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ba72dba..52fe3bc 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -11727,16 +11727,23 @@ do_strip(PyObject *self, int striptype)
 
     i = 0;
     if (striptype != RIGHTSTRIP) {
-        while (i < len && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) {
+        while (i < len) {
+            Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+            if (!Py_UNICODE_ISSPACE(ch))
+                break;
             i++;
         }
     }
 
     j = len;
     if (striptype != LEFTSTRIP) {
-        do {
+        j--;
+        while (j >= i) {
+            Py_UCS4 ch = PyUnicode_READ(kind, data, j);
+            if (!Py_UNICODE_ISSPACE(ch))
+                break;
             j--;
-        } while (j >= i && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j)));
+        }
         j++;
     }
 
-- 
cgit v0.12


From f50a4e9bc940e701feb142c35a267c90fc1fff8e Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Tue, 9 Apr 2013 22:38:52 +0200
Subject: Don't call macros in PyUnicode_WRITE() parameters

PyUnicode_WRITE() expands some parameters twice or more.
---
 Objects/unicodeobject.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 52fe3bc..838d9de 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1958,13 +1958,17 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
     assert(size > 0);
     if (size == 1) {
         Py_UCS4 ch = u[0];
+        int kind;
+        void *data;
         if (ch < 256)
             return get_latin1_char((unsigned char)ch);
 
         res = PyUnicode_New(1, ch);
         if (res == NULL)
             return NULL;
-        PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
+        kind = PyUnicode_KIND(res);
+        data = PyUnicode_DATA(res);
+        PyUnicode_WRITE(kind, data, 0, ch);
         assert(_PyUnicode_CheckConsistency(res, 1));
         return res;
     }
@@ -1994,13 +1998,17 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
     assert(size > 0);
     if (size == 1) {
         Py_UCS4 ch = u[0];
+        int kind;
+        void *data;
         if (ch < 256)
             return get_latin1_char((unsigned char)ch);
 
         res = PyUnicode_New(1, ch);
         if (res == NULL)
             return NULL;
-        PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
+        kind = PyUnicode_KIND(res);
+        data = PyUnicode_DATA(res);
+        PyUnicode_WRITE(kind, data, 0, ch);
         assert(_PyUnicode_CheckConsistency(res, 1));
         return res;
     }
-- 
cgit v0.12


From cc7af7219217f247775b9079f75713399f2f0f28 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Tue, 9 Apr 2013 22:39:24 +0200
Subject: Write super-fast version of str.strip(), str.lstrip() and
 str.rstrip() for pure ASCII

---
 Objects/unicodeobject.c | 64 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 19 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 838d9de..e348a46 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -11722,37 +11722,63 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
 static PyObject *
 do_strip(PyObject *self, int striptype)
 {
-    int kind;
-    void *data;
     Py_ssize_t len, i, j;
 
     if (PyUnicode_READY(self) == -1)
         return NULL;
 
-    kind = PyUnicode_KIND(self);
-    data = PyUnicode_DATA(self);
     len = PyUnicode_GET_LENGTH(self);
 
-    i = 0;
-    if (striptype != RIGHTSTRIP) {
-        while (i < len) {
-            Py_UCS4 ch = PyUnicode_READ(kind, data, i);
-            if (!Py_UNICODE_ISSPACE(ch))
-                break;
-            i++;
+    if (PyUnicode_IS_ASCII(self)) {
+        Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
+
+        i = 0;
+        if (striptype != RIGHTSTRIP) {
+            while (i < len) {
+                Py_UCS4 ch = data[i];
+                if (!_Py_ascii_whitespace[ch])
+                    break;
+                i++;
+            }
+        }
+
+        j = len;
+        if (striptype != LEFTSTRIP) {
+            j--;
+            while (j >= i) {
+                Py_UCS4 ch = data[j];
+                if (!_Py_ascii_whitespace[ch])
+                    break;
+                j--;
+            }
+            j++;
         }
     }
+    else {
+        int kind = PyUnicode_KIND(self);
+        void *data = PyUnicode_DATA(self);
 
-    j = len;
-    if (striptype != LEFTSTRIP) {
-        j--;
-        while (j >= i) {
-            Py_UCS4 ch = PyUnicode_READ(kind, data, j);
-            if (!Py_UNICODE_ISSPACE(ch))
-                break;
+        i = 0;
+        if (striptype != RIGHTSTRIP) {
+            while (i < len) {
+                Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+                if (!Py_UNICODE_ISSPACE(ch))
+                    break;
+                i++;
+            }
+        }
+
+        j = len;
+        if (striptype != LEFTSTRIP) {
             j--;
+            while (j >= i) {
+                Py_UCS4 ch = PyUnicode_READ(kind, data, j);
+                if (!Py_UNICODE_ISSPACE(ch))
+                    break;
+                j--;
+            }
+            j++;
         }
-        j++;
     }
 
     return PyUnicode_Substring(self, i, j);
-- 
cgit v0.12


From 0cff4b16d922ad140991bf469cc944ad4858ed49 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Tue, 9 Apr 2013 22:52:48 +0200
Subject: replace(): only call PyUnicode_DATA(u) once

---
 Objects/unicodeobject.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e348a46..6b63157 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9972,7 +9972,7 @@ replace(PyObject *self, PyObject *str1,
             Py_UCS4 u1, u2;
             int rkind;
             Py_ssize_t index, pos;
-            char *src;
+            char *src, *rbuf;
 
             u1 = PyUnicode_READ(kind1, buf1, 0);
             pos = findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1);
@@ -9984,8 +9984,9 @@ replace(PyObject *self, PyObject *str1,
                 goto error;
             _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
             rkind = PyUnicode_KIND(u);
+            rbuf = PyUnicode_DATA(u);
 
-            PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2);
+            PyUnicode_WRITE(rkind, rbuf, pos, u2);
             index = 0;
             src = sbuf;
             while (--maxcount)
@@ -9997,7 +9998,7 @@ replace(PyObject *self, PyObject *str1,
                 pos = findchar(src, PyUnicode_KIND(self), slen, u1, 1);
                 if (pos < 0)
                     break;
-                PyUnicode_WRITE(rkind, PyUnicode_DATA(u), index + pos, u2);
+                PyUnicode_WRITE(rkind, rbuf, index + pos, u2);
             }
         }
         else {
-- 
cgit v0.12