From e19aa388e8f4aa88ef5e2a25444a06c2ec3408ad Mon Sep 17 00:00:00 2001
From: Antoine Pitrou
Date: Tue, 4 Oct 2011 16:04:01 +0200
Subject: When expandtabs() would be a no-op, don't create a duplicate string

---
 Lib/test/test_unicode.py | 4 ++++
 Objects/unicodeobject.c  | 7 +++++++
 2 files changed, 11 insertions(+)

diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 4f6f132..840b76f 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1585,6 +1585,10 @@ class UnicodeTest(string_tests.CommonTest,
             return
         self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxsize)
 
+    def test_expandtabs_optimization(self):
+        s = 'abc'
+        self.assertIs(s.expandtabs(), s)
+
     def test_raiseMemError(self):
         if struct.calcsize('P') == 8:
             # 64 bits pointers
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index f3a5dd9..af4ce63 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10196,6 +10196,7 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args)
     void *src_data, *dest_data;
     int tabsize = 8;
     int kind;
+    int found;
 
     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
         return NULL;
@@ -10205,9 +10206,11 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args)
     i = j = line_pos = 0;
     kind = PyUnicode_KIND(self);
     src_data = PyUnicode_DATA(self);
+    found = 0;
     for (; i < src_len; i++) {
         ch = PyUnicode_READ(kind, src_data, i);
         if (ch == '\t') {
+            found = 1;
             if (tabsize > 0) {
                 incr = tabsize - (line_pos % tabsize); /* cannot overflow */
                 if (j > PY_SSIZE_T_MAX - incr)
@@ -10225,6 +10228,10 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args)
             line_pos = 0;
         }
     }
+    if (!found && PyUnicode_CheckExact(self)) {
+        Py_INCREF((PyObject *) self);
+        return (PyObject *) self;
+    }
 
     /* Second pass: create output string and fill it */
     u = PyUnicode_New(j, PyUnicode_MAX_CHAR_VALUE(self));
-- 
cgit v0.12