From d41a98bdd9076eeadf4d3ba6d8db287e26b89777 Mon Sep 17 00:00:00 2001 From: Nadeem Vawda Date: Thu, 13 Oct 2011 13:34:16 +0200 Subject: Issue #13159: Replace FileIO's quadratic-time buffer growth algorithm with a linear-time one. Also fix the bz2 module, whose classes used the same algorithm. --- Misc/NEWS | 3 +++ Modules/_io/fileio.c | 19 ++++--------------- Modules/bz2module.c | 19 ++++--------------- 3 files changed, 11 insertions(+), 30 deletions(-) diff --git a/Misc/NEWS b/Misc/NEWS index 186ea21..3c3bff8 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -121,6 +121,9 @@ Tests Extension Modules ----------------- +- Issue #13159: FileIO and BZ2File now use a linear-time buffer growth + strategy instead of a quadratic-time one. + - Issue #13070: Fix a crash when a TextIOWrapper caught in a reference cycle would be finalized after the reference to its underlying BufferedRWPair's writer got cleared by the GC. diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index b1d492b..be5c9f8 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -43,12 +43,6 @@ #define SMALLCHUNK BUFSIZ #endif -#if SIZEOF_INT < 4 -#define BIGCHUNK (512 * 32) -#else -#define BIGCHUNK (512 * 1024) -#endif - typedef struct { PyObject_HEAD int fd; @@ -565,15 +559,10 @@ new_buffersize(fileio *self, size_t currentsize) } } #endif - if (currentsize > SMALLCHUNK) { - /* Keep doubling until we reach BIGCHUNK; - then keep adding BIGCHUNK. */ - if (currentsize <= BIGCHUNK) - return currentsize + currentsize; - else - return currentsize + BIGCHUNK; - } - return currentsize + SMALLCHUNK; + /* Expand the buffer by an amount proportional to the current size, + giving us amortized linear-time behavior. Use a less-than-double + growth factor to avoid excessive allocation. */ + return currentsize + (currentsize >> 3) + 6; } static PyObject * diff --git a/Modules/bz2module.c b/Modules/bz2module.c index 3e55202..a671e8d 100644 --- a/Modules/bz2module.c +++ b/Modules/bz2module.c @@ -218,25 +218,14 @@ Util_CatchBZ2Error(int bzerror) #define SMALLCHUNK BUFSIZ #endif -#if SIZEOF_INT < 4 -#define BIGCHUNK (512 * 32) -#else -#define BIGCHUNK (512 * 1024) -#endif - /* This is a hacked version of Python's fileobject.c:new_buffersize(). */ static size_t Util_NewBufferSize(size_t currentsize) { - if (currentsize > SMALLCHUNK) { - /* Keep doubling until we reach BIGCHUNK; - then keep adding BIGCHUNK. */ - if (currentsize <= BIGCHUNK) - return currentsize + currentsize; - else - return currentsize + BIGCHUNK; - } - return currentsize + SMALLCHUNK; + /* Expand the buffer by an amount proportional to the current size, + giving us amortized linear-time behavior. Use a less-than-double + growth factor to avoid excessive allocation. */ + return currentsize + (currentsize >> 3) + 6; } /* This is a hacked version of Python's fileobject.c:get_line(). */ -- cgit v0.12