summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Nadeem Vawda <nadeem.vawda@gmail.com> 2011-10-13 11:34:16 (GMT)
committer: Nadeem Vawda <nadeem.vawda@gmail.com> 2011-10-13 11:34:16 (GMT)
commit: d41a98bdd9076eeadf4d3ba6d8db287e26b89777 (patch)
tree: 41e6908d7d713d1981060eded58e10c00098850c
parent: f1ab47ebc4d6f6cdb0ea7f9ec73d874aae03a1f2 (diff)
download: cpython-d41a98bdd9076eeadf4d3ba6d8db287e26b89777.zip
cpython-d41a98bdd9076eeadf4d3ba6d8db287e26b89777.tar.gz
cpython-d41a98bdd9076eeadf4d3ba6d8db287e26b89777.tar.bz2
Issue #13159: Replace FileIO's quadratic-time buffer growth algorithm with a linear-time one.
Also fix the bz2 module, whose classes used the same algorithm.
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/_io/fileio.c 19
-rw-r--r-- Modules/bz2module.c 19
3 files changed, 11 insertions, 30 deletions
diff --git a/Misc/NEWS b/Misc/NEWS
index 186ea21..3c3bff8 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -121,6 +121,9 @@ Tests
Extension Modules
-----------------
+- Issue #13159: FileIO and BZ2File now use a linear-time buffer growth
+ strategy instead of a quadratic-time one.
+
- Issue #13070: Fix a crash when a TextIOWrapper caught in a reference cycle
would be finalized after the reference to its underlying BufferedRWPair's
writer got cleared by the GC.
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index b1d492b..be5c9f8 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -43,12 +43,6 @@
#define SMALLCHUNK BUFSIZ
#endif
-#if SIZEOF_INT < 4
-#define BIGCHUNK (512 * 32)
-#else
-#define BIGCHUNK (512 * 1024)
-#endif
-
typedef struct {
PyObject_HEAD
int fd;
@@ -565,15 +559,10 @@ new_buffersize(fileio *self, size_t currentsize)
}
}
#endif
- if (currentsize > SMALLCHUNK) {
- /* Keep doubling until we reach BIGCHUNK;
- then keep adding BIGCHUNK. */
- if (currentsize <= BIGCHUNK)
- return currentsize + currentsize;
- else
- return currentsize + BIGCHUNK;
- }
- return currentsize + SMALLCHUNK;
+ /* Expand the buffer by an amount proportional to the current size,
+ giving us amortized linear-time behavior. Use a less-than-double
+ growth factor to avoid excessive allocation. */
+ return currentsize + (currentsize >> 3) + 6;
}
static PyObject *
diff --git a/Modules/bz2module.c b/Modules/bz2module.c
index 3e55202..a671e8d 100644
--- a/Modules/bz2module.c
+++ b/Modules/bz2module.c
@@ -218,25 +218,14 @@ Util_CatchBZ2Error(int bzerror)
#define SMALLCHUNK BUFSIZ
#endif
-#if SIZEOF_INT < 4
-#define BIGCHUNK (512 * 32)
-#else
-#define BIGCHUNK (512 * 1024)
-#endif
-
/* This is a hacked version of Python's fileobject.c:new_buffersize(). */
static size_t
Util_NewBufferSize(size_t currentsize)
{
- if (currentsize > SMALLCHUNK) {
- /* Keep doubling until we reach BIGCHUNK;
- then keep adding BIGCHUNK. */
- if (currentsize <= BIGCHUNK)
- return currentsize + currentsize;
- else
- return currentsize + BIGCHUNK;
- }
- return currentsize + SMALLCHUNK;
+ /* Expand the buffer by an amount proportional to the current size,
+ giving us amortized linear-time behavior. Use a less-than-double
+ growth factor to avoid excessive allocation. */
+ return currentsize + (currentsize >> 3) + 6;
}
/* This is a hacked version of Python's fileobject.c:get_line(). */