summaryrefslogtreecommitdiffstats
path: root/Objects/fileobject.c
diff options
context:
space:
mode:
author: Tim Peters <tim.peters@gmail.com> 2002-03-23 10:03:50 (GMT)
committer: Tim Peters <tim.peters@gmail.com> 2002-03-23 10:03:50 (GMT)
commit: ddea208be9e2a8fa281e25ebbc890378dd2aa286 (patch)
tree: dfce6c87f4eaac29a358e4064cfb10600b18ba98 /Objects/fileobject.c
parent: 91cc17d20e0ad668944fcf8ef9a6c523455d64d7 (diff)
downloadcpython-ddea208be9e2a8fa281e25ebbc890378dd2aa286.zip
cpython-ddea208be9e2a8fa281e25ebbc890378dd2aa286.tar.gz
cpython-ddea208be9e2a8fa281e25ebbc890378dd2aa286.tar.bz2
Give Python a debug-mode pymalloc, much as sketched on Python-Dev.

When WITH_PYMALLOC is defined, define PYMALLOC_DEBUG to enable the debug allocator. This can be done independently of the build type (release or debug). A debug build automatically defines PYMALLOC_DEBUG when pymalloc is enabled. It's a detected error to define PYMALLOC_DEBUG when pymalloc isn't enabled.

Two debugging entry points are defined only under PYMALLOC_DEBUG:

+ _PyMalloc_DebugCheckAddress(const void *p) can be used (e.g., from gdb) to sanity-check a memory block obtained from pymalloc. It sprays info to stderr (see next) and dies via Py_FatalError if the block is detectably damaged.

+ _PyMalloc_DebugDumpAddress(const void *p) can be used to spray info about a debug memory block to stderr.

A tiny start at implementing "API family" checks isn't good for anything yet.

_PyMalloc_DebugRealloc() has been optimized to do little when the new size is <= old size. However, if the new size is larger, it really can't call the underlying realloc() routine without either violating its contract, or knowing something non-trivial about how the underlying realloc() works. A memcpy is always done in this case.

This was a disaster for one (and only one) of the std tests: test_bufio creates single text file lines up to a million characters long. On Windows, fileobject.c's get_line() uses the horridly funky getline_via_fgets(), which keeps growing and growing a string object hoping to find a newline. It grew the string object 1000 bytes each time, so for a million-character string it took approximately forever (I gave up after a few minutes).

So, also:

fileobject.c, getline_via_fgets(): When a single line is outrageously long, grow the string object at a mildly exponential rate, instead of just 1000 bytes at a time.

That's enough so that a debug-build test_bufio finishes in about 5 seconds on my Win98SE box. I'm curious to try this on Win2K, because it has very different memory behavior than Win9X, and test_bufio always took a factor of 10 longer to complete on Win2K. It *could* be that the endless reallocs were simply killing it on Win2K even in the release build.

Diffstat (limited to 'Objects/fileobject.c')
-rw-r--r-- Objects/fileobject.c 13
1 files changed, 5 insertions, 8 deletions
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index 47e6b17..6a82cce 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -772,13 +772,9 @@ getline_via_fgets(FILE *fp)
* cautions about boosting that. 300 was chosen because the worst real-life
* text-crunching job reported on Python-Dev was a mail-log crawler where over
* half the lines were 254 chars.
- * INCBUFSIZE is the amount by which we grow the buffer, if MAXBUFSIZE isn't
- * enough. It doesn't much matter what this is set to: we only get here for
- * absurdly long lines anyway.
*/
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
-#define INCBUFSIZE 1000
char* p; /* temp */
char buf[MAXBUFSIZE];
PyObject* v; /* the string object result */
@@ -786,6 +782,7 @@ getline_via_fgets(FILE *fp)
char* pvend; /* address one beyond last free slot */
size_t nfree; /* # of free buffer slots; pvend-pvfree */
size_t total_v_size; /* total # of slots in buffer */
+ size_t increment; /* amount to increment the buffer */
/* Optimize for normal case: avoid _PyString_Resize if at all
* possible via first reading into stack buffer "buf".
@@ -853,7 +850,7 @@ getline_via_fgets(FILE *fp)
/* The stack buffer isn't big enough; malloc a string object and read
* into its buffer.
*/
- total_v_size = MAXBUFSIZE + INCBUFSIZE;
+ total_v_size = MAXBUFSIZE << 1;
v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
if (v == NULL)
return v;
@@ -897,7 +894,8 @@ getline_via_fgets(FILE *fp)
}
/* expand buffer and try again */
assert(*(pvend-1) == '\0');
- total_v_size += INCBUFSIZE;
+ increment = total_v_size >> 2; /* mild exponential growth */
+ total_v_size += increment;
if (total_v_size > INT_MAX) {
PyErr_SetString(PyExc_OverflowError,
"line is longer than a Python string can hold");
@@ -907,14 +905,13 @@ getline_via_fgets(FILE *fp)
if (_PyString_Resize(&v, (int)total_v_size) < 0)
return NULL;
/* overwrite the trailing null byte */
- pvfree = BUF(v) + (total_v_size - INCBUFSIZE - 1);
+ pvfree = BUF(v) + (total_v_size - increment - 1);
}
if (BUF(v) + total_v_size != p)
_PyString_Resize(&v, p - BUF(v));
return v;
#undef INITBUFSIZE
#undef MAXBUFSIZE
-#undef INCBUFSIZE
}
#endif /* ifdef USE_FGETS_IN_GETLINE */