diff options
author | Guido van Rossum <guido@python.org> | 2007-06-13 00:03:05 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-06-13 00:03:05 (GMT) |
commit | f09ca140acdae8efe1a61a813cdc4bed90ad004b (patch) | |
tree | 3d376b9057f221351c13a2c063ccb8f1b39b78be /Modules/bz2module.c | |
parent | da5b8f2d28f2f7ce47be5d88244eaefc66f7de3e (diff) | |
download | cpython-f09ca140acdae8efe1a61a813cdc4bed90ad004b.zip cpython-f09ca140acdae8efe1a61a813cdc4bed90ad004b.tar.gz cpython-f09ca140acdae8efe1a61a813cdc4bed90ad004b.tar.bz2 |
Checkpoint: half-fixed the bz2 module. 'U' is no longer supported.
Diffstat (limited to 'Modules/bz2module.c')
-rw-r--r-- | Modules/bz2module.c | 253 |
1 file changed, 42 insertions, 211 deletions
diff --git a/Modules/bz2module.c b/Modules/bz2module.c index 029232c..6e8e85f 100644 --- a/Modules/bz2module.c +++ b/Modules/bz2module.c @@ -96,16 +96,12 @@ typedef fpos_t Py_off_t; typedef struct { PyObject_HEAD - PyObject *file; + FILE *rawfp; char* f_buf; /* Allocated readahead buffer */ char* f_bufend; /* Points after last occupied position */ char* f_bufptr; /* Current buffer position */ - int f_univ_newline; /* Handle any newline convention */ - int f_newlinetypes; /* Types of newlines seen */ - int f_skipnextlf; /* Skip next \n */ - BZFILE *fp; int mode; Py_off_t pos; @@ -233,9 +229,6 @@ Util_GetLine(BZ2FileObject *f, int n) size_t increment; /* amount to increment the buffer */ PyObject *v; int bzerror; - int newlinetypes = f->f_newlinetypes; - int skipnextlf = f->f_skipnextlf; - int univ_newline = f->f_univ_newline; total_v_size = n > 0 ? n : 100; v = PyBytes_FromStringAndSize((char *)NULL, total_v_size); @@ -247,47 +240,12 @@ Util_GetLine(BZ2FileObject *f, int n) for (;;) { Py_BEGIN_ALLOW_THREADS - if (univ_newline) { - while (1) { - BZ2_bzRead(&bzerror, f->fp, &c, 1); - f->pos++; - if (bzerror != BZ_OK || buf == end) - break; - if (skipnextlf) { - skipnextlf = 0; - if (c == '\n') { - /* Seeing a \n here with - * skipnextlf true means we - * saw a \r before. 
- */ - newlinetypes |= NEWLINE_CRLF; - BZ2_bzRead(&bzerror, f->fp, - &c, 1); - if (bzerror != BZ_OK) - break; - } else { - newlinetypes |= NEWLINE_CR; - } - } - if (c == '\r') { - skipnextlf = 1; - c = '\n'; - } else if ( c == '\n') - newlinetypes |= NEWLINE_LF; - *buf++ = c; - if (c == '\n') break; - } - if (bzerror == BZ_STREAM_END && skipnextlf) - newlinetypes |= NEWLINE_CR; - } else /* If not universal newlines use the normal loop */ - do { - BZ2_bzRead(&bzerror, f->fp, &c, 1); - f->pos++; - *buf++ = c; - } while (bzerror == BZ_OK && c != '\n' && buf != end); + do { + BZ2_bzRead(&bzerror, f->fp, &c, 1); + f->pos++; + *buf++ = c; + } while (bzerror == BZ_OK && c != '\n' && buf != end); Py_END_ALLOW_THREADS - f->f_newlinetypes = newlinetypes; - f->f_skipnextlf = skipnextlf; if (bzerror == BZ_STREAM_END) { f->size = f->pos; f->mode = MODE_READ_EOF; @@ -329,74 +287,6 @@ Util_GetLine(BZ2FileObject *f, int n) return v; } -/* This is a hacked version of Python's - * fileobject.c:Py_UniversalNewlineFread(). */ -size_t -Util_UnivNewlineRead(int *bzerror, BZFILE *stream, - char* buf, size_t n, BZ2FileObject *f) -{ - char *dst = buf; - int newlinetypes, skipnextlf; - - assert(buf != NULL); - assert(stream != NULL); - - if (!f->f_univ_newline) - return BZ2_bzRead(bzerror, stream, buf, n); - - newlinetypes = f->f_newlinetypes; - skipnextlf = f->f_skipnextlf; - - /* Invariant: n is the number of bytes remaining to be filled - * in the buffer. - */ - while (n) { - size_t nread; - int shortread; - char *src = dst; - - nread = BZ2_bzRead(bzerror, stream, dst, n); - assert(nread <= n); - n -= nread; /* assuming 1 byte out for each in; will adjust */ - shortread = n != 0; /* true iff EOF or error */ - while (nread--) { - char c = *src++; - if (c == '\r') { - /* Save as LF and set flag to skip next LF. */ - *dst++ = '\n'; - skipnextlf = 1; - } - else if (skipnextlf && c == '\n') { - /* Skip LF, and remember we saw CR LF. 
*/ - skipnextlf = 0; - newlinetypes |= NEWLINE_CRLF; - ++n; - } - else { - /* Normal char to be stored in buffer. Also - * update the newlinetypes flag if either this - * is an LF or the previous char was a CR. - */ - if (c == '\n') - newlinetypes |= NEWLINE_LF; - else if (skipnextlf) - newlinetypes |= NEWLINE_CR; - *dst++ = c; - skipnextlf = 0; - } - } - if (shortread) { - /* If this is EOF, update type flags. */ - if (skipnextlf && *bzerror == BZ_STREAM_END) - newlinetypes |= NEWLINE_CR; - break; - } - } - f->f_newlinetypes = newlinetypes; - f->f_skipnextlf = skipnextlf; - return dst - buf; -} - /* This is a hacked version of Python's fileobject.c:drop_readahead(). */ static void Util_DropReadAhead(BZ2FileObject *f) @@ -429,8 +319,7 @@ Util_ReadAhead(BZ2FileObject *f, int bufsize) return -1; } Py_BEGIN_ALLOW_THREADS - chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf, - bufsize, f); + chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize); Py_END_ALLOW_THREADS f->pos += chunksize; if (bzerror == BZ_STREAM_END) { @@ -548,10 +437,9 @@ BZ2File_read(BZ2FileObject *self, PyObject *args) for (;;) { Py_BEGIN_ALLOW_THREADS - chunksize = Util_UnivNewlineRead(&bzerror, self->fp, - BUF(ret)+bytesread, - buffersize-bytesread, - self); + chunksize = BZ2_bzRead(&bzerror, self->fp, + BUF(ret)+bytesread, + buffersize-bytesread); self->pos += chunksize; Py_END_ALLOW_THREADS bytesread += chunksize; @@ -685,9 +573,8 @@ BZ2File_readlines(BZ2FileObject *self, PyObject *args) for (;;) { Py_BEGIN_ALLOW_THREADS - nread = Util_UnivNewlineRead(&bzerror, self->fp, - buffer+nfilled, - buffersize-nfilled, self); + nread = BZ2_bzRead(&bzerror, self->fp, + buffer+nfilled, buffersize-nfilled); self->pos += nread; Py_END_ALLOW_THREADS if (bzerror == BZ_STREAM_END) { @@ -1043,10 +930,8 @@ BZ2File_seek(BZ2FileObject *self, PyObject *args) assert(self->mode != MODE_READ_EOF); for (;;) { Py_BEGIN_ALLOW_THREADS - chunksize = Util_UnivNewlineRead( - &bzerror, self->fp, - buffer, 
buffersize, - self); + chunksize = BZ2_bzRead(&bzerror, self->fp, + buffer, buffersize); self->pos += chunksize; Py_END_ALLOW_THREADS @@ -1075,19 +960,14 @@ BZ2File_seek(BZ2FileObject *self, PyObject *args) offset -= self->pos; } else { /* we cannot move back, so rewind the stream */ - FILE *fp = NULL; /* XXX temporary!!! */ BZ2_bzReadClose(&bzerror, self->fp); if (bzerror != BZ_OK) { Util_CatchBZ2Error(bzerror); goto cleanup; } - ret = PyObject_CallMethod(self->file, "seek", "(i)", 0); - if (!ret) - goto cleanup; - Py_DECREF(ret); - ret = NULL; + rewind(self->rawfp); self->pos = 0; - self->fp = BZ2_bzReadOpen(&bzerror, fp, + self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp, 0, 0, NULL, 0); if (bzerror != BZ_OK) { Util_CatchBZ2Error(bzerror); @@ -1110,8 +990,7 @@ BZ2File_seek(BZ2FileObject *self, PyObject *args) * condition above). buffersize is 8192. */ readsize = (size_t)(offset-bytesread); Py_BEGIN_ALLOW_THREADS - chunksize = Util_UnivNewlineRead(&bzerror, self->fp, - buffer, readsize, self); + chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize); self->pos += chunksize; Py_END_ALLOW_THREADS bytesread += chunksize; @@ -1177,6 +1056,10 @@ BZ2File_close(BZ2FileObject *self) PyObject *ret = NULL; int bzerror = BZ_OK; + if (self->mode == MODE_CLOSED) { + Py_RETURN_NONE; + } + ACQUIRE_LOCK(self); switch (self->mode) { case MODE_READ: @@ -1189,11 +1072,14 @@ BZ2File_close(BZ2FileObject *self) break; } self->mode = MODE_CLOSED; - ret = PyObject_CallMethod(self->file, "close", NULL); - if (bzerror != BZ_OK) { + fclose(self->rawfp); + self->rawfp = NULL; + if (bzerror == BZ_OK) { + Py_INCREF(Py_None); + ret = Py_None; + } + else { Util_CatchBZ2Error(bzerror); - Py_XDECREF(ret); - ret = NULL; } RELEASE_LOCK(self); @@ -1218,63 +1104,15 @@ static PyMethodDef BZ2File_methods[] = { /* ===================================================================== */ /* Getters and setters of BZ2File. */ -/* This is a hacked version of Python's fileobject.c:get_newlines(). 
*/ -static PyObject * -BZ2File_get_newlines(BZ2FileObject *self, void *closure) -{ - switch (self->f_newlinetypes) { - case NEWLINE_UNKNOWN: - Py_INCREF(Py_None); - return Py_None; - case NEWLINE_CR: - return PyBytes_FromStringAndSize("\r", 1); - case NEWLINE_LF: - return PyBytes_FromStringAndSize("\n", 1); - case NEWLINE_CR|NEWLINE_LF: - return Py_BuildValue("(ss)", "\r", "\n"); - case NEWLINE_CRLF: - return PyBytes_FromStringAndSize("\r\n", 2); - case NEWLINE_CR|NEWLINE_CRLF: - return Py_BuildValue("(ss)", "\r", "\r\n"); - case NEWLINE_LF|NEWLINE_CRLF: - return Py_BuildValue("(ss)", "\n", "\r\n"); - case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF: - return Py_BuildValue("(sss)", "\r", "\n", "\r\n"); - default: - PyErr_Format(PyExc_SystemError, - "Unknown newlines value 0x%x\n", - self->f_newlinetypes); - return NULL; - } -} - static PyObject * BZ2File_get_closed(BZ2FileObject *self, void *closure) { return PyInt_FromLong(self->mode == MODE_CLOSED); } -static PyObject * -BZ2File_get_mode(BZ2FileObject *self, void *closure) -{ - return PyObject_GetAttrString(self->file, "mode"); -} - -static PyObject * -BZ2File_get_name(BZ2FileObject *self, void *closure) -{ - return PyObject_GetAttrString(self->file, "name"); -} - static PyGetSetDef BZ2File_getset[] = { {"closed", (getter)BZ2File_get_closed, NULL, "True if the file is closed"}, - {"newlines", (getter)BZ2File_get_newlines, NULL, - "end-of-line convention used in this file"}, - {"mode", (getter)BZ2File_get_mode, NULL, - "file mode ('r', 'w', or 'U')"}, - {"name", (getter)BZ2File_get_name, NULL, - "file name"}, {NULL} /* Sentinel */ }; @@ -1286,9 +1124,8 @@ static int BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) { static char *kwlist[] = {"filename", "mode", "buffering", - "compresslevel", 0}; - FILE *fp = NULL; /* XXX temporary!!! 
*/ - PyObject *name; + "compresslevel", 0}; + char *name; char *mode = "r"; int buffering = -1; int compresslevel = 9; @@ -1297,7 +1134,7 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) self->size = -1; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File", + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|sii:BZ2File", kwlist, &name, &mode, &buffering, &compresslevel)) return -1; @@ -1321,14 +1158,6 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) case 'b': break; - case 'U': -#ifdef __VMS - self->f_univ_newline = 0; -#else - self->f_univ_newline = 1; -#endif - break; - default: error = 1; break; @@ -1349,10 +1178,12 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) mode = (mode_char == 'r') ? "rb" : "wb"; - self->file = NULL; /* XXX io.open(name, mode, buffering); */ - PyErr_SetString(PyExc_RuntimeError, "can't open bz2 files yet"); - if (self->file == NULL) + self->rawfp = fopen(name, mode); + if (self->rawfp == NULL) { + PyErr_SetFromErrno(PyExc_IOError); return -1; + } + /* XXX Ignore buffering */ /* From now on, we have stuff to dealloc, so jump to error label * instead of returning */ @@ -1366,12 +1197,10 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) #endif if (mode_char == 'r') - self->fp = BZ2_bzReadOpen(&bzerror, - fp, + self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp, 0, 0, NULL, 0); else - self->fp = BZ2_bzWriteOpen(&bzerror, - fp, + self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp, compresslevel, 0, 0); if (bzerror != BZ_OK) { @@ -1384,7 +1213,8 @@ BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs) return 0; error: - Py_CLEAR(self->file); + fclose(self->rawfp); + self->rawfp = NULL; #ifdef WITH_THREAD if (self->lock) { PyThread_free_lock(self->lock); @@ -1413,7 +1243,8 @@ BZ2File_dealloc(BZ2FileObject *self) break; } Util_DropReadAhead(self); - Py_XDECREF(self->file); + if (self->rawfp != NULL) + fclose(self->rawfp); 
self->ob_type->tp_free((PyObject *)self); } |