author     Andrew M. Kuchling <amk@amk.ca>    1999-03-25 21:49:14 (GMT)
committer  Andrew M. Kuchling <amk@amk.ca>    1999-03-25 21:49:14 (GMT)
commit     f4f119c05538f5ae102148a0e6463c2f3fce445d
tree       ae60d5f17afc4fb20b3624eac15898c1aa112b18
parent     52a0d7d802f802a7b613ea72701b0e103635000b
Based on a suggestion from bruce@hams.com, make a trivial change to
allow using the 'a' flag as a mode for opening a GzipFile. gzip files,
surprisingly enough, can be concatenated and then decompressed; the
effect is to concatenate the two chunks of data. If we support it on
writing, it should also be supported on reading. This *wasn't* trivial,
and required rearranging the code in the reading path, particularly the
_read() method.

Also raise IOError instead of RuntimeError in two cases: 'Not a gzipped
file' and 'Unknown compression method'.
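For illustration (not part of the commit itself), here is a minimal sketch of
the behaviour the change enables, written in the same early-Python style as
the patched module; the file name and payload strings are invented for the
example:

    import gzip

    # Write an initial member, then append a second one using the new 'a' mode.
    f = gzip.GzipFile('example.gz', 'w')
    f.write('first chunk of data\n')
    f.close()

    f = gzip.GzipFile('example.gz', 'a')   # previously only 'r' and 'w' worked
    f.write('second chunk of data\n')
    f.close()

    # Reading the file back decompresses both members in sequence,
    # yielding the concatenation of the two chunks.
    f = gzip.GzipFile('example.gz', 'r')
    print f.read()   # first chunk of data\nsecond chunk of data\n
    f.close()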
-rw-r--r--  Lib/gzip.py  |  99
1 file changed, 67 insertions, 32 deletions
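Also for context rather than part of the patch: the reworked _read() path
relies on the fact that a zlib decompression object stops at the end of one
compressed stream and leaves any trailing bytes in its unused_data attribute.
A minimal sketch of that behaviour, using ordinary zlib streams rather than
raw gzip members:

    import zlib

    first = zlib.compress('first member ')
    second = zlib.compress('second member')

    d = zlib.decompressobj()
    data = d.decompress(first + second)
    print repr(data)            # only 'first member ' is decompressed
    print len(d.unused_data)    # the untouched bytes of the second stream

    # GzipFile._read() does the analogous thing with raw deflate data
    # (wbits = -zlib.MAX_WBITS): it seeks back over the unused bytes,
    # verifies the CRC/size trailer, and then reads the next member's header.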
diff --git a/Lib/gzip.py b/Lib/gzip.py
index e0c7c5b..c0179a4 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -40,11 +40,13 @@ class GzipFile:
if mode[0:1] == 'r':
self.mode = READ
- self._init_read()
+ # Set flag indicating start of a new member
+ self._new_member = 1
+ self.extrabuf = ""
+ self.extrasize = 0
self.filename = filename
- self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
- elif mode[0:1] == 'w':
+ elif mode[0:1] == 'w' or mode[0:1] == 'a':
self.mode = WRITE
self._init_write(filename)
self.compress = zlib.compressobj(compresslevel,
@@ -59,8 +61,6 @@ class GzipFile:
if self.mode == WRITE:
self._write_gzip_header()
- elif self.mode == READ:
- self._read_gzip_header()
def __repr__(self):
s = repr(self.fileobj)
@@ -92,16 +92,14 @@ class GzipFile:
def _init_read(self):
self.crc = zlib.crc32("")
self.size = 0
- self.extrabuf = ""
- self.extrasize = 0
def _read_gzip_header(self):
magic = self.fileobj.read(2)
if magic != '\037\213':
- raise RuntimeError, 'Not a gzipped file'
+ raise IOError, 'Not a gzipped file'
method = ord( self.fileobj.read(1) )
if method != 8:
- raise RuntimeError, 'Unknown compression method'
+ raise IOError, 'Unknown compression method'
flag = ord( self.fileobj.read(1) )
# modtime = self.fileobj.read(4)
# extraflag = self.fileobj.read(1)
@@ -170,37 +168,74 @@ class GzipFile:
self.extrasize = len(buf) + self.extrasize
def _read(self, size=1024):
- try:
- buf = self.fileobj.read(size)
- except AttributeError:
- raise EOFError, "Reached EOF"
+ if self.fileobj is None: raise EOFError, "Reached EOF"
+
+ if self._new_member:
+            # If the _new_member flag is set, we have to
+            # start reading a new member of the gzip file.
+            #
+ # First, check if we're at the end of the file;
+ # if so, it's time to stop; no more members to read.
+ pos = self.fileobj.tell() # Save current position
+ self.fileobj.seek(0, 2) # Seek to end of file
+ if pos == self.fileobj.tell():
+ self.fileobj = None
+                raise EOFError, "Reached EOF"
+ else:
+ self.fileobj.seek( pos ) # Return to original position
+
+ self._init_read()
+ self._read_gzip_header()
+ self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
+ self._new_member = 0
+
+ # Read a chunk of data from the file
+ buf = self.fileobj.read(size)
+
+ # If the EOF has been reached, flush the decompression object
+ # and mark this object as finished.
+
if buf == "":
uncompress = self.decompress.flush()
- if uncompress == "":
- self._read_eof()
- self.fileobj = None
- raise EOFError, 'Reached EOF'
- else:
- uncompress = self.decompress.decompress(buf)
- self.crc = zlib.crc32(uncompress, self.crc)
- self.extrabuf = self.extrabuf + uncompress
- self.extrasize = self.extrasize + len(uncompress)
- self.size = self.size + len(uncompress)
+ self._read_eof()
+ self.fileobj = None
+ self._add_read_data( uncompress )
+ raise EOFError, 'Reached EOF'
+
+ uncompress = self.decompress.decompress(buf)
+ self._add_read_data( uncompress )
+
+ if self.decompress.unused_data != "":
+ # Ending case: we've come to the end of a member in the file,
+ # so seek back to the start of the unused data, finish up
+ # this member, and read a new gzip header.
+ # (The number of bytes to seek back is the length of the unused
+ # data, minus 8 because _read_eof() will rewind a further 8 bytes)
+ self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)
+
+ # Check the CRC and file size, and set the flag so we read
+ # a new member on the next call
+ self._read_eof()
+ self._new_member = 1
+
+ def _add_read_data(self, data):
+ self.crc = zlib.crc32(data, self.crc)
+ self.extrabuf = self.extrabuf + data
+ self.extrasize = self.extrasize + len(data)
+ self.size = self.size + len(data)
def _read_eof(self):
- # Andrew writes:
- ## We've read to the end of the file, so we have to rewind in order
- ## to reread the 8 bytes containing the CRC and the file size. The
- ## decompressor is smart and knows when to stop, so feeding it
- ## extra data is harmless.
- self.fileobj.seek(-8, 2)
+ # We've read to the end of the file, so we have to rewind in order
+ # to reread the 8 bytes containing the CRC and the file size.
+        # We check that the computed CRC and size of the
+        # uncompressed data match the stored values.
+ self.fileobj.seek(-8, 1)
crc32 = read32(self.fileobj)
isize = read32(self.fileobj)
if crc32 != self.crc:
- self.error = "CRC check failed"
+ raise ValueError, "CRC check failed"
elif isize != self.size:
- self.error = "Incorrect length of data produced"
-
+ raise ValueError, "Incorrect length of data produced"
+
def close(self):
if self.mode == WRITE:
self.fileobj.write(self.compress.flush())