diff options
author | Andrew M. Kuchling <amk@amk.ca> | 1999-03-25 21:49:14 (GMT) |
---|---|---|
committer | Andrew M. Kuchling <amk@amk.ca> | 1999-03-25 21:49:14 (GMT) |
commit | f4f119c05538f5ae102148a0e6463c2f3fce445d (patch) | |
tree | ae60d5f17afc4fb20b3624eac15898c1aa112b18 | |
parent | 52a0d7d802f802a7b613ea72701b0e103635000b (diff) | |
download | cpython-f4f119c05538f5ae102148a0e6463c2f3fce445d.zip cpython-f4f119c05538f5ae102148a0e6463c2f3fce445d.tar.gz cpython-f4f119c05538f5ae102148a0e6463c2f3fce445d.tar.bz2 |
Based on a suggestion from bruce@hams.com, make a trivial change to
allow using the 'a' flag as a mode for opening a GzipFile. gzip
files, surprisingly enough, can be concatenated and then decompressed;
the effect is to concatenate the two chunks of data.
If we support it on writing, it should also be supported on reading.
This *wasn't* trivial, and required rearranging the code in the
reading path, particularly the _read() method.
Raise IOError instead of RuntimeError in two cases, 'Not a gzipped file'
and 'Unknown compression method'
-rw-r--r-- | Lib/gzip.py | 99 |
1 files changed, 67 insertions, 32 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py index e0c7c5b..c0179a4 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -40,11 +40,13 @@ class GzipFile: if mode[0:1] == 'r': self.mode = READ - self._init_read() + # Set flag indicating start of a new member + self._new_member = 1 + self.extrabuf = "" + self.extrasize = 0 self.filename = filename - self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) - elif mode[0:1] == 'w': + elif mode[0:1] == 'w' or mode[0:1] == 'a': self.mode = WRITE self._init_write(filename) self.compress = zlib.compressobj(compresslevel, @@ -59,8 +61,6 @@ class GzipFile: if self.mode == WRITE: self._write_gzip_header() - elif self.mode == READ: - self._read_gzip_header() def __repr__(self): s = repr(self.fileobj) @@ -92,16 +92,14 @@ class GzipFile: def _init_read(self): self.crc = zlib.crc32("") self.size = 0 - self.extrabuf = "" - self.extrasize = 0 def _read_gzip_header(self): magic = self.fileobj.read(2) if magic != '\037\213': - raise RuntimeError, 'Not a gzipped file' + raise IOError, 'Not a gzipped file' method = ord( self.fileobj.read(1) ) if method != 8: - raise RuntimeError, 'Unknown compression method' + raise IOError, 'Unknown compression method' flag = ord( self.fileobj.read(1) ) # modtime = self.fileobj.read(4) # extraflag = self.fileobj.read(1) @@ -170,37 +168,74 @@ class GzipFile: self.extrasize = len(buf) + self.extrasize def _read(self, size=1024): - try: - buf = self.fileobj.read(size) - except AttributeError: - raise EOFError, "Reached EOF" + if self.fileobj is None: raise EOFError, "Reached EOF" + + if self._new_member: + # If the _new_member flag is set, we have to + # + # First, check if we're at the end of the file; + # if so, it's time to stop; no more members to read. + pos = self.fileobj.tell() # Save current position + self.fileobj.seek(0, 2) # Seek to end of file + if pos == self.fileobj.tell(): + self.fileobj = None + return EOFError, "Reached EOF" + else: + self.fileobj.seek( pos ) # Return to original position + + self._init_read() + self._read_gzip_header() + self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) + self._new_member = 0 + + # Read a chunk of data from the file + buf = self.fileobj.read(size) + + # If the EOF has been reached, flush the decompression object + # and mark this object as finished. + if buf == "": uncompress = self.decompress.flush() - if uncompress == "": - self._read_eof() - self.fileobj = None - raise EOFError, 'Reached EOF' - else: - uncompress = self.decompress.decompress(buf) - self.crc = zlib.crc32(uncompress, self.crc) - self.extrabuf = self.extrabuf + uncompress - self.extrasize = self.extrasize + len(uncompress) - self.size = self.size + len(uncompress) + self._read_eof() + self.fileobj = None + self._add_read_data( uncompress ) + raise EOFError, 'Reached EOF' + + uncompress = self.decompress.decompress(buf) + self._add_read_data( uncompress ) + + if self.decompress.unused_data != "": + # Ending case: we've come to the end of a member in the file, + # so seek back to the start of the unused data, finish up + # this member, and read a new gzip header. + # (The number of bytes to seek back is the length of the unused + # data, minus 8 because _read_eof() will rewind a further 8 bytes) + self.fileobj.seek( -len(self.decompress.unused_data)+8, 1) + + # Check the CRC and file size, and set the flag so we read + # a new member on the next call + self._read_eof() + self._new_member = 1 + + def _add_read_data(self, data): + self.crc = zlib.crc32(data, self.crc) + self.extrabuf = self.extrabuf + data + self.extrasize = self.extrasize + len(data) + self.size = self.size + len(data) def _read_eof(self): - # Andrew writes: - ## We've read to the end of the file, so we have to rewind in order - ## to reread the 8 bytes containing the CRC and the file size. The - ## decompressor is smart and knows when to stop, so feeding it - ## extra data is harmless. - self.fileobj.seek(-8, 2) + # We've read to the end of the file, so we have to rewind in order + # to reread the 8 bytes containing the CRC and the file size. + # We check the that the computed CRC and size of the + # uncompressed data matches the stored values. + self.fileobj.seek(-8, 1) crc32 = read32(self.fileobj) isize = read32(self.fileobj) if crc32 != self.crc: - self.error = "CRC check failed" + raise ValueError, "CRC check failed" elif isize != self.size: - self.error = "Incorrect length of data produced" - + raise ValueError, "Incorrect length of data produced" + def close(self): if self.mode == WRITE: self.fileobj.write(self.compress.flush()) |