Based on a suggestion from bruce@hams.com, make a trivial change to allow using the 'a' flag as a mode for opening a GzipFile.

gzip files, surprisingly enough, can be concatenated and then decompressed; the effect is to concatenate the two chunks of data. If we support it on writing, it should also be supported on reading. This *wasn't* trivial, and required rearranging the code in the reading path, particularly the _read() method.

Raise IOError instead of RuntimeError in two cases, 'Not a gzipped file' and 'Unknown compression method'.
author: Andrew M. Kuchling <amk@amk.ca> 1999-03-25 21:49:14 (GMT)
committer: Andrew M. Kuchling <amk@amk.ca> 1999-03-25 21:49:14 (GMT)
commit: f4f119c05538f5ae102148a0e6463c2f3fce445d (patch)
tree: ae60d5f17afc4fb20b3624eac15898c1aa112b18 /Lib/gzip.py
parent: 52a0d7d802f802a7b613ea72701b0e103635000b (diff)
download: cpython-f4f119c05538f5ae102148a0e6463c2f3fce445d.zip
cpython-f4f119c05538f5ae102148a0e6463c2f3fce445d.tar.gz
cpython-f4f119c05538f5ae102148a0e6463c2f3fce445d.tar.bz2
1 files changed, 67 insertions, 32 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py
index e0c7c5b..c0179a4 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -40,11 +40,13 @@ class GzipFile:
 
         if mode[0:1] == 'r':
             self.mode = READ
-            self._init_read()
+ 	    # Set flag indicating start of a new member
+            self._new_member = 1 
+            self.extrabuf = ""
+            self.extrasize = 0
             self.filename = filename
-            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
 
-        elif mode[0:1] == 'w':
+        elif mode[0:1] == 'w' or mode[0:1] == 'a':
             self.mode = WRITE
             self._init_write(filename)
             self.compress = zlib.compressobj(compresslevel,
@@ -59,8 +61,6 @@ class GzipFile:
 
         if self.mode == WRITE:
             self._write_gzip_header()
-        elif self.mode == READ:
-            self._read_gzip_header()
 
     def __repr__(self):
         s = repr(self.fileobj)
@@ -92,16 +92,14 @@ class GzipFile:
     def _init_read(self):
         self.crc = zlib.crc32("")
         self.size = 0
-        self.extrabuf = ""
-        self.extrasize = 0
 
     def _read_gzip_header(self):
         magic = self.fileobj.read(2)
         if magic != '\037\213':
-            raise RuntimeError, 'Not a gzipped file'
+            raise IOError, 'Not a gzipped file'
         method = ord( self.fileobj.read(1) )
         if method != 8:
-            raise RuntimeError, 'Unknown compression method'
+            raise IOError, 'Unknown compression method'
         flag = ord( self.fileobj.read(1) )
         # modtime = self.fileobj.read(4)
         # extraflag = self.fileobj.read(1)
@@ -170,37 +168,74 @@ class GzipFile:
         self.extrasize = len(buf) + self.extrasize
 
     def _read(self, size=1024):
-        try:
-            buf = self.fileobj.read(size)
-        except AttributeError:
-            raise EOFError, "Reached EOF"
+        if self.fileobj is None: raise EOFError, "Reached EOF"
+ 	
+        if self._new_member:
+            # If the _new_member flag is set, we have to start reading
+            # a new member: parse its header and reset the decompressor.
+            # First, check if we're at the end of the file;
+            # if so, it's time to stop; no more members to read.
+            pos = self.fileobj.tell()   # Save current position
+            self.fileobj.seek(0, 2)     # Seek to end of file
+            if pos == self.fileobj.tell():
+                self.fileobj = None
+                return EOFError, "Reached EOF"
+            else: 
+                self.fileobj.seek( pos ) # Return to original position
+  
+            self._init_read()       
+            self._read_gzip_header()
+            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
+            self._new_member = 0
+ 
+        # Read a chunk of data from the file
+        buf = self.fileobj.read(size)
+ 
+        # If the EOF has been reached, flush the decompression object
+        # and mark this object as finished.
+       
         if buf == "":
             uncompress = self.decompress.flush()
-            if uncompress == "":
-                self._read_eof()
-                self.fileobj = None
-                raise EOFError, 'Reached EOF'
-        else:
-            uncompress = self.decompress.decompress(buf)
-        self.crc = zlib.crc32(uncompress, self.crc)
-        self.extrabuf = self.extrabuf + uncompress
-        self.extrasize = self.extrasize + len(uncompress)
-        self.size = self.size + len(uncompress)
+            self._read_eof()
+            self.fileobj = None
+            self._add_read_data( uncompress )
+            raise EOFError, 'Reached EOF'
+  
+        uncompress = self.decompress.decompress(buf)
+        self._add_read_data( uncompress )
+
+        if self.decompress.unused_data != "":
+            # Ending case: we've come to the end of a member in the file,
+            # so seek back to the start of the unused data, finish up
+            # this member, and read a new gzip header.
+            # (The number of bytes to seek back is the length of the unused
+            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
+            self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)
+
+            # Check the CRC and file size, and set the flag so we read
+            # a new member on the next call 
+            self._read_eof()
+            self._new_member = 1        
+	    
+    def _add_read_data(self, data):	        
+        self.crc = zlib.crc32(data, self.crc)
+        self.extrabuf = self.extrabuf + data
+        self.extrasize = self.extrasize + len(data)
+        self.size = self.size + len(data)
 
     def _read_eof(self):
-        # Andrew writes:
-        ## We've read to the end of the file, so we have to rewind in order
-        ## to reread the 8 bytes containing the CRC and the file size.  The
-        ## decompressor is smart and knows when to stop, so feeding it
-        ## extra data is harmless.  
-        self.fileobj.seek(-8, 2)
+        # We've read to the end of the file, so we have to rewind in order
+        # to reread the 8 bytes containing the CRC and the file size.  
+        # We check that the computed CRC and size of the
+        # uncompressed data matches the stored values.
+        self.fileobj.seek(-8, 1)
         crc32 = read32(self.fileobj)
         isize = read32(self.fileobj)
         if crc32 != self.crc:
-            self.error = "CRC check failed"
+            raise ValueError, "CRC check failed"
         elif isize != self.size:
-            self.error = "Incorrect length of data produced"
-
+            raise ValueError, "Incorrect length of data produced"
+          
     def close(self):
         if self.mode == WRITE:
             self.fileobj.write(self.compress.flush())
author	Andrew M. Kuchling <amk@amk.ca>	1999-03-25 21:49:14 (GMT)
committer	Andrew M. Kuchling <amk@amk.ca>	1999-03-25 21:49:14 (GMT)
commit	f4f119c05538f5ae102148a0e6463c2f3fce445d (patch)
tree	ae60d5f17afc4fb20b3624eac15898c1aa112b18 /Lib/gzip.py
parent	52a0d7d802f802a7b613ea72701b0e103635000b (diff)
download	cpython-f4f119c05538f5ae102148a0e6463c2f3fce445d.zip cpython-f4f119c05538f5ae102148a0e6463c2f3fce445d.tar.gz cpython-f4f119c05538f5ae102148a0e6463c2f3fce445d.tar.bz2