summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Tim Peters <tim.peters@gmail.com> 2002-11-05 20:38:55 (GMT)
committer Tim Peters <tim.peters@gmail.com> 2002-11-05 20:38:55 (GMT)
commit 9288f95cb5d7be807c2ba6dbc195ec6b52ac3bd4 (patch)
tree 7d5a21cd8f3f32e6eb093e4399de65d2df4a157e
parent cd8fdbb3fad7f81a84507d2294613016fccd0954 (diff)
download cpython-9288f95cb5d7be807c2ba6dbc195ec6b52ac3bd4.zip
cpython-9288f95cb5d7be807c2ba6dbc195ec6b52ac3bd4.tar.gz
cpython-9288f95cb5d7be807c2ba6dbc195ec6b52ac3bd4.tar.bz2
Another round on SF patch 618135: gzip.py and files > 2G
The last round boosted "the limit" from 2GB to 4GB. This round gets rid of the 4GB limit. For files > 4GB, gzip stores just the last 32 bits of the file size, and now we play along with that too. Tested by hand (on a 6+GB file) on Win2K. Boosting from 2GB to 4GB was arguably enough "a bugfix". Going beyond that smells more like "new feature" to me.
-rw-r--r-- Lib/gzip.py 13
-rw-r--r-- Misc/NEWS 9
2 files changed, 15 insertions, 7 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 8802adb..36f9c36 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -24,6 +24,10 @@ def U32(i):
i += 1L << 32
return i
+def LOWU32(i):
+ """Return the low-order 32 bits of an int, as a non-negative int."""
+ return i & 0xFFFFFFFFL
+
def write32(output, value):
output.write(struct.pack("<l", value))
@@ -295,21 +299,22 @@ class GzipFile:
# We've read to the end of the file, so we have to rewind in order
# to reread the 8 bytes containing the CRC and the file size.
# We check the that the computed CRC and size of the
- # uncompressed data matches the stored values.
+ # uncompressed data matches the stored values. Note that the size
+ # stored is the true file size mod 2**32.
self.fileobj.seek(-8, 1)
crc32 = read32(self.fileobj)
isize = U32(read32(self.fileobj)) # may exceed 2GB
if U32(crc32) != U32(self.crc):
raise ValueError, "CRC check failed"
- elif isize != self.size:
+ elif isize != LOWU32(self.size):
raise ValueError, "Incorrect length of data produced"
def close(self):
if self.mode == WRITE:
self.fileobj.write(self.compress.flush())
write32(self.fileobj, self.crc)
- # self.size may exceed 2GB
- write32u(self.fileobj, self.size)
+ # self.size may exceed 2GB, or even 4GB
+ write32u(self.fileobj, LOWU32(self.size))
self.fileobj = None
elif self.mode == READ:
self.fileobj = None
diff --git a/Misc/NEWS b/Misc/NEWS
index c43474e..96263e7 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -355,9 +355,12 @@ Extension modules
Library
-------
-- gzip.py now handles files exceeding 2GB. Note that 4GB is still a
- fundamental limitation of the underlying gzip file format (it only
- has 32 bits to record the file size).
+- gzip.py now handles files exceeding 2GB. Files over 4GB also work
+ now (provided the OS supports it, and Python is configured with large
+ file support), but in that case the underlying gzip file format can
+ record only the least-significant 32 bits of the file size, so that
+ some tools working with gzipped files may report an incorrect file
+ size.
- xml.sax.saxutils.unescape has been added, to replace entity references
with their entity value.