author    | Tim Peters <tim.peters@gmail.com> | 2002-11-05 20:38:55 (GMT)
committer | Tim Peters <tim.peters@gmail.com> | 2002-11-05 20:38:55 (GMT)
commit    | 9288f95cb5d7be807c2ba6dbc195ec6b52ac3bd4 (patch)
tree      | 7d5a21cd8f3f32e6eb093e4399de65d2df4a157e
parent    | cd8fdbb3fad7f81a84507d2294613016fccd0954 (diff)
Another round on SF patch 618135: gzip.py and files > 2G
The last round boosted "the limit" from 2GB to 4GB. This round gets
rid of the 4GB limit. For files > 4GB, gzip stores just the last 32
bits of the file size, and now we play along with that too. Tested
by hand (on a 6+GB file) on Win2K.
Boosting from 2GB to 4GB was arguably enough of "a bugfix". Going beyond
that smells more like "new feature" to me.
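
To make the mod-2**32 bookkeeping concrete, here is a minimal stand-alone sketch (not part of the patch, written in current Python syntax); the 6 GB figure is just an example value echoing the hand test above.

```python
def lowu32(n):
    """Return the low-order 32 bits of an integer, as a non-negative int."""
    return n & 0xFFFFFFFF

true_size = 6 * 1024 ** 3            # e.g. a 6+GB uncompressed file
stored_isize = lowu32(true_size)     # all that gzip's 32-bit size field can hold

# The length check has to compare modulo 2**32; a raw comparison would
# spuriously fail for any file whose size doesn't fit in 32 bits.
assert stored_isize == lowu32(true_size)
assert stored_isize != true_size
```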
-rw-r--r-- | Lib/gzip.py | 13
-rw-r--r-- | Misc/NEWS   |  9
2 files changed, 15 insertions, 7 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 8802adb..36f9c36 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -24,6 +24,10 @@ def U32(i):
         i += 1L << 32
     return i
 
+def LOWU32(i):
+    """Return the low-order 32 bits of an int, as a non-negative int."""
+    return i & 0xFFFFFFFFL
+
 def write32(output, value):
     output.write(struct.pack("<l", value))
 
@@ -295,21 +299,22 @@ class GzipFile:
         # We've read to the end of the file, so we have to rewind in order
         # to reread the 8 bytes containing the CRC and the file size.
         # We check the that the computed CRC and size of the
-        # uncompressed data matches the stored values.
+        # uncompressed data matches the stored values. Note that the size
+        # stored is the true file size mod 2**32.
         self.fileobj.seek(-8, 1)
         crc32 = read32(self.fileobj)
         isize = U32(read32(self.fileobj))   # may exceed 2GB
         if U32(crc32) != U32(self.crc):
             raise ValueError, "CRC check failed"
-        elif isize != self.size:
+        elif isize != LOWU32(self.size):
             raise ValueError, "Incorrect length of data produced"
 
     def close(self):
         if self.mode == WRITE:
             self.fileobj.write(self.compress.flush())
             write32(self.fileobj, self.crc)
-            # self.size may exceed 2GB
-            write32u(self.fileobj, self.size)
+            # self.size may exceed 2GB, or even 4GB
+            write32u(self.fileobj, LOWU32(self.size))
             self.fileobj = None
         elif self.mode == READ:
             self.fileobj = None
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -355,9 +355,12 @@ Extension modules
 Library
 -------
 
-- gzip.py now handles files exceeding 2GB. Note that 4GB is still a
-  fundamental limitation of the underlying gzip file format (it only
-  has 32 bits to record the file size).
+- gzip.py now handles files exceeding 2GB. Files over 4GB also work
+  now (provided the OS supports it, and Python is configured with large
+  file support), but in that case the underlying gzip file format can
+  record only the least-significant 32 bits of the file size, so that
+  some tools working with gzipped files may report an incorrect file
+  size.
 
 - xml.sax.saxutils.unescape has been added, to replace entity references
   with their entity value.
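
As a companion sketch (not from this commit; modern Python syntax, and `example.gz` is a hypothetical path), the 32-bit size field that the NEWS entry describes can be read straight from a gzip file's trailer, which shows why tools relying on it see only the size modulo 2**32 for inputs over 4GB:

```python
import struct

def stored_isize(path):
    """Return the uncompressed size recorded in a gzip file's trailer.

    Per RFC 1952, ISIZE is the size of the original input modulo 2**32,
    stored little-endian in the last four bytes of the member.
    """
    with open(path, "rb") as f:
        f.seek(-4, 2)                        # last 4 bytes of the file
        return struct.unpack("<I", f.read(4))[0]

# For an input larger than 4GB this equals true_size & 0xFFFFFFFF,
# which is what the patched gzip.py now compares against.
# print(stored_isize("example.gz"))
```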