From ed1ac587df34dc30ddd49549d3286befb059dd31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lars=20Gust=C3=A4bel?=
Date: Tue, 6 Dec 2011 12:56:38 +0100
Subject: Correctly detect bzip2 compressed streams with blocksizes other than 900k.

---
 Lib/tarfile.py           |  2 +-
 Lib/test/test_tarfile.py | 22 ++++++++++++++++++----
 Misc/NEWS                |  3 +++
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index d8d94f0..226d481 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -627,7 +627,7 @@ class _StreamProxy(object):
     def getcomptype(self):
         if self.buf.startswith(b"\037\213\010"):
             return "gz"
-        if self.buf.startswith(b"BZh91"):
+        if self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
             return "bz2"
         return "tar"
 
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 37cfbba..1757e44 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -529,6 +529,23 @@ class DetectReadTest(unittest.TestCase):
     def test_detect_fileobj(self):
         self._test_modes(self._testfunc_fileobj)
 
+    def test_detect_stream_bz2(self):
+        # Originally, tarfile's stream detection looked for the string
+        # "BZh91" at the start of the file. This is incorrect because
+        # the '9' represents the blocksize (900kB). If the file was
+        # compressed using another blocksize autodetection fails.
+        if not bz2:
+            return
+
+        with open(tarname, "rb") as fobj:
+            data = fobj.read()
+
+        # Compress with blocksize 100kB, the file starts with "BZh11".
+        with bz2.BZ2File(tmpname, "wb", compresslevel=1) as fobj:
+            fobj.write(data)
+
+        self._testfunc_file(tmpname, "r|*")
+
 
 class MemberReadTest(ReadTest):
 
@@ -1818,11 +1835,8 @@ def test_main():
     if bz2:
         # Create testtar.tar.bz2 and add bz2-specific tests.
         support.unlink(bz2name)
-        tar = bz2.BZ2File(bz2name, "wb")
-        try:
+        with bz2.BZ2File(bz2name, "wb") as tar:
             tar.write(data)
-        finally:
-            tar.close()
 
         tests += [
             Bz2MiscReadTest,
diff --git a/Misc/NEWS b/Misc/NEWS
index e65e805..8640d70 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -90,6 +90,9 @@ Core and Builtins
 Library
 -------
 
+- tarfile.py: Correctly detect bzip2 compressed streams with blocksizes
+  other than 900k.
+
 - Issue #13439: Fix many errors in turtle docstrings.
 
 - Issue #13487: Make inspect.getmodule robust against changes done to
-- 
cgit v0.12