From 0357268d96d4ff3546cfd89f594a5630a3adf747 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lars=20Gust=C3=A4bel?=
Date: Mon, 6 Jul 2015 09:27:24 +0200
Subject: Issue #24259: tarfile now raises a ReadError if an archive is truncated inside a data segment.

---
 Lib/tarfile.py           | 22 +++++++++++++++-------
 Lib/test/test_tarfile.py | 23 +++++++++++++++++++++++
 Misc/NEWS                |  3 +++
 3 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 6d8d36c..5f1a979 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -225,7 +225,7 @@ def calc_chksums(buf):
     signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
     return unsigned_chksum, signed_chksum
 
-def copyfileobj(src, dst, length=None):
+def copyfileobj(src, dst, length=None, exception=OSError):
     """Copy length bytes from fileobj src to fileobj dst.
        If length is None, copy the entire content.
     """
@@ -240,13 +240,13 @@ def copyfileobj(src, dst, length=None):
     for b in range(blocks):
         buf = src.read(BUFSIZE)
         if len(buf) < BUFSIZE:
-            raise OSError("end of file reached")
+            raise exception("unexpected end of data")
         dst.write(buf)
 
     if remainder != 0:
         buf = src.read(remainder)
         if len(buf) < remainder:
-            raise OSError("end of file reached")
+            raise exception("unexpected end of data")
         dst.write(buf)
     return
 
@@ -690,7 +690,10 @@ class _FileInFile(object):
             length = min(size, stop - self.position)
             if data:
                 self.fileobj.seek(offset + (self.position - start))
-                buf += self.fileobj.read(length)
+                b = self.fileobj.read(length)
+                if len(b) != length:
+                    raise ReadError("unexpected end of data")
+                buf += b
             else:
                 buf += NUL * length
             size -= length
@@ -2132,9 +2135,9 @@ class TarFile(object):
             if tarinfo.sparse is not None:
                 for offset, size in tarinfo.sparse:
                     target.seek(offset)
-                    copyfileobj(source, target, size)
+                    copyfileobj(source, target, size, ReadError)
             else:
-                copyfileobj(source, target, tarinfo.size)
+                copyfileobj(source, target, tarinfo.size, ReadError)
             target.seek(tarinfo.size)
             target.truncate()
 
@@ -2244,8 +2247,13 @@ class TarFile(object):
             self.firstmember = None
             return m
 
+        # Advance the file pointer.
+        if self.offset != self.fileobj.tell():
+            self.fileobj.seek(self.offset - 1)
+            if not self.fileobj.read(1):
+                raise ReadError("unexpected end of data")
+
         # Read the next block.
-        self.fileobj.seek(self.offset)
         tarinfo = None
         while True:
             try:
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 5b55e07..8ab7575 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -349,6 +349,29 @@ class CommonReadTest(ReadTest):
             finally:
                 tar.close()
 
+    def test_premature_end_of_archive(self):
+        for size in (512, 600, 1024, 1200):
+            with tarfile.open(tmpname, "w:") as tar:
+                t = tarfile.TarInfo("foo")
+                t.size = 1024
+                tar.addfile(t, io.BytesIO(b"a" * 1024))
+
+            with open(tmpname, "r+b") as fobj:
+                fobj.truncate(size)
+
+            with tarfile.open(tmpname) as tar:
+                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
+                    for t in tar:
+                        pass
+
+            with tarfile.open(tmpname) as tar:
+                t = tar.next()
+
+                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
+                    tar.extract(t, TEMPDIR)
+
+                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
+                    tar.extractfile(t).read()
 
 class MiscReadTestBase(CommonReadTest):
     def requires_name_attribute(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index d209195..8713f31 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -66,6 +66,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #24259: tarfile now raises a ReadError if an archive is truncated
+  inside a data segment.
+
 - Issue #24552: Fix use after free in an error case of the _pickle module.
 
 - Issue #24514: tarfile now tolerates number fields consisting of only
-- 
cgit v0.12