Merged revisions 88528 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r88528 | lars.gustaebel | 2011-02-23 12:42:22 +0100 (Wed, 23 Feb 2011) | 16 lines

  Issue #11224: Improved sparse file read support (r85916) introduced a
  regression in _FileInFile which is used in file-like objects returned by
  TarFile.extractfile(). The inefficient design of the _FileInFile.read()
  method causes various dramatic side-effects and errors:

    - The data segment of a file member is read completely into memory
      every(!) time a small block is accessed. This is not only slow but may
      cause unexpected MemoryErrors with very large files.
    - Reading members from compressed tar archives is even slower because of
      the excessive backwards seeking which is done when the same data
      segment is read over and over again.
    - As a backwards seek on a TarFile opened in stream mode is not
      possible, using extractfile() fails with a StreamError.
........
author: Lars Gustäbel <lars@gustaebel.de> 2011-02-23 11:52:31 (GMT)
committer: Lars Gustäbel <lars@gustaebel.de> 2011-02-23 11:52:31 (GMT)
commit: 9f6cbe09cc88be914600306b34ac3d0025738465 (patch)
tree: 1a3d0213901d0c3d7d453661831f151e0b64be56
parent: dcb29c93d23112eef7f37b284a04db5f4b223787 (diff)
download: cpython-9f6cbe09cc88be914600306b34ac3d0025738465.zip
cpython-9f6cbe09cc88be914600306b34ac3d0025738465.tar.gz
cpython-9f6cbe09cc88be914600306b34ac3d0025738465.tar.bz2
3 files changed, 22 insertions, 3 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index e3747e9..0f9d1da 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -760,9 +760,8 @@ class _FileInFile(object):
                         self.map_index = 0
             length = min(size, stop - self.position)
             if data:
-                self.fileobj.seek(offset)
-                block = self.fileobj.read(stop - start)
-                buf += block[self.position - start:self.position + length]
+                self.fileobj.seek(offset + (self.position - start))
+                buf += self.fileobj.read(length)
             else:
                 buf += NUL * length
             size -= length
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 94ef61c..68e094d 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -419,6 +419,22 @@ class StreamReadTest(CommonReadTest):
 
     mode="r|"
 
+    def test_read_through(self):
+        # Issue #11224: A poorly designed _FileInFile.read() method
+        # caused seeking errors with stream tar files.
+        for tarinfo in self.tar:
+            if not tarinfo.isreg():
+                continue
+            fobj = self.tar.extractfile(tarinfo)
+            while True:
+                try:
+                    buf = fobj.read(512)
+                except tarfile.StreamError:
+                    self.fail("simple read-through using TarFile.extractfile() failed")
+                if not buf:
+                    break
+            fobj.close()
+
     def test_fileobj_regular_file(self):
         tarinfo = self.tar.next() # get "regtype" (can't use getmember)
         fobj = self.tar.extractfile(tarinfo)
diff --git a/Misc/NEWS b/Misc/NEWS
index a5ac7c1..7488686 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -15,6 +15,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #11224: Fixed a regression in tarfile that affected the file-like
+  objects returned by TarFile.extractfile() regarding performance, memory
+  consumption and failures with the stream interface.
+
 - Issue #11074: Make 'tokenize' so it can be reloaded.
 
 - Issue #4681: Allow mmap() to work on file sizes and offsets larger than
author	Lars Gustäbel <lars@gustaebel.de>	2011-02-23 11:52:31 (GMT)
committer	Lars Gustäbel <lars@gustaebel.de>	2011-02-23 11:52:31 (GMT)
commit	9f6cbe09cc88be914600306b34ac3d0025738465 (patch)
tree	1a3d0213901d0c3d7d453661831f151e0b64be56
parent	dcb29c93d23112eef7f37b284a04db5f4b223787 (diff)
download	cpython-9f6cbe09cc88be914600306b34ac3d0025738465.zip cpython-9f6cbe09cc88be914600306b34ac3d0025738465.tar.gz cpython-9f6cbe09cc88be914600306b34ac3d0025738465.tar.bz2