summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/tarfile.rst13
-rw-r--r--Lib/tarfile.py196
-rw-r--r--Lib/test/test_tarfile.py69
-rw-r--r--Misc/NEWS2
4 files changed, 80 insertions, 200 deletions
diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst
index 92e9df7..86dd33d 100644
--- a/Doc/library/tarfile.rst
+++ b/Doc/library/tarfile.rst
@@ -376,15 +376,12 @@ be finalized; only the internally used file object will be closed. See the
.. method:: TarFile.extractfile(member)
Extract a member from the archive as a file object. *member* may be a filename
- or a :class:`TarInfo` object. If *member* is a regular file, a :term:`file-like
- object` is returned. If *member* is a link, a file-like object is constructed from
- the link's target. If *member* is none of the above, :const:`None` is returned.
+ or a :class:`TarInfo` object. If *member* is a regular file or a link, an
+ :class:`io.BufferedReader` object is returned. Otherwise, :const:`None` is
+ returned.
- .. note::
-
- The file-like object is read-only. It provides the methods
- :meth:`read`, :meth:`readline`, :meth:`readlines`, :meth:`seek`, :meth:`tell`,
- and :meth:`close`, and also supports iteration over its lines.
+ .. versionchanged:: 3.3
+ Return an :class:`io.BufferedReader` object.
.. method:: TarFile.add(name, arcname=None, recursive=True, exclude=None, *, filter=None)
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index efb2773..e273787 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -668,6 +668,8 @@ class _FileInFile(object):
self.offset = offset
self.size = size
self.position = 0
+ self.name = getattr(fileobj, "name", None)
+ self.closed = False
if blockinfo is None:
blockinfo = [(0, size)]
@@ -686,10 +688,16 @@ class _FileInFile(object):
if lastpos < self.size:
self.map.append((False, lastpos, self.size, None))
+ def flush(self):
+ pass
+
+ def readable(self):
+ return True
+
+ def writable(self):
+ return False
+
def seekable(self):
- if not hasattr(self.fileobj, "seekable"):
- # XXX gzip.GzipFile and bz2.BZ2File
- return True
return self.fileobj.seekable()
def tell(self):
@@ -697,10 +705,21 @@ class _FileInFile(object):
"""
return self.position
- def seek(self, position):
+ def seek(self, position, whence=io.SEEK_SET):
"""Seek to a position in the file.
"""
- self.position = position
+ if whence == io.SEEK_SET:
+ self.position = min(max(position, 0), self.size)
+ elif whence == io.SEEK_CUR:
+ if position < 0:
+ self.position = max(self.position + position, 0)
+ else:
+ self.position = min(self.position + position, self.size)
+ elif whence == io.SEEK_END:
+ self.position = max(min(self.size + position, self.size), 0)
+ else:
+ raise ValueError("Invalid argument")
+ return self.position
def read(self, size=None):
"""Read data from the file.
@@ -729,146 +748,16 @@ class _FileInFile(object):
size -= length
self.position += length
return buf
-#class _FileInFile
-
-
-class ExFileObject(object):
- """File-like object for reading an archive member.
- Is returned by TarFile.extractfile().
- """
- blocksize = 1024
-
- def __init__(self, tarfile, tarinfo):
- self.fileobj = _FileInFile(tarfile.fileobj,
- tarinfo.offset_data,
- tarinfo.size,
- tarinfo.sparse)
- self.name = tarinfo.name
- self.mode = "r"
- self.closed = False
- self.size = tarinfo.size
-
- self.position = 0
- self.buffer = b""
-
- def readable(self):
- return True
-
- def writable(self):
- return False
-
- def seekable(self):
- return self.fileobj.seekable()
-
- def read(self, size=None):
- """Read at most size bytes from the file. If size is not
- present or None, read all data until EOF is reached.
- """
- if self.closed:
- raise ValueError("I/O operation on closed file")
-
- buf = b""
- if self.buffer:
- if size is None:
- buf = self.buffer
- self.buffer = b""
- else:
- buf = self.buffer[:size]
- self.buffer = self.buffer[size:]
-
- if size is None:
- buf += self.fileobj.read()
- else:
- buf += self.fileobj.read(size - len(buf))
-
- self.position += len(buf)
- return buf
-
- # XXX TextIOWrapper uses the read1() method.
- read1 = read
-
- def readline(self, size=-1):
- """Read one entire line from the file. If size is present
- and non-negative, return a string with at most that
- size, which may be an incomplete line.
- """
- if self.closed:
- raise ValueError("I/O operation on closed file")
-
- pos = self.buffer.find(b"\n") + 1
- if pos == 0:
- # no newline found.
- while True:
- buf = self.fileobj.read(self.blocksize)
- self.buffer += buf
- if not buf or b"\n" in buf:
- pos = self.buffer.find(b"\n") + 1
- if pos == 0:
- # no newline found.
- pos = len(self.buffer)
- break
-
- if size != -1:
- pos = min(size, pos)
-
- buf = self.buffer[:pos]
- self.buffer = self.buffer[pos:]
- self.position += len(buf)
- return buf
-
- def readlines(self):
- """Return a list with all remaining lines.
- """
- result = []
- while True:
- line = self.readline()
- if not line: break
- result.append(line)
- return result
-
- def tell(self):
- """Return the current file position.
- """
- if self.closed:
- raise ValueError("I/O operation on closed file")
-
- return self.position
- def seek(self, pos, whence=io.SEEK_SET):
- """Seek to a position in the file.
- """
- if self.closed:
- raise ValueError("I/O operation on closed file")
-
- if whence == io.SEEK_SET:
- self.position = min(max(pos, 0), self.size)
- elif whence == io.SEEK_CUR:
- if pos < 0:
- self.position = max(self.position + pos, 0)
- else:
- self.position = min(self.position + pos, self.size)
- elif whence == io.SEEK_END:
- self.position = max(min(self.size + pos, self.size), 0)
- else:
- raise ValueError("Invalid argument")
-
- self.buffer = b""
- self.fileobj.seek(self.position)
+ def readinto(self, b):
+ buf = self.read(len(b))
+ b[:len(buf)] = buf
+ return len(buf)
def close(self):
- """Close the file object.
- """
self.closed = True
+#class _FileInFile
- def __iter__(self):
- """Get an iterator over the file's lines.
- """
- while True:
- line = self.readline()
- if not line:
- break
- yield line
-#class ExFileObject
#------------------
# Exported Classes
@@ -1554,7 +1443,8 @@ class TarFile(object):
tarinfo = TarInfo # The default TarInfo class to use.
- fileobject = ExFileObject # The default ExFileObject class to use.
+ fileobject = None # The file-object for extractfile() or
+ # io.BufferedReader if None.
def __init__(self, name=None, mode="r", fileobj=None, format=None,
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
@@ -2178,12 +2068,9 @@ class TarFile(object):
def extractfile(self, member):
"""Extract a member from the archive as a file object. `member' may be
- a filename or a TarInfo object. If `member' is a regular file, a
- file-like object is returned. If `member' is a link, a file-like
- object is constructed from the link's target. If `member' is none of
- the above, None is returned.
- The file-like object is read-only and provides the following
- methods: read(), readline(), readlines(), seek() and tell()
+ a filename or a TarInfo object. If `member' is a regular file or a
+ link, an io.BufferedReader object is returned. Otherwise, None is
+ returned.
"""
self._check("r")
@@ -2192,13 +2079,14 @@ class TarFile(object):
else:
tarinfo = member
- if tarinfo.isreg():
- return self.fileobject(self, tarinfo)
-
- elif tarinfo.type not in SUPPORTED_TYPES:
- # If a member's type is unknown, it is treated as a
- # regular file.
- return self.fileobject(self, tarinfo)
+ if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
+ # Members with unknown types are treated as regular files.
+ if self.fileobject is None:
+ fileobj = _FileInFile(self.fileobj, tarinfo.offset_data, tarinfo.size, tarinfo.sparse)
+ return io.BufferedReader(fileobj)
+ else:
+ # Keep the traditional pre-3.3 API intact.
+ return self.fileobject(self, tarinfo)
elif tarinfo.islnk() or tarinfo.issym():
if isinstance(self.fileobj, _Stream):
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 4967ad0..c7bf774 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -56,13 +56,10 @@ class UstarReadTest(ReadTest):
def test_fileobj_regular_file(self):
tarinfo = self.tar.getmember("ustar/regtype")
- fobj = self.tar.extractfile(tarinfo)
- try:
+ with self.tar.extractfile(tarinfo) as fobj:
data = fobj.read()
self.assertTrue((len(data), md5sum(data)) == (tarinfo.size, md5_regtype),
"regular file extraction failed")
- finally:
- fobj.close()
def test_fileobj_readlines(self):
self.tar.extract("ustar/regtype", TEMPDIR)
@@ -70,8 +67,7 @@ class UstarReadTest(ReadTest):
with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1:
lines1 = fobj1.readlines()
- fobj = self.tar.extractfile(tarinfo)
- try:
+ with self.tar.extractfile(tarinfo) as fobj:
fobj2 = io.TextIOWrapper(fobj)
lines2 = fobj2.readlines()
self.assertTrue(lines1 == lines2,
@@ -81,21 +77,16 @@ class UstarReadTest(ReadTest):
self.assertTrue(lines2[83] ==
"I will gladly admit that Python is not the fastest running scripting language.\n",
"fileobj.readlines() failed")
- finally:
- fobj.close()
def test_fileobj_iter(self):
self.tar.extract("ustar/regtype", TEMPDIR)
tarinfo = self.tar.getmember("ustar/regtype")
with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1:
lines1 = fobj1.readlines()
- fobj2 = self.tar.extractfile(tarinfo)
- try:
+ with self.tar.extractfile(tarinfo) as fobj2:
lines2 = list(io.TextIOWrapper(fobj2))
self.assertTrue(lines1 == lines2,
"fileobj.__iter__() failed")
- finally:
- fobj2.close()
def test_fileobj_seek(self):
self.tar.extract("ustar/regtype", TEMPDIR)
@@ -147,17 +138,24 @@ class UstarReadTest(ReadTest):
"read() after readline() failed")
fobj.close()
+ def test_fileobj_text(self):
+ with self.tar.extractfile("ustar/regtype") as fobj:
+ fobj = io.TextIOWrapper(fobj)
+ data = fobj.read().encode("iso8859-1")
+ self.assertEqual(md5sum(data), md5_regtype)
+ try:
+ fobj.seek(100)
+ except AttributeError:
+ # Issue #13815: seek() complained about a missing
+ # flush() method.
+ self.fail("seeking failed in text mode")
+
# Test if symbolic and hard links are resolved by extractfile(). The
# test link members each point to a regular member whose data is
# supposed to be exported.
def _test_fileobj_link(self, lnktype, regtype):
- a = self.tar.extractfile(lnktype)
- b = self.tar.extractfile(regtype)
- try:
+ with self.tar.extractfile(lnktype) as a, self.tar.extractfile(regtype) as b:
self.assertEqual(a.name, b.name)
- finally:
- a.close()
- b.close()
def test_fileobj_link1(self):
self._test_fileobj_link("ustar/lnktype", "ustar/regtype")
@@ -265,9 +263,8 @@ class MiscReadTest(CommonReadTest):
t = tar.next()
name = t.name
offset = t.offset
- f = tar.extractfile(t)
- data = f.read()
- f.close()
+ with tar.extractfile(t) as f:
+ data = f.read()
finally:
tar.close()
@@ -439,27 +436,26 @@ class StreamReadTest(CommonReadTest):
for tarinfo in self.tar:
if not tarinfo.isreg():
continue
- fobj = self.tar.extractfile(tarinfo)
- while True:
- try:
- buf = fobj.read(512)
- except tarfile.StreamError:
- self.fail("simple read-through using TarFile.extractfile() failed")
- if not buf:
- break
- fobj.close()
+ with self.tar.extractfile(tarinfo) as fobj:
+ while True:
+ try:
+ buf = fobj.read(512)
+ except tarfile.StreamError:
+ self.fail("simple read-through using TarFile.extractfile() failed")
+ if not buf:
+ break
def test_fileobj_regular_file(self):
tarinfo = self.tar.next() # get "regtype" (can't use getmember)
- fobj = self.tar.extractfile(tarinfo)
- data = fobj.read()
+ with self.tar.extractfile(tarinfo) as fobj:
+ data = fobj.read()
self.assertTrue((len(data), md5sum(data)) == (tarinfo.size, md5_regtype),
"regular file extraction failed")
def test_provoke_stream_error(self):
tarinfos = self.tar.getmembers()
- f = self.tar.extractfile(tarinfos[0]) # read the first member
- self.assertRaises(tarfile.StreamError, f.read)
+ with self.tar.extractfile(tarinfos[0]) as f: # read the first member
+ self.assertRaises(tarfile.StreamError, f.read)
def test_compare_members(self):
tar1 = tarfile.open(tarname, encoding="iso8859-1")
@@ -1484,12 +1480,9 @@ class AppendTest(unittest.TestCase):
with tarfile.open(tarname, encoding="iso8859-1") as src:
t = src.getmember("ustar/regtype")
t.name = "foo"
- f = src.extractfile(t)
- try:
+ with src.extractfile(t) as f:
with tarfile.open(self.tarname, mode) as tar:
tar.addfile(t, f)
- finally:
- f.close()
def _test(self, names=["bar"], fileobj=None):
with tarfile.open(self.tarname, fileobj=fileobj) as tar:
diff --git a/Misc/NEWS b/Misc/NEWS
index 6251da6..92139d9 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -89,6 +89,8 @@ Core and Builtins
Library
-------
+- Issue #13815: TarFile.extractfile() now returns io.BufferedReader objects.
+
- Issue #14371: Support bzip2 in zipfile module.
Patch by Serhiy Storchaka.