diff options
author | Lars Gustäbel <lars@gustaebel.de> | 2012-05-05 16:15:03 (GMT) |
---|---|---|
committer | Lars Gustäbel <lars@gustaebel.de> | 2012-05-05 16:15:03 (GMT) |
commit | 7a919e993052deec2826d6ba823156c805b1a9d9 (patch) | |
tree | 27fadbf121cdccef114d61d62f211a890d1455fe /Lib/tarfile.py | |
parent | ef5a4636d078d87d30885a9c371671a217754c8d (diff) | |
download | cpython-7a919e993052deec2826d6ba823156c805b1a9d9.zip cpython-7a919e993052deec2826d6ba823156c805b1a9d9.tar.gz cpython-7a919e993052deec2826d6ba823156c805b1a9d9.tar.bz2 |
Issue #13815: TarFile.extractfile() now returns io.BufferedReader objects.
The ExFileObject class was removed; some of its code went into _FileInFile.
Diffstat (limited to 'Lib/tarfile.py')
-rw-r--r-- | Lib/tarfile.py | 196 |
1 file changed, 42 insertions, 154 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py index efb2773..e273787 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -668,6 +668,8 @@ class _FileInFile(object): self.offset = offset self.size = size self.position = 0 + self.name = getattr(fileobj, "name", None) + self.closed = False if blockinfo is None: blockinfo = [(0, size)] @@ -686,10 +688,16 @@ class _FileInFile(object): if lastpos < self.size: self.map.append((False, lastpos, self.size, None)) + def flush(self): + pass + + def readable(self): + return True + + def writable(self): + return False + def seekable(self): - if not hasattr(self.fileobj, "seekable"): - # XXX gzip.GzipFile and bz2.BZ2File - return True return self.fileobj.seekable() def tell(self): @@ -697,10 +705,21 @@ class _FileInFile(object): """ return self.position - def seek(self, position): + def seek(self, position, whence=io.SEEK_SET): """Seek to a position in the file. """ - self.position = position + if whence == io.SEEK_SET: + self.position = min(max(position, 0), self.size) + elif whence == io.SEEK_CUR: + if position < 0: + self.position = max(self.position + position, 0) + else: + self.position = min(self.position + position, self.size) + elif whence == io.SEEK_END: + self.position = max(min(self.size + position, self.size), 0) + else: + raise ValueError("Invalid argument") + return self.position def read(self, size=None): """Read data from the file. @@ -729,146 +748,16 @@ class _FileInFile(object): size -= length self.position += length return buf -#class _FileInFile - - -class ExFileObject(object): - """File-like object for reading an archive member. - Is returned by TarFile.extractfile(). 
- """ - blocksize = 1024 - - def __init__(self, tarfile, tarinfo): - self.fileobj = _FileInFile(tarfile.fileobj, - tarinfo.offset_data, - tarinfo.size, - tarinfo.sparse) - self.name = tarinfo.name - self.mode = "r" - self.closed = False - self.size = tarinfo.size - - self.position = 0 - self.buffer = b"" - - def readable(self): - return True - - def writable(self): - return False - - def seekable(self): - return self.fileobj.seekable() - - def read(self, size=None): - """Read at most size bytes from the file. If size is not - present or None, read all data until EOF is reached. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - buf = b"" - if self.buffer: - if size is None: - buf = self.buffer - self.buffer = b"" - else: - buf = self.buffer[:size] - self.buffer = self.buffer[size:] - - if size is None: - buf += self.fileobj.read() - else: - buf += self.fileobj.read(size - len(buf)) - - self.position += len(buf) - return buf - - # XXX TextIOWrapper uses the read1() method. - read1 = read - - def readline(self, size=-1): - """Read one entire line from the file. If size is present - and non-negative, return a string with at most that - size, which may be an incomplete line. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - pos = self.buffer.find(b"\n") + 1 - if pos == 0: - # no newline found. - while True: - buf = self.fileobj.read(self.blocksize) - self.buffer += buf - if not buf or b"\n" in buf: - pos = self.buffer.find(b"\n") + 1 - if pos == 0: - # no newline found. - pos = len(self.buffer) - break - - if size != -1: - pos = min(size, pos) - - buf = self.buffer[:pos] - self.buffer = self.buffer[pos:] - self.position += len(buf) - return buf - - def readlines(self): - """Return a list with all remaining lines. - """ - result = [] - while True: - line = self.readline() - if not line: break - result.append(line) - return result - - def tell(self): - """Return the current file position. 
- """ - if self.closed: - raise ValueError("I/O operation on closed file") - - return self.position - def seek(self, pos, whence=io.SEEK_SET): - """Seek to a position in the file. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - if whence == io.SEEK_SET: - self.position = min(max(pos, 0), self.size) - elif whence == io.SEEK_CUR: - if pos < 0: - self.position = max(self.position + pos, 0) - else: - self.position = min(self.position + pos, self.size) - elif whence == io.SEEK_END: - self.position = max(min(self.size + pos, self.size), 0) - else: - raise ValueError("Invalid argument") - - self.buffer = b"" - self.fileobj.seek(self.position) + def readinto(self, b): + buf = self.read(len(b)) + b[:len(buf)] = buf + return len(buf) def close(self): - """Close the file object. - """ self.closed = True +#class _FileInFile - def __iter__(self): - """Get an iterator over the file's lines. - """ - while True: - line = self.readline() - if not line: - break - yield line -#class ExFileObject #------------------ # Exported Classes @@ -1554,7 +1443,8 @@ class TarFile(object): tarinfo = TarInfo # The default TarInfo class to use. - fileobject = ExFileObject # The default ExFileObject class to use. + fileobject = None # The file-object for extractfile() or + # io.BufferedReader if None. def __init__(self, name=None, mode="r", fileobj=None, format=None, tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, @@ -2178,12 +2068,9 @@ class TarFile(object): def extractfile(self, member): """Extract a member from the archive as a file object. `member' may be - a filename or a TarInfo object. If `member' is a regular file, a - file-like object is returned. If `member' is a link, a file-like - object is constructed from the link's target. If `member' is none of - the above, None is returned. - The file-like object is read-only and provides the following - methods: read(), readline(), readlines(), seek() and tell() + a filename or a TarInfo object. 
If `member' is a regular file or a + link, an io.BufferedReader object is returned. Otherwise, None is + returned. """ self._check("r") @@ -2192,13 +2079,14 @@ class TarFile(object): else: tarinfo = member - if tarinfo.isreg(): - return self.fileobject(self, tarinfo) - - elif tarinfo.type not in SUPPORTED_TYPES: - # If a member's type is unknown, it is treated as a - # regular file. - return self.fileobject(self, tarinfo) + if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES: + # Members with unknown types are treated as regular files. + if self.fileobject is None: + fileobj = _FileInFile(self.fileobj, tarinfo.offset_data, tarinfo.size, tarinfo.sparse) + return io.BufferedReader(fileobj) + else: + # Keep the traditional pre-3.3 API intact. + return self.fileobject(self, tarinfo) elif tarinfo.islnk() or tarinfo.issym(): if isinstance(self.fileobj, _Stream): |