diff options
Diffstat (limited to 'Lib/tarfile.py')
-rwxr-xr-x | Lib/tarfile.py | 108 |
1 files changed, 49 insertions, 59 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 999a99b..b78b1b1 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -64,7 +64,10 @@ except NameError: pass # from tarfile import * -__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] +__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError", + "CompressionError", "StreamError", "ExtractError", "HeaderError", + "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT", + "DEFAULT_FORMAT", "open"] #--------------------------------------------------------- # tar constants @@ -141,7 +144,7 @@ PAX_NUMBER_FIELDS = { #--------------------------------------------------------- # initialization #--------------------------------------------------------- -if os.name in ("nt", "ce"): +if os.name == "nt": ENCODING = "utf-8" else: ENCODING = sys.getfilesystemencoding() @@ -225,21 +228,21 @@ def calc_chksums(buf): signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf)) return unsigned_chksum, signed_chksum -def copyfileobj(src, dst, length=None, exception=OSError): +def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None): """Copy length bytes from fileobj src to fileobj dst. If length is None, copy the entire content. """ + bufsize = bufsize or 16 * 1024 if length == 0: return if length is None: - shutil.copyfileobj(src, dst) + shutil.copyfileobj(src, dst, bufsize) return - BUFSIZE = 16 * 1024 - blocks, remainder = divmod(length, BUFSIZE) + blocks, remainder = divmod(length, bufsize) for b in range(blocks): - buf = src.read(BUFSIZE) - if len(buf) < BUFSIZE: + buf = src.read(bufsize) + if len(buf) < bufsize: raise exception("unexpected end of data") dst.write(buf) @@ -1400,7 +1403,8 @@ class TarFile(object): def __init__(self, name=None, mode="r", fileobj=None, format=None, tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, - errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None): + errors="surrogateescape", pax_headers=None, debug=None, + errorlevel=None, copybufsize=None): """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to read from an existing archive, 'a' to append data to an existing file or 'w' to create a new file overwriting an existing one. `mode' @@ -1456,6 +1460,7 @@ class TarFile(object): self.errorlevel = errorlevel # Init datastructures. + self.copybufsize = copybufsize self.closed = False self.members = [] # list of members as TarInfo objects self._loaded = False # flag if all members have been read @@ -1557,7 +1562,7 @@ class TarFile(object): saved_pos = fileobj.tell() try: return func(name, "r", fileobj, **kwargs) - except (ReadError, CompressionError) as e: + except (ReadError, CompressionError): if fileobj is not None: fileobj.seek(saved_pos) continue @@ -1962,10 +1967,10 @@ class TarFile(object): buf = tarinfo.tobuf(self.format, self.encoding, self.errors) self.fileobj.write(buf) self.offset += len(buf) - + bufsize=self.copybufsize # If there's data to follow, append it. if fileobj is not None: - copyfileobj(fileobj, self.fileobj, tarinfo.size) + copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize) blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) if remainder > 0: self.fileobj.write(NUL * (BLOCKSIZE - remainder)) @@ -2147,15 +2152,16 @@ class TarFile(object): """ source = self.fileobj source.seek(tarinfo.offset_data) + bufsize = self.copybufsize with bltn_open(targetpath, "wb") as target: if tarinfo.sparse is not None: for offset, size in tarinfo.sparse: target.seek(offset) - copyfileobj(source, target, size, ReadError) + copyfileobj(source, target, size, ReadError, bufsize) target.seek(tarinfo.size) target.truncate() else: - copyfileobj(source, target, tarinfo.size, ReadError) + copyfileobj(source, target, tarinfo.size, ReadError, bufsize) def makeunknown(self, tarinfo, targetpath): """Make a file from a TarInfo object with an unknown type @@ -2234,7 +2240,7 @@ class TarFile(object): os.lchown(targetpath, u, g) else: os.chown(targetpath, u, g) - except OSError as e: + except OSError: raise ExtractError("could not change owner") def chmod(self, tarinfo, targetpath): @@ -2243,7 +2249,7 @@ class TarFile(object): if hasattr(os, 'chmod'): try: os.chmod(targetpath, tarinfo.mode) - except OSError as e: + except OSError: raise ExtractError("could not change mode") def utime(self, tarinfo, targetpath): @@ -2253,7 +2259,7 @@ class TarFile(object): return try: os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) - except OSError as e: + except OSError: raise ExtractError("could not change modification time") #-------------------------------------------------------------------------- @@ -2376,9 +2382,32 @@ class TarFile(object): """Provide an iterator object. """ if self._loaded: - return iter(self.members) - else: - return TarIter(self) + yield from self.members + return + + # Yield items using TarFile's next() method. + # When all members have been read, set TarFile as _loaded. + index = 0 + # Fix for SF #1100429: Under rare circumstances it can + # happen that getmembers() is called during iteration, + # which will have already exhausted the next() method. + if self.firstmember is not None: + tarinfo = self.next() + index += 1 + yield tarinfo + + while True: + if index < len(self.members): + tarinfo = self.members[index] + elif not self._loaded: + tarinfo = self.next() + if not tarinfo: + self._loaded = True + return + else: + return + index += 1 + yield tarinfo def _dbg(self, level, msg): """Write debugging output to sys.stderr. @@ -2399,45 +2428,6 @@ class TarFile(object): if not self._extfileobj: self.fileobj.close() self.closed = True -# class TarFile - -class TarIter: - """Iterator Class. - - for tarinfo in TarFile(...): - suite... - """ - - def __init__(self, tarfile): - """Construct a TarIter object. - """ - self.tarfile = tarfile - self.index = 0 - def __iter__(self): - """Return iterator object. - """ - return self - def __next__(self): - """Return the next item using TarFile's next() method. - When all members have been read, set TarFile as _loaded. - """ - # Fix for SF #1100429: Under rare circumstances it can - # happen that getmembers() is called during iteration, - # which will cause TarIter to stop prematurely. - - if self.index == 0 and self.tarfile.firstmember is not None: - tarinfo = self.tarfile.next() - elif self.index < len(self.tarfile.members): - tarinfo = self.tarfile.members[self.index] - elif not self.tarfile._loaded: - tarinfo = self.tarfile.next() - if not tarinfo: - self.tarfile._loaded = True - raise StopIteration - else: - raise StopIteration - self.index += 1 - return tarinfo #-------------------- # exported functions |