diff options
Diffstat (limited to 'Lib/tarfile.py')
-rw-r--r-- | Lib/tarfile.py | 163 |
1 files changed, 74 insertions, 89 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py index af32b0f..e3380a2 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -29,8 +29,6 @@ """Read from and write to tar format archives. """ -__version__ = "$Revision$" - version = "0.9.0" __author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)" __date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $" @@ -42,9 +40,9 @@ __credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend." #--------- import sys import os +import io import shutil import stat -import errno import time import struct import copy @@ -423,10 +421,11 @@ class _Stream: self.crc = zlib.crc32(b"") if mode == "r": self._init_read_gz() + self.exception = zlib.error else: self._init_write_gz() - if comptype == "bz2": + elif comptype == "bz2": try: import bz2 except ImportError: @@ -434,8 +433,25 @@ class _Stream: if mode == "r": self.dbuf = b"" self.cmp = bz2.BZ2Decompressor() + self.exception = IOError else: self.cmp = bz2.BZ2Compressor() + + elif comptype == "xz": + try: + import lzma + except ImportError: + raise CompressionError("lzma module is not available") + if mode == "r": + self.dbuf = b"" + self.cmp = lzma.LZMADecompressor() + self.exception = lzma.LZMAError + else: + self.cmp = lzma.LZMACompressor() + + elif comptype != "tar": + raise CompressionError("unknown compression type %r" % comptype) + except: if not self._extfileobj: self.fileobj.close() @@ -587,7 +603,7 @@ class _Stream: break try: buf = self.cmp.decompress(buf) - except IOError: + except self.exception: raise ReadError("invalid compressed data") self.dbuf += buf c += len(buf) @@ -625,76 +641,19 @@ class _StreamProxy(object): return self.buf def getcomptype(self): - if self.buf.startswith(b"\037\213\010"): + if self.buf.startswith(b"\x1f\x8b\x08"): return "gz" - if self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY": + elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY": return "bz2" - return "tar" + elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")): + return "xz" + else: + return "tar" def close(self): self.fileobj.close() # class StreamProxy -class _BZ2Proxy(object): - """Small proxy class that enables external file object - support for "r:bz2" and "w:bz2" modes. This is actually - a workaround for a limitation in bz2 module's BZ2File - class which (unlike gzip.GzipFile) has no support for - a file object argument. - """ - - blocksize = 16 * 1024 - - def __init__(self, fileobj, mode): - self.fileobj = fileobj - self.mode = mode - self.name = getattr(self.fileobj, "name", None) - self.init() - - def init(self): - import bz2 - self.pos = 0 - if self.mode == "r": - self.bz2obj = bz2.BZ2Decompressor() - self.fileobj.seek(0) - self.buf = b"" - else: - self.bz2obj = bz2.BZ2Compressor() - - def read(self, size): - x = len(self.buf) - while x < size: - raw = self.fileobj.read(self.blocksize) - if not raw: - break - data = self.bz2obj.decompress(raw) - self.buf += data - x += len(data) - - buf = self.buf[:size] - self.buf = self.buf[size:] - self.pos += len(buf) - return buf - - def seek(self, pos): - if pos < self.pos: - self.init() - self.read(pos - self.pos) - - def tell(self): - return self.pos - - def write(self, data): - self.pos += len(data) - raw = self.bz2obj.compress(data) - self.fileobj.write(raw) - - def close(self): - if self.mode == "w": - raw = self.bz2obj.flush() - self.fileobj.write(raw) -# class _BZ2Proxy - #------------------------ # Extraction file object #------------------------ @@ -875,20 +834,20 @@ class ExFileObject(object): return self.position - def seek(self, pos, whence=os.SEEK_SET): + def seek(self, pos, whence=io.SEEK_SET): """Seek to a position in the file. """ if self.closed: raise ValueError("I/O operation on closed file") - if whence == os.SEEK_SET: + if whence == io.SEEK_SET: self.position = min(max(pos, 0), self.size) - elif whence == os.SEEK_CUR: + elif whence == io.SEEK_CUR: if pos < 0: self.position = max(self.position + pos, 0) else: self.position = min(self.position + pos, self.size) - elif whence == os.SEEK_END: + elif whence == io.SEEK_END: self.position = max(min(self.size + pos, self.size), 0) else: raise ValueError("Invalid argument") @@ -1087,7 +1046,7 @@ class TarInfo(object): def create_pax_global_header(cls, pax_headers): """Return the object as a pax global header block sequence. """ - return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8") + return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8") def _posix_split_name(self, name): """Split a name longer than 100 chars into a prefix @@ -1170,7 +1129,7 @@ class TarInfo(object): binary = False for keyword, value in pax_headers.items(): try: - value.encode("utf8", "strict") + value.encode("utf-8", "strict") except UnicodeEncodeError: binary = True break @@ -1181,13 +1140,13 @@ class TarInfo(object): records += b"21 hdrcharset=BINARY\n" for keyword, value in pax_headers.items(): - keyword = keyword.encode("utf8") + keyword = keyword.encode("utf-8") if binary: # Try to restore the original byte representation of `value'. # Needless to say, that the encoding must match the string. value = value.encode(encoding, "surrogateescape") else: - value = value.encode("utf8") + value = value.encode("utf-8") l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' n = p = 0 @@ -1396,7 +1355,7 @@ class TarInfo(object): # the translation to UTF-8 fails. match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) if match is not None: - pax_headers["hdrcharset"] = match.group(1).decode("utf8") + pax_headers["hdrcharset"] = match.group(1).decode("utf-8") # For the time being, we don't care about anything other than "BINARY". # The only other value that is currently allowed by the standard is @@ -1405,7 +1364,7 @@ class TarInfo(object): if hdrcharset == "BINARY": encoding = tarfile.encoding else: - encoding = "utf8" + encoding = "utf-8" # Parse pax header information. A record looks like that: # "%d %s=%s\n" % (length, keyword, value). length is the size @@ -1422,20 +1381,20 @@ class TarInfo(object): length = int(length) value = buf[match.end(2) + 1:match.start(1) + length - 1] - # Normally, we could just use "utf8" as the encoding and "strict" + # Normally, we could just use "utf-8" as the encoding and "strict" # as the error handler, but we better not take the risk. For # example, GNU tar <= 1.23 is known to store filenames it cannot # translate to UTF-8 as raw strings (unfortunately without a # hdrcharset=BINARY header). # We first try the strict standard encoding, and if that fails we # fall back on the user's encoding and error handler. - keyword = self._decode_pax_field(keyword, "utf8", "utf8", + keyword = self._decode_pax_field(keyword, "utf-8", "utf-8", tarfile.errors) if keyword in PAX_NAME_FIELDS: value = self._decode_pax_field(value, encoding, tarfile.encoding, tarfile.errors) else: - value = self._decode_pax_field(value, "utf8", "utf8", + value = self._decode_pax_field(value, "utf-8", "utf-8", tarfile.errors) pax_headers[keyword] = value @@ -1714,18 +1673,22 @@ class TarFile(object): 'r:' open for reading exclusively uncompressed 'r:gz' open for reading with gzip compression 'r:bz2' open for reading with bzip2 compression + 'r:xz' open for reading with lzma compression 'a' or 'a:' open for appending, creating the file if necessary 'w' or 'w:' open for writing without compression 'w:gz' open for writing with gzip compression 'w:bz2' open for writing with bzip2 compression + 'w:xz' open for writing with lzma compression 'r|*' open a stream of tar blocks with transparent compression 'r|' open an uncompressed stream of tar blocks for reading 'r|gz' open a gzip compressed stream of tar blocks 'r|bz2' open a bzip2 compressed stream of tar blocks + 'r|xz' open an lzma compressed stream of tar blocks 'w|' open an uncompressed stream for writing 'w|gz' open a gzip compressed stream for writing 'w|bz2' open a bzip2 compressed stream for writing + 'w|xz' open an lzma compressed stream for writing """ if not name and not fileobj: @@ -1832,10 +1795,8 @@ class TarFile(object): except ImportError: raise CompressionError("bz2 module is not available") - if fileobj is not None: - fileobj = _BZ2Proxy(fileobj, mode) - else: - fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) + fileobj = bz2.BZ2File(filename=name if fileobj is None else None, + mode=mode, fileobj=fileobj, compresslevel=compresslevel) try: t = cls.taropen(name, mode, fileobj, **kwargs) @@ -1845,11 +1806,36 @@ class TarFile(object): t._extfileobj = False return t + @classmethod + def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs): + """Open lzma compressed tar archive name for reading or writing. + Appending is not allowed. + """ + if mode not in ("r", "w"): + raise ValueError("mode must be 'r' or 'w'") + + try: + import lzma + except ImportError: + raise CompressionError("lzma module is not available") + + fileobj = lzma.LZMAFile(filename=name if fileobj is None else None, + mode=mode, fileobj=fileobj, preset=preset) + + try: + t = cls.taropen(name, mode, fileobj, **kwargs) + except (lzma.LZMAError, EOFError): + fileobj.close() + raise ReadError("not an lzma file") + t._extfileobj = False + return t + # All *open() methods are registered here. OPEN_METH = { "tar": "taropen", # uncompressed tar "gz": "gzopen", # gzip compressed tar - "bz2": "bz2open" # bzip2 compressed tar + "bz2": "bz2open", # bzip2 compressed tar + "xz": "xzopen" # lzma compressed tar } #-------------------------------------------------------------------------- @@ -2283,9 +2269,8 @@ class TarFile(object): # Use a safe mode for the directory, the real mode is set # later in _extract_member(). os.mkdir(targetpath, 0o700) - except EnvironmentError as e: - if e.errno != errno.EEXIST: - raise + except FileExistsError: + pass def makefile(self, tarinfo, targetpath): """Make a file called targetpath. |