summaryrefslogtreecommitdiffstats
path: root/Lib/tarfile.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/tarfile.py')
-rw-r--r--Lib/tarfile.py539
1 files changed, 314 insertions, 225 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 0b3d477..061d0f5 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -33,7 +33,7 @@
__version__ = "$Revision$"
# $Source$
-version = "0.6.4"
+version = "0.8.0"
__author__ = "Lars Gustäbel (lars@gustaebel.de)"
__date__ = "$Date$"
__cvsid__ = "$Id$"
@@ -132,21 +132,65 @@ TOEXEC = 0001 # execute/search by other
#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------
-def nts(s):
- """Convert a null-terminated string buffer to a python string.
+
+def stn(s, length):
+ """Convert a python string to a null-terminated string buffer.
"""
- return s.rstrip(NUL)
+ return s[:length-1] + (length - len(s) - 1) * NUL + NUL
-def calc_chksum(buf):
- """Calculate the checksum for a member's header. It's a simple addition
- of all bytes, treating the chksum field as if filled with spaces.
- buf is a 512 byte long string buffer which holds the header.
+def nti(s):
+ """Convert a number field to a python number.
+ """
+ # There are two possible encodings for a number field, see
+ # itn() below.
+ if s[0] != chr(0200):
+ n = int(s.rstrip(NUL) or "0", 8)
+ else:
+ n = 0L
+ for i in xrange(len(s) - 1):
+ n <<= 8
+ n += ord(s[i + 1])
+ return n
+
+def itn(n, digits=8, posix=False):
+ """Convert a python number to a number field.
+ """
+ # POSIX 1003.1-1988 requires numbers to be encoded as a string of
+ # octal digits followed by a null-byte, this allows values up to
+ # (8**(digits-1))-1. GNU tar allows storing numbers greater than
+ # that if necessary. A leading 0200 byte indicates this particular
+ # encoding, the following digits-1 bytes are a big-endian
+ # representation. This allows values up to (256**(digits-1))-1.
+ if 0 <= n < 8 ** (digits - 1):
+ s = "%0*o" % (digits - 1, n) + NUL
+ else:
+ if posix:
+ raise ValueError("overflow in number field")
+
+ if n < 0:
+ # XXX We mimic GNU tar's behaviour with negative numbers,
+ # this could raise OverflowError.
+ n = struct.unpack("L", struct.pack("l", n))[0]
+
+ s = ""
+ for i in xrange(digits - 1):
+ s = chr(n & 0377) + s
+ n >>= 8
+ s = chr(0200) + s
+ return s
+
+def calc_chksums(buf):
+ """Calculate the checksum for a member's header by summing up all
+ characters except for the chksum field which is treated as if
+ it was filled with spaces. According to the GNU tar sources,
+ some tars (Sun and NeXT) calculate chksum with signed char,
+ which will be different if there are chars in the buffer with
+ the high bit set. So we calculate two checksums, unsigned and
+ signed.
"""
- chk = 256 # chksum field is treated as blanks,
- # so the initial value is 8 * ord(" ")
- for c in buf[:148]: chk += ord(c) # sum up all bytes before chksum
- for c in buf[156:]: chk += ord(c) # sum up all bytes after chksum
- return chk
+ unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
+ signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
+ return unsigned_chksum, signed_chksum
def copyfileobj(src, dst, length=None):
"""Copy length bytes from fileobj src to fileobj dst.
@@ -163,13 +207,13 @@ def copyfileobj(src, dst, length=None):
for b in xrange(blocks):
buf = src.read(BUFSIZE)
if len(buf) < BUFSIZE:
- raise IOError, "end of file reached"
+ raise IOError("end of file reached")
dst.write(buf)
if remainder != 0:
buf = src.read(remainder)
if len(buf) < remainder:
- raise IOError, "end of file reached"
+ raise IOError("end of file reached")
dst.write(buf)
return
@@ -301,7 +345,7 @@ class _Stream:
try:
import zlib
except ImportError:
- raise CompressionError, "zlib module is not available"
+ raise CompressionError("zlib module is not available")
self.zlib = zlib
self.crc = zlib.crc32("")
if mode == "r":
@@ -313,7 +357,7 @@ class _Stream:
try:
import bz2
except ImportError:
- raise CompressionError, "bz2 module is not available"
+ raise CompressionError("bz2 module is not available")
if mode == "r":
self.dbuf = ""
self.cmp = bz2.BZ2Decompressor()
@@ -389,9 +433,9 @@ class _Stream:
# taken from gzip.GzipFile with some alterations
if self.__read(2) != "\037\213":
- raise ReadError, "not a gzip file"
+ raise ReadError("not a gzip file")
if self.__read(1) != "\010":
- raise CompressionError, "unsupported compression method"
+ raise CompressionError("unsupported compression method")
flag = ord(self.__read(1))
self.__read(6)
@@ -427,7 +471,7 @@ class _Stream:
self.read(self.bufsize)
self.read(remainder)
else:
- raise StreamError, "seeking backwards is not allowed"
+ raise StreamError("seeking backwards is not allowed")
return self.pos
def read(self, size=None):
@@ -508,6 +552,69 @@ class _StreamProxy(object):
self.fileobj.close()
# class StreamProxy
+class _BZ2Proxy(object):
+ """Small proxy class that enables external file object
+ support for "r:bz2" and "w:bz2" modes. This is actually
+ a workaround for a limitation in bz2 module's BZ2File
+ class which (unlike gzip.GzipFile) has no support for
+ a file object argument.
+ """
+
+ blocksize = 16 * 1024
+
+ def __init__(self, fileobj, mode):
+ self.fileobj = fileobj
+ self.mode = mode
+ self.init()
+
+ def init(self):
+ import bz2
+ self.pos = 0
+ if self.mode == "r":
+ self.bz2obj = bz2.BZ2Decompressor()
+ self.fileobj.seek(0)
+ self.buf = ""
+ else:
+ self.bz2obj = bz2.BZ2Compressor()
+
+ def read(self, size):
+ b = [self.buf]
+ x = len(self.buf)
+ while x < size:
+ try:
+ raw = self.fileobj.read(self.blocksize)
+ data = self.bz2obj.decompress(raw)
+ b.append(data)
+ except EOFError:
+ break
+ x += len(data)
+ self.buf = "".join(b)
+
+ buf = self.buf[:size]
+ self.buf = self.buf[size:]
+ self.pos += len(buf)
+ return buf
+
+ def seek(self, pos):
+ if pos < self.pos:
+ self.init()
+ self.read(pos - self.pos)
+
+ def tell(self):
+ return self.pos
+
+ def write(self, data):
+ self.pos += len(data)
+ raw = self.bz2obj.compress(data)
+ self.fileobj.write(raw)
+
+ def close(self):
+ if self.mode == "w":
+ raw = self.bz2obj.flush()
+ self.fileobj.write(raw)
+ self.fileobj.close()
+# class _BZ2Proxy
+
#------------------------
# Extraction file object
#------------------------
@@ -581,7 +688,7 @@ class ExFileObject(object):
"""Read operation for regular files.
"""
if self.closed:
- raise ValueError, "file is closed"
+ raise ValueError("file is closed")
self.fileobj.seek(self.offset + self.pos)
bytesleft = self.size - self.pos
if size is None:
@@ -595,7 +702,7 @@ class ExFileObject(object):
"""Read operation for sparse files.
"""
if self.closed:
- raise ValueError, "file is closed"
+ raise ValueError("file is closed")
if size is None:
size = self.size - self.pos
@@ -684,24 +791,24 @@ class TarInfo(object):
of the member.
"""
- self.name = name # member name (dirnames must end with '/')
- self.mode = 0666 # file permissions
- self.uid = 0 # user id
- self.gid = 0 # group id
- self.size = 0 # file size
- self.mtime = 0 # modification time
- self.chksum = 0 # header checksum
- self.type = REGTYPE # member type
- self.linkname = "" # link name
- self.uname = "user" # user name
- self.gname = "group" # group name
- self.devmajor = 0 #-
- self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
- self.prefix = "" # prefix to filename or holding information
- # about sparse files
-
- self.offset = 0 # the tar header starts here
- self.offset_data = 0 # the file's data starts here
+ self.name = name # member name (dirnames must end with '/')
+ self.mode = 0666 # file permissions
+ self.uid = 0 # user id
+ self.gid = 0 # group id
+ self.size = 0 # file size
+ self.mtime = 0 # modification time
+ self.chksum = 0 # header checksum
+ self.type = REGTYPE # member type
+ self.linkname = "" # link name
+ self.uname = "user" # user name
+ self.gname = "group" # group name
+ self.devmajor = 0 # device major number
+ self.devminor = 0 # device minor number
+ self.prefix = "" # prefix to filename or information
+ # about sparse files
+
+ self.offset = 0 # the tar header starts here
+ self.offset_data = 0 # the file's data starts here
def __repr__(self):
return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
@@ -710,95 +817,57 @@ class TarInfo(object):
def frombuf(cls, buf):
"""Construct a TarInfo object from a 512 byte string buffer.
"""
+ if len(buf) != BLOCKSIZE:
+ raise ValueError("truncated header")
+ if buf.count(NUL) == BLOCKSIZE:
+ raise ValueError("empty header")
+
tarinfo = cls()
- tarinfo.name = nts(buf[0:100])
- tarinfo.mode = int(buf[100:108], 8)
- tarinfo.uid = int(buf[108:116],8)
- tarinfo.gid = int(buf[116:124],8)
-
- # There are two possible codings for the size field we
- # have to discriminate, see comment in tobuf() below.
- if buf[124] != chr(0200):
- tarinfo.size = long(buf[124:136], 8)
- else:
- tarinfo.size = 0L
- for i in range(11):
- tarinfo.size <<= 8
- tarinfo.size += ord(buf[125 + i])
-
- tarinfo.mtime = long(buf[136:148], 8)
- tarinfo.chksum = int(buf[148:156], 8)
- tarinfo.type = buf[156:157]
- tarinfo.linkname = nts(buf[157:257])
- tarinfo.uname = nts(buf[265:297])
- tarinfo.gname = nts(buf[297:329])
- try:
- tarinfo.devmajor = int(buf[329:337], 8)
- tarinfo.devminor = int(buf[337:345], 8)
- except ValueError:
- tarinfo.devmajor = tarinfo.devmajor = 0
+ tarinfo.buf = buf
+ tarinfo.name = buf[0:100].rstrip(NUL)
+ tarinfo.mode = nti(buf[100:108])
+ tarinfo.uid = nti(buf[108:116])
+ tarinfo.gid = nti(buf[116:124])
+ tarinfo.size = nti(buf[124:136])
+ tarinfo.mtime = nti(buf[136:148])
+ tarinfo.chksum = nti(buf[148:156])
+ tarinfo.type = buf[156:157]
+ tarinfo.linkname = buf[157:257].rstrip(NUL)
+ tarinfo.uname = buf[265:297].rstrip(NUL)
+ tarinfo.gname = buf[297:329].rstrip(NUL)
+ tarinfo.devmajor = nti(buf[329:337])
+ tarinfo.devminor = nti(buf[337:345])
tarinfo.prefix = buf[345:500]
- # Some old tar programs represent a directory as a regular
- # file with a trailing slash.
- if tarinfo.isreg() and tarinfo.name.endswith("/"):
- tarinfo.type = DIRTYPE
-
- # The prefix field is used for filenames > 100 in
- # the POSIX standard.
- # name = prefix + '/' + name
- if tarinfo.type != GNUTYPE_SPARSE:
- tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))
-
- # Directory names should have a '/' at the end.
- if tarinfo.isdir():
- tarinfo.name += "/"
+ if tarinfo.chksum not in calc_chksums(buf):
+ raise ValueError("invalid header")
return tarinfo
- def tobuf(self):
+ def tobuf(self, posix=False):
"""Return a tar header block as a 512 byte string.
"""
- # Prefer the size to be encoded as 11 octal ascii digits
- # which is the most portable. If the size exceeds this
- # limit (>= 8 GB), encode it as an 88-bit value which is
- # a GNU tar feature.
- if self.size <= MAXSIZE_MEMBER:
- size = "%011o" % self.size
- else:
- s = self.size
- size = ""
- for i in range(11):
- size = chr(s & 0377) + size
- s >>= 8
- size = chr(0200) + size
-
- # The following code was contributed by Detlef Lannert.
- parts = []
- for value, fieldsize in (
- (self.name, 100),
- ("%07o" % (self.mode & 07777), 8),
- ("%07o" % self.uid, 8),
- ("%07o" % self.gid, 8),
- (size, 12),
- ("%011o" % self.mtime, 12),
- (" ", 8),
- (self.type, 1),
- (self.linkname, 100),
- (MAGIC, 6),
- (VERSION, 2),
- (self.uname, 32),
- (self.gname, 32),
- ("%07o" % self.devmajor, 8),
- ("%07o" % self.devminor, 8),
- (self.prefix, 155)
- ):
- l = len(value)
- parts.append(value[:fieldsize] + (fieldsize - l) * NUL)
-
- buf = "".join(parts)
- chksum = calc_chksum(buf)
+ parts = [
+ stn(self.name, 100),
+ itn(self.mode & 07777, 8, posix),
+ itn(self.uid, 8, posix),
+ itn(self.gid, 8, posix),
+ itn(self.size, 12, posix),
+ itn(self.mtime, 12, posix),
+ " ", # checksum field
+ self.type,
+ stn(self.linkname, 100),
+ stn(MAGIC, 6),
+ stn(VERSION, 2),
+ stn(self.uname, 32),
+ stn(self.gname, 32),
+ itn(self.devmajor, 8, posix),
+ itn(self.devminor, 8, posix),
+ stn(self.prefix, 155)
+ ]
+
+ buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
+ chksum = calc_chksums(buf)[0]
buf = buf[:148] + "%06o\0" % chksum + buf[155:]
- buf += (BLOCKSIZE - len(buf)) * NUL
self.buf = buf
return buf
@@ -857,7 +926,7 @@ class TarFile(object):
self.name = name
if len(mode) > 1 or mode not in "raw":
- raise ValueError, "mode must be 'r', 'a' or 'w'"
+ raise ValueError("mode must be 'r', 'a' or 'w'")
self._mode = mode
self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
@@ -873,12 +942,12 @@ class TarFile(object):
self.fileobj = fileobj
# Init datastructures
- self.closed = False
- self.members = [] # list of members as TarInfo objects
- self._loaded = False # flag if all members have been read
- self.offset = 0L # current position in the archive file
- self.inodes = {} # dictionary caching the inodes of
- # archive members already added
+ self.closed = False
+ self.members = [] # list of members as TarInfo objects
+ self._loaded = False # flag if all members have been read
+ self.offset = 0L # current position in the archive file
+ self.inodes = {} # dictionary caching the inodes of
+ # archive members already added
if self._mode == "r":
self.firstmember = None
@@ -937,7 +1006,7 @@ class TarFile(object):
"""
if not name and not fileobj:
- raise ValueError, "nothing to open"
+ raise ValueError("nothing to open")
if mode in ("r", "r:*"):
# Find out which *open() is appropriate for opening the file.
@@ -947,7 +1016,7 @@ class TarFile(object):
return func(name, "r", fileobj)
except (ReadError, CompressionError):
continue
- raise ReadError, "file could not be opened successfully"
+ raise ReadError("file could not be opened successfully")
elif ":" in mode:
filemode, comptype = mode.split(":", 1)
@@ -959,7 +1028,7 @@ class TarFile(object):
if comptype in cls.OPEN_METH:
func = getattr(cls, cls.OPEN_METH[comptype])
else:
- raise CompressionError, "unknown compression type %r" % comptype
+ raise CompressionError("unknown compression type %r" % comptype)
return func(name, filemode, fileobj)
elif "|" in mode:
@@ -968,7 +1037,7 @@ class TarFile(object):
comptype = comptype or "tar"
if filemode not in "rw":
- raise ValueError, "mode must be 'r' or 'w'"
+ raise ValueError("mode must be 'r' or 'w'")
t = cls(name, filemode,
_Stream(name, filemode, comptype, fileobj, bufsize))
@@ -978,14 +1047,14 @@ class TarFile(object):
elif mode in "aw":
return cls.taropen(name, mode, fileobj)
- raise ValueError, "undiscernible mode"
+ raise ValueError("undiscernible mode")
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
"""Open uncompressed tar archive name for reading or writing.
"""
if len(mode) > 1 or mode not in "raw":
- raise ValueError, "mode must be 'r', 'a' or 'w'"
+ raise ValueError("mode must be 'r', 'a' or 'w'")
return cls(name, mode, fileobj)
@classmethod
@@ -994,13 +1063,13 @@ class TarFile(object):
Appending is not allowed.
"""
if len(mode) > 1 or mode not in "rw":
- raise ValueError, "mode must be 'r' or 'w'"
+ raise ValueError("mode must be 'r' or 'w'")
try:
import gzip
gzip.GzipFile
except (ImportError, AttributeError):
- raise CompressionError, "gzip module is not available"
+ raise CompressionError("gzip module is not available")
pre, ext = os.path.splitext(name)
pre = os.path.basename(pre)
@@ -1021,7 +1090,7 @@ class TarFile(object):
gzip.GzipFile(name, mode, compresslevel, fileobj)
)
except IOError:
- raise ReadError, "not a gzip file"
+ raise ReadError("not a gzip file")
t._extfileobj = False
return t
@@ -1031,12 +1100,12 @@ class TarFile(object):
Appending is not allowed.
"""
if len(mode) > 1 or mode not in "rw":
- raise ValueError, "mode must be 'r' or 'w'."
+ raise ValueError("mode must be 'r' or 'w'.")
try:
import bz2
except ImportError:
- raise CompressionError, "bz2 module is not available"
+ raise CompressionError("bz2 module is not available")
pre, ext = os.path.splitext(name)
pre = os.path.basename(pre)
@@ -1047,12 +1116,14 @@ class TarFile(object):
tarname = pre + ext
if fileobj is not None:
- raise ValueError, "no support for external file objects"
+ fileobj = _BZ2Proxy(fileobj, mode)
+ else:
+ fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
try:
- t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
+ t = cls.taropen(tarname, mode, fileobj)
except IOError:
- raise ReadError, "not a bzip2 file"
+ raise ReadError("not a bzip2 file")
t._extfileobj = False
return t
@@ -1094,7 +1165,7 @@ class TarFile(object):
"""
tarinfo = self._getmember(name)
if tarinfo is None:
- raise KeyError, "filename %r not found" % name
+ raise KeyError("filename %r not found" % name)
return tarinfo
def getmembers(self):
@@ -1313,15 +1384,14 @@ class TarFile(object):
if tarinfo.size > MAXSIZE_MEMBER:
if self.posix:
- raise ValueError, "file is too large (>= 8 GB)"
+ raise ValueError("file is too large (>= 8 GB)")
else:
self._dbg(2, "tarfile: Created GNU tar largefile header")
if len(tarinfo.linkname) > LENGTH_LINK:
if self.posix:
- raise ValueError, "linkname is too long (>%d)" \
- % (LENGTH_LINK)
+ raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
else:
self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
@@ -1337,8 +1407,7 @@ class TarFile(object):
prefix = prefix[:-1]
if not prefix or len(name) > LENGTH_NAME:
- raise ValueError, "name is too long (>%d)" \
- % (LENGTH_NAME)
+ raise ValueError("name is too long (>%d)" % (LENGTH_NAME))
tarinfo.name = name
tarinfo.prefix = prefix
@@ -1347,7 +1416,7 @@ class TarFile(object):
tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")
- self.fileobj.write(tarinfo.tobuf())
+ self.fileobj.write(tarinfo.tobuf(self.posix))
self.offset += BLOCKSIZE
# If there's data to follow, append it.
@@ -1464,7 +1533,7 @@ class TarFile(object):
# A small but ugly workaround for the case that someone tries
# to extract a (sym)link as a file-object from a non-seekable
# stream of tar blocks.
- raise StreamError, "cannot extract (sym)link as file object"
+ raise StreamError("cannot extract (sym)link as file object")
else:
# A (sym)link's file object is its target's file object.
return self.extractfile(self._getmember(tarinfo.linkname,
@@ -1564,13 +1633,13 @@ class TarFile(object):
if hasattr(os, "mkfifo"):
os.mkfifo(targetpath)
else:
- raise ExtractError, "fifo not supported by system"
+ raise ExtractError("fifo not supported by system")
def makedev(self, tarinfo, targetpath):
"""Make a character or block device called targetpath.
"""
if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
- raise ExtractError, "special devices not supported by system"
+ raise ExtractError("special devices not supported by system")
mode = tarinfo.mode
if tarinfo.isblk():
@@ -1606,7 +1675,7 @@ class TarFile(object):
try:
shutil.copy2(linkpath, targetpath)
except EnvironmentError, e:
- raise IOError, "link could not be created"
+ raise IOError("link could not be created")
def chown(self, tarinfo, targetpath):
"""Set owner of targetpath according to tarinfo.
@@ -1634,7 +1703,7 @@ class TarFile(object):
if sys.platform != "os2emx":
os.chown(targetpath, u, g)
except EnvironmentError, e:
- raise ExtractError, "could not change owner"
+ raise ExtractError("could not change owner")
def chmod(self, tarinfo, targetpath):
"""Set file permissions of targetpath according to tarinfo.
@@ -1643,7 +1712,7 @@ class TarFile(object):
try:
os.chmod(targetpath, tarinfo.mode)
except EnvironmentError, e:
- raise ExtractError, "could not change mode"
+ raise ExtractError("could not change mode")
def utime(self, tarinfo, targetpath):
"""Set modification time of targetpath according to tarinfo.
@@ -1657,10 +1726,9 @@ class TarFile(object):
try:
os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
except EnvironmentError, e:
- raise ExtractError, "could not change modification time"
+ raise ExtractError("could not change modification time")
#--------------------------------------------------------------------------
-
def next(self):
"""Return the next member of the archive as a TarInfo object, when
TarFile is opened for reading. Return None if there is no more
@@ -1678,70 +1746,91 @@ class TarFile(object):
buf = self.fileobj.read(BLOCKSIZE)
if not buf:
return None
+
try:
tarinfo = TarInfo.frombuf(buf)
- except ValueError:
+
+ # We shouldn't rely on this checksum, because some tar programs
+ # calculate it differently and it is merely validating the
+ # header block. We could just as well skip this part, which would
+ # have a slight effect on performance...
+ if tarinfo.chksum not in calc_chksums(buf):
+ self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)
+
+ # Set the TarInfo object's offset to the current position of the
+ # TarFile and set self.offset to the position where the data blocks
+ # should begin.
+ tarinfo.offset = self.offset
+ self.offset += BLOCKSIZE
+
+ tarinfo = self.proc_member(tarinfo)
+
+ except ValueError, e:
if self.ignore_zeros:
- if buf.count(NUL) == BLOCKSIZE:
- adj = "empty"
- else:
- adj = "invalid"
- self._dbg(2, "0x%X: %s block" % (self.offset, adj))
+ self._dbg(2, "0x%X: empty or invalid block: %s" %
+ (self.offset, e))
self.offset += BLOCKSIZE
continue
else:
- # Block is empty or unreadable.
if self.offset == 0:
- # If the first block is invalid. That does not
- # look like a tar archive we can handle.
- raise ReadError,"empty, unreadable or compressed file"
+ raise ReadError("empty, unreadable or compressed "
+ "file: %s" % e)
return None
break
- # We shouldn't rely on this checksum, because some tar programs
- # calculate it differently and it is merely validating the
- # header block. We could just as well skip this part, which would
- # have a slight effect on performance...
- if tarinfo.chksum != calc_chksum(buf):
- self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)
-
- # Set the TarInfo object's offset to the current position of the
- # TarFile and set self.offset to the position where the data blocks
- # should begin.
- tarinfo.offset = self.offset
- self.offset += BLOCKSIZE
+ # Some old tar programs represent a directory as a regular
+ # file with a trailing slash.
+ if tarinfo.isreg() and tarinfo.name.endswith("/"):
+ tarinfo.type = DIRTYPE
- # Check if the TarInfo object has a typeflag for which a callback
- # method is registered in the TYPE_METH. If so, then call it.
- if tarinfo.type in self.TYPE_METH:
- return self.TYPE_METH[tarinfo.type](self, tarinfo)
+ # The prefix field is used for filenames > 100 in
+ # the POSIX standard.
+ # name = prefix + '/' + name
+ tarinfo.name = normpath(os.path.join(tarinfo.prefix.rstrip(NUL),
+ tarinfo.name))
- tarinfo.offset_data = self.offset
- if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
- # Skip the following data blocks.
- self.offset += self._block(tarinfo.size)
+ # Directory names should have a '/' at the end.
+ if tarinfo.isdir():
+ tarinfo.name += "/"
self.members.append(tarinfo)
return tarinfo
#--------------------------------------------------------------------------
- # Below are some methods which are called for special typeflags in the
- # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
- # are registered in TYPE_METH below. You can register your own methods
- # with this mapping.
- # A registered method is called with a TarInfo object as only argument.
- #
- # During its execution the method MUST perform the following tasks:
- # 1. set tarinfo.offset_data to the position where the data blocks begin,
- # if there is data to follow.
- # 2. set self.offset to the position where the next member's header will
+ # The following are methods that are called depending on the type of a
+ # member. The entry point is proc_member() which is called with a TarInfo
+ # object created from the header block from the current offset. The
+ # proc_member() method can be overridden in a subclass to add custom
+ # proc_*() methods. A proc_*() method MUST implement the following
+ # operations:
+ # 1. Set tarinfo.offset_data to the position where the data blocks begin,
+ # if there is data that follows.
+ # 2. Set self.offset to the position where the next member's header will
# begin.
- # 3. append the tarinfo object to self.members, if it is supposed to appear
- # as a member of the TarFile object.
- # 4. return tarinfo or another valid TarInfo object.
+ # 3. Return tarinfo or another valid TarInfo object.
+ def proc_member(self, tarinfo):
+ """Choose the right processing method for tarinfo depending
+ on its type and call it.
+ """
+ if tarinfo.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
+ return self.proc_gnulong(tarinfo)
+ elif tarinfo.type == GNUTYPE_SPARSE:
+ return self.proc_sparse(tarinfo)
+ else:
+ return self.proc_builtin(tarinfo)
+
+ def proc_builtin(self, tarinfo):
+ """Process a builtin type member or an unknown member
+ which will be treated as a regular file.
+ """
+ tarinfo.offset_data = self.offset
+ if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
+ # Skip the following data blocks.
+ self.offset += self._block(tarinfo.size)
+ return tarinfo
def proc_gnulong(self, tarinfo):
- """Evaluate the blocks that hold a GNU longname
+ """Process the blocks that hold a GNU longname
or longlink member.
"""
buf = ""
@@ -1752,21 +1841,27 @@ class TarFile(object):
self.offset += BLOCKSIZE
count -= BLOCKSIZE
- # Fetch the next header
- next = self.next()
+ # Fetch the next header and process it.
+ b = self.fileobj.read(BLOCKSIZE)
+ t = TarInfo.frombuf(b)
+ t.offset = self.offset
+ self.offset += BLOCKSIZE
+ next = self.proc_member(t)
+ # Patch the TarInfo object from the next header with
+ # the longname information.
next.offset = tarinfo.offset
if tarinfo.type == GNUTYPE_LONGNAME:
- next.name = nts(buf)
+ next.name = buf.rstrip(NUL)
elif tarinfo.type == GNUTYPE_LONGLINK:
- next.linkname = nts(buf)
+ next.linkname = buf.rstrip(NUL)
return next
def proc_sparse(self, tarinfo):
- """Analyze a GNU sparse header plus extra headers.
+ """Process a GNU sparse header plus extra headers.
"""
- buf = tarinfo.tobuf()
+ buf = tarinfo.buf
sp = _ringbuffer()
pos = 386
lastpos = 0L
@@ -1775,8 +1870,8 @@ class TarFile(object):
# first header.
for i in xrange(4):
try:
- offset = int(buf[pos:pos + 12], 8)
- numbytes = int(buf[pos + 12:pos + 24], 8)
+ offset = nti(buf[pos:pos + 12])
+ numbytes = nti(buf[pos + 12:pos + 24])
except ValueError:
break
if offset > lastpos:
@@ -1787,7 +1882,7 @@ class TarFile(object):
pos += 24
isextended = ord(buf[482])
- origsize = int(buf[483:495], 8)
+ origsize = nti(buf[483:495])
# If the isextended flag is given,
# there are extra headers to process.
@@ -1797,8 +1892,8 @@ class TarFile(object):
pos = 0
for i in xrange(21):
try:
- offset = int(buf[pos:pos + 12], 8)
- numbytes = int(buf[pos + 12:pos + 24], 8)
+ offset = nti(buf[pos:pos + 12])
+ numbytes = nti(buf[pos + 12:pos + 24])
except ValueError:
break
if offset > lastpos:
@@ -1818,17 +1913,11 @@ class TarFile(object):
self.offset += self._block(tarinfo.size)
tarinfo.size = origsize
- self.members.append(tarinfo)
- return tarinfo
+ # Clear the prefix field so that it is not used
+ # as a pathname in next().
+ tarinfo.prefix = ""
- # The type mapping for the next() method. The keys are single character
- # strings, the typeflag. The values are methods which are called when
- # next() encounters such a typeflag.
- TYPE_METH = {
- GNUTYPE_LONGNAME: proc_gnulong,
- GNUTYPE_LONGLINK: proc_gnulong,
- GNUTYPE_SPARSE: proc_sparse
- }
+ return tarinfo
#--------------------------------------------------------------------------
# Little helper methods:
@@ -1873,9 +1962,9 @@ class TarFile(object):
corresponds to TarFile's mode.
"""
if self.closed:
- raise IOError, "%s is closed" % self.__class__.__name__
+ raise IOError("%s is closed" % self.__class__.__name__)
if mode is not None and self._mode not in mode:
- raise IOError, "bad operation for mode %r" % self._mode
+ raise IOError("bad operation for mode %r" % self._mode)
def __iter__(self):
"""Provide an iterator object.
@@ -2011,7 +2100,7 @@ class TarFileCompat:
elif compression == TAR_GZIPPED:
self.tarfile = TarFile.gzopen(file, mode)
else:
- raise ValueError, "unknown compression constant"
+ raise ValueError("unknown compression constant")
if mode[0:1] == "r":
members = self.tarfile.getmembers()
for m in members: