summary | refs | log | tree | commit | diff | stats
path: root/Lib/tarfile.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/tarfile.py')
-rw-r--r-- Lib/tarfile.py 324
1 files changed, 180 insertions, 144 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 14553a7..3ffdff3 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -147,7 +147,10 @@ def nti(s):
# There are two possible encodings for a number field, see
# itn() below.
if s[0] != chr(0200):
- n = int(s.rstrip(NUL + " ") or "0", 8)
+ try:
+ n = int(s.rstrip(NUL + " ") or "0", 8)
+ except ValueError:
+ raise HeaderError("invalid header")
else:
n = 0L
for i in xrange(len(s) - 1):
@@ -282,6 +285,9 @@ class CompressionError(TarError):
class StreamError(TarError):
"""Exception for unsupported operations on stream-like TarFiles."""
pass
+class HeaderError(TarError):
+ """Exception for invalid headers."""
+ pass
#---------------------------
# internal stream interface
@@ -624,140 +630,194 @@ class _BZ2Proxy(object):
#------------------------
# Extraction file object
#------------------------
-class ExFileObject(object):
- """File-like object for reading an archive member.
- Is returned by TarFile.extractfile(). Support for
- sparse files included.
+class _FileInFile(object):
+ """A thin wrapper around an existing file object that
+ provides a part of its data as an individual file
+ object.
"""
- def __init__(self, tarfile, tarinfo):
- self.fileobj = tarfile.fileobj
- self.name = tarinfo.name
- self.mode = "r"
- self.closed = False
- self.offset = tarinfo.offset_data
- self.size = tarinfo.size
- self.pos = 0L
- self.linebuffer = ""
- if tarinfo.issparse():
- self.sparse = tarinfo.sparse
- self.read = self._readsparse
- else:
- self.read = self._readnormal
+ def __init__(self, fileobj, offset, size, sparse=None):
+ self.fileobj = fileobj
+ self.offset = offset
+ self.size = size
+ self.sparse = sparse
+ self.position = 0
- def __read(self, size):
- """Overloadable read method.
+ def tell(self):
+ """Return the current file position.
"""
- return self.fileobj.read(size)
+ return self.position
- def readline(self, size=-1):
- """Read a line with approx. size. If size is negative,
- read a whole line. readline() and read() must not
- be mixed up (!).
+ def seek(self, position):
+ """Seek to a position in the file.
"""
- if size < 0:
- size = sys.maxint
+ self.position = position
- nl = self.linebuffer.find("\n")
- if nl >= 0:
- nl = min(nl, size)
+ def read(self, size=None):
+ """Read data from the file.
+ """
+ if size is None:
+ size = self.size - self.position
else:
- size -= len(self.linebuffer)
- while (nl < 0 and size > 0):
- buf = self.read(min(size, 100))
- if not buf:
- break
- self.linebuffer += buf
- size -= len(buf)
- nl = self.linebuffer.find("\n")
- if nl == -1:
- s = self.linebuffer
- self.linebuffer = ""
- return s
- buf = self.linebuffer[:nl]
- self.linebuffer = self.linebuffer[nl + 1:]
- while buf[-1:] == "\r":
- buf = buf[:-1]
- return buf + "\n"
+ size = min(size, self.size - self.position)
- def readlines(self):
- """Return a list with all (following) lines.
- """
- result = []
- while True:
- line = self.readline()
- if not line: break
- result.append(line)
- return result
+ if self.sparse is None:
+ return self.readnormal(size)
+ else:
+ return self.readsparse(size)
- def _readnormal(self, size=None):
+ def readnormal(self, size):
"""Read operation for regular files.
"""
- if self.closed:
- raise ValueError("file is closed")
- self.fileobj.seek(self.offset + self.pos)
- bytesleft = self.size - self.pos
- if size is None:
- bytestoread = bytesleft
- else:
- bytestoread = min(size, bytesleft)
- self.pos += bytestoread
- return self.__read(bytestoread)
+ self.fileobj.seek(self.offset + self.position)
+ self.position += size
+ return self.fileobj.read(size)
- def _readsparse(self, size=None):
+ def readsparse(self, size):
"""Read operation for sparse files.
"""
- if self.closed:
- raise ValueError("file is closed")
-
- if size is None:
- size = self.size - self.pos
-
data = []
while size > 0:
- buf = self._readsparsesection(size)
+ buf = self.readsparsesection(size)
if not buf:
break
size -= len(buf)
data.append(buf)
return "".join(data)
- def _readsparsesection(self, size):
+ def readsparsesection(self, size):
"""Read a single section of a sparse file.
"""
- section = self.sparse.find(self.pos)
+ section = self.sparse.find(self.position)
if section is None:
return ""
- toread = min(size, section.offset + section.size - self.pos)
+ size = min(size, section.offset + section.size - self.position)
+
if isinstance(section, _data):
- realpos = section.realpos + self.pos - section.offset
- self.pos += toread
+ realpos = section.realpos + self.position - section.offset
self.fileobj.seek(self.offset + realpos)
- return self.__read(toread)
+ self.position += size
+ return self.fileobj.read(size)
else:
- self.pos += toread
- return NUL * toread
+ self.position += size
+ return NUL * size
+#class _FileInFile
+
+
+class ExFileObject(object):
+ """File-like object for reading an archive member.
+ Is returned by TarFile.extractfile().
+ """
+ blocksize = 1024
+
+ def __init__(self, tarfile, tarinfo):
+ self.fileobj = _FileInFile(tarfile.fileobj,
+ tarinfo.offset_data,
+ tarinfo.size,
+ getattr(tarinfo, "sparse", None))
+ self.name = tarinfo.name
+ self.mode = "r"
+ self.closed = False
+ self.size = tarinfo.size
+
+ self.position = 0
+ self.buffer = ""
+
+ def read(self, size=None):
+ """Read at most size bytes from the file. If size is not
+ present or None, read all data until EOF is reached.
+ """
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+
+ buf = ""
+ if self.buffer:
+ if size is None:
+ buf = self.buffer
+ self.buffer = ""
+ else:
+ buf = self.buffer[:size]
+ self.buffer = self.buffer[size:]
+
+ if size is None:
+ buf += self.fileobj.read()
+ else:
+ buf += self.fileobj.read(size - len(buf))
+
+ self.position += len(buf)
+ return buf
+
+ def readline(self, size=-1):
+ """Read one entire line from the file. If size is present
+ and non-negative, return a string with at most that
+ size, which may be an incomplete line.
+ """
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+
+ if "\n" in self.buffer:
+ pos = self.buffer.find("\n") + 1
+ else:
+ buffers = [self.buffer]
+ while True:
+ buf = self.fileobj.read(self.blocksize)
+ buffers.append(buf)
+ if not buf or "\n" in buf:
+ self.buffer = "".join(buffers)
+ pos = self.buffer.find("\n") + 1
+ if pos == 0:
+ # no newline found.
+ pos = len(self.buffer)
+ break
+
+ if size != -1:
+ pos = min(size, pos)
+
+ buf = self.buffer[:pos]
+ self.buffer = self.buffer[pos:]
+ self.position += len(buf)
+ return buf
+
+ def readlines(self):
+ """Return a list with all remaining lines.
+ """
+ result = []
+ while True:
+ line = self.readline()
+ if not line: break
+ result.append(line)
+ return result
def tell(self):
"""Return the current file position.
"""
- return self.pos
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+
+ return self.position
- def seek(self, pos, whence=0):
+ def seek(self, pos, whence=os.SEEK_SET):
"""Seek to a position in the file.
"""
- self.linebuffer = ""
- if whence == 0:
- self.pos = min(max(pos, 0), self.size)
- if whence == 1:
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+
+ if whence == os.SEEK_SET:
+ self.position = min(max(pos, 0), self.size)
+ elif whence == os.SEEK_CUR:
if pos < 0:
- self.pos = max(self.pos + pos, 0)
+ self.position = max(self.position + pos, 0)
else:
- self.pos = min(self.pos + pos, self.size)
- if whence == 2:
- self.pos = max(min(self.size + pos, self.size), 0)
+ self.position = min(self.position + pos, self.size)
+ elif whence == os.SEEK_END:
+ self.position = max(min(self.size + pos, self.size), 0)
+ else:
+ raise ValueError("Invalid argument")
+
+ self.buffer = ""
+ self.fileobj.seek(self.position)
def close(self):
"""Close the file object.
@@ -765,20 +825,13 @@ class ExFileObject(object):
self.closed = True
def __iter__(self):
- """Get an iterator over the file object.
- """
- if self.closed:
- raise ValueError("I/O operation on closed file")
- return self
-
- def next(self):
- """Get the next item from the file iterator.
+ """Get an iterator over the file's lines.
"""
- result = self.readline()
- if not result:
- raise StopIteration
- return result
-
+ while True:
+ line = self.readline()
+ if not line:
+ break
+ yield line
#class ExFileObject
#------------------
@@ -821,9 +874,13 @@ class TarInfo(object):
"""Construct a TarInfo object from a 512 byte string buffer.
"""
if len(buf) != BLOCKSIZE:
- raise ValueError("truncated header")
+ raise HeaderError("truncated header")
if buf.count(NUL) == BLOCKSIZE:
- raise ValueError("empty header")
+ raise HeaderError("empty header")
+
+ chksum = nti(buf[148:156])
+ if chksum not in calc_chksums(buf):
+ raise HeaderError("bad checksum")
tarinfo = cls()
tarinfo.buf = buf
@@ -833,7 +890,7 @@ class TarInfo(object):
tarinfo.gid = nti(buf[116:124])
tarinfo.size = nti(buf[124:136])
tarinfo.mtime = nti(buf[136:148])
- tarinfo.chksum = nti(buf[148:156])
+ tarinfo.chksum = chksum
tarinfo.type = buf[156:157]
tarinfo.linkname = buf[157:257].rstrip(NUL)
tarinfo.uname = buf[265:297].rstrip(NUL)
@@ -845,8 +902,6 @@ class TarInfo(object):
if prefix and not tarinfo.issparse():
tarinfo.name = prefix + "/" + tarinfo.name
- if tarinfo.chksum not in calc_chksums(buf):
- raise ValueError("invalid header")
return tarinfo
def tobuf(self, posix=False):
@@ -999,7 +1054,7 @@ class TarFile(object):
can be determined, `mode' is overridden by `fileobj's mode.
`fileobj' is not closed, when TarFile is closed.
"""
- self.name = name
+ self.name = os.path.abspath(name)
if len(mode) > 1 or mode not in "raw":
raise ValueError("mode must be 'r', 'a' or 'w'")
@@ -1011,7 +1066,7 @@ class TarFile(object):
self._extfileobj = False
else:
if self.name is None and hasattr(fileobj, "name"):
- self.name = fileobj.name
+ self.name = os.path.abspath(fileobj.name)
if hasattr(fileobj, "mode"):
self.mode = fileobj.mode
self._extfileobj = True
@@ -1088,9 +1143,13 @@ class TarFile(object):
# Find out which *open() is appropriate for opening the file.
for comptype in cls.OPEN_METH:
func = getattr(cls, cls.OPEN_METH[comptype])
+ if fileobj is not None:
+ saved_pos = fileobj.tell()
try:
return func(name, "r", fileobj)
except (ReadError, CompressionError):
+ if fileobj is not None:
+ fileobj.seek(saved_pos)
continue
raise ReadError("file could not be opened successfully")
@@ -1147,24 +1206,12 @@ class TarFile(object):
except (ImportError, AttributeError):
raise CompressionError("gzip module is not available")
- pre, ext = os.path.splitext(name)
- pre = os.path.basename(pre)
- if ext == ".tgz":
- ext = ".tar"
- if ext == ".gz":
- ext = ""
- tarname = pre + ext
-
if fileobj is None:
fileobj = _open(name, mode + "b")
- if mode != "r":
- name = tarname
-
try:
- t = cls.taropen(tarname, mode,
- gzip.GzipFile(name, mode, compresslevel, fileobj)
- )
+ t = cls.taropen(name, mode,
+ gzip.GzipFile(name, mode, compresslevel, fileobj))
except IOError:
raise ReadError("not a gzip file")
t._extfileobj = False
@@ -1183,21 +1230,13 @@ class TarFile(object):
except ImportError:
raise CompressionError("bz2 module is not available")
- pre, ext = os.path.splitext(name)
- pre = os.path.basename(pre)
- if ext == ".tbz2":
- ext = ".tar"
- if ext == ".bz2":
- ext = ""
- tarname = pre + ext
-
if fileobj is not None:
fileobj = _BZ2Proxy(fileobj, mode)
else:
fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
try:
- t = cls.taropen(tarname, mode, fileobj)
+ t = cls.taropen(name, mode, fileobj)
except IOError:
raise ReadError("not a bzip2 file")
t._extfileobj = False
@@ -1402,8 +1441,7 @@ class TarFile(object):
arcname = name
# Skip if somebody tries to archive the archive...
- if self.name is not None \
- and os.path.abspath(name) == os.path.abspath(self.name):
+ if self.name is not None and os.path.abspath(name) == self.name:
self._dbg(2, "tarfile: Skipped %r" % name)
return
@@ -1795,16 +1833,14 @@ class TarFile(object):
tarinfo = self.proc_member(tarinfo)
- except ValueError, e:
+ except HeaderError, e:
if self.ignore_zeros:
- self._dbg(2, "0x%X: empty or invalid block: %s" %
- (self.offset, e))
+ self._dbg(2, "0x%X: %s" % (self.offset, e))
self.offset += BLOCKSIZE
continue
else:
if self.offset == 0:
- raise ReadError("empty, unreadable or compressed "
- "file: %s" % e)
+ raise ReadError(str(e))
return None
break