path: root/Lib/tarfile.py
author    Andrew M. Kuchling <amk@amk.ca>    2004-07-10 22:02:11 (GMT)
committer Andrew M. Kuchling <amk@amk.ca>    2004-07-10 22:02:11 (GMT)
commit    864bba1981cbb3284b43cf19a40f909296323d02 (patch)
tree      c5bb383d47c8fddbf79506fb085ac159cd0f5007 /Lib/tarfile.py
parent    f027ca816741fcea6267c6e8ea33ae43edfb7447 (diff)
[Patch 988444]
- Read multiple special headers: fixed/improved handling of extended/special
  headers in read mode (adding new extended headers should be less painful now).
- Improved the nts() function.
- Removed the TarFile.chunks data structure, which is not (and never was) needed.
- Fixed TarInfo.tobuf(): fields could overflow with too-large values; values
  are now clipped.
Diffstat (limited to 'Lib/tarfile.py')
-rw-r--r--  Lib/tarfile.py | 65
1 file changed, 31 insertions(+), 34 deletions(-)
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 6f44146..41257f1 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -135,7 +135,7 @@ TOEXEC = 0001 # execute/search by other
def nts(s):
"""Convert a null-terminated string buffer to a python string.
"""
- return s.split(NUL, 1)[0]
+ return s.rstrip(NUL)
def calc_chksum(buf):
"""Calculate the checksum for a member's header. It's a simple addition
@@ -713,7 +713,7 @@ class TarInfo(object):
(self.prefix, 155)
):
l = len(value)
- parts.append(value + (fieldsize - l) * NUL)
+ parts.append(value[:fieldsize] + (fieldsize - l) * NUL)
buf = "".join(parts)
chksum = calc_chksum(buf)
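
A minimal sketch (the helper name is hypothetical) of the clipping behaviour the fixed
tobuf() relies on: slicing caps the value at the field width, and a negative pad count
multiplies to an empty string, so a field can no longer overflow into its neighbour:

    NUL = "\0"

    def pack_field(value, fieldsize):
        # Same expression as the patched tobuf() line above.
        return value[:fieldsize] + (fieldsize - len(value)) * NUL

    assert len(pack_field("short", 8)) == 8          # short value: padded with NULs
    assert len(pack_field("x" * 200, 100)) == 100    # long value: clipped, not overflowing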
@@ -796,7 +796,6 @@ class TarFile(object):
self.closed = False
self.members = [] # list of members as TarInfo objects
self.membernames = [] # names of members
- self.chunks = [0] # chunk cache
self._loaded = False # flag if all members have been read
self.offset = 0L # current position in the archive file
self.inodes = {} # dictionary caching the inodes of
@@ -1281,9 +1280,7 @@ class TarFile(object):
blocks += 1
self.offset += blocks * BLOCKSIZE
- self.members.append(tarinfo)
- self.membernames.append(tarinfo.name)
- self.chunks.append(self.offset)
+ self._record_member(tarinfo)
def extract(self, member, path=""):
"""Extract a member from the archive to the current working directory,
@@ -1551,7 +1548,7 @@ class TarFile(object):
return m
# Read the next block.
- self.fileobj.seek(self.chunks[-1])
+ self.fileobj.seek(self.offset)
while True:
buf = self.fileobj.read(BLOCKSIZE)
if not buf:
@@ -1569,7 +1566,7 @@ class TarFile(object):
continue
else:
# Block is empty or unreadable.
- if self.chunks[-1] == 0:
+ if self.offset == 0:
# If the first block is invalid. That does not
# look like a tar archive we can handle.
raise ReadError,"empty, unreadable or compressed file"
@@ -1592,20 +1589,18 @@ class TarFile(object):
# Check if the TarInfo object has a typeflag for which a callback
# method is registered in the TYPE_METH. If so, then call it.
if tarinfo.type in self.TYPE_METH:
- tarinfo = self.TYPE_METH[tarinfo.type](self, tarinfo)
- else:
- tarinfo.offset_data = self.offset
- if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
- # Skip the following data blocks.
- self.offset += self._block(tarinfo.size)
+ return self.TYPE_METH[tarinfo.type](self, tarinfo)
+
+ tarinfo.offset_data = self.offset
+ if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
+ # Skip the following data blocks.
+ self.offset += self._block(tarinfo.size)
if tarinfo.isreg() and tarinfo.name[:-1] == "/":
# some old tar programs don't know DIRTYPE
tarinfo.type = DIRTYPE
- self.members.append(tarinfo)
- self.membernames.append(tarinfo.name)
- self.chunks.append(self.offset)
+ self._record_member(tarinfo)
return tarinfo
#--------------------------------------------------------------------------
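
The hunk above turns the TYPE_METH lookup into a plain dispatch: the registered handler
now returns whatever TarInfo object the caller should see. A small self-contained sketch
of that pattern (MiniReader and its handlers are illustrative, not from tarfile.py):

    class MiniReader(object):
        def proc_long(self, info):            # hypothetical special-header handler
            info["name"] = "a/very/long/name"
            return info

        def proc_default(self, info):         # ordinary members fall through here
            return info

        TYPE_METH = {"L": proc_long}          # keyed by typeflag, like TarFile.TYPE_METH

        def next(self, info):
            if info["type"] in self.TYPE_METH:
                return self.TYPE_METH[info["type"]](self, info)
            return self.proc_default(info)

    reader = MiniReader()
    assert reader.next({"type": "L", "name": ""})["name"] == "a/very/long/name"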
@@ -1620,7 +1615,9 @@ class TarFile(object):
# if there is data to follow.
# 2. set self.offset to the position where the next member's header will
# begin.
- # 3. return a valid TarInfo object.
+ # 3. call self._record_member() if the tarinfo object is supposed to
+ # appear as a member of the TarFile object.
+ # 4. return tarinfo or another valid TarInfo object.
def proc_gnulong(self, tarinfo):
"""Evaluate the blocks that hold a GNU longname
@@ -1636,24 +1633,16 @@ class TarFile(object):
self.offset += BLOCKSIZE
count -= BLOCKSIZE
- if tarinfo.type == GNUTYPE_LONGNAME:
- name = nts(buf)
- if tarinfo.type == GNUTYPE_LONGLINK:
- linkname = nts(buf)
-
- buf = self.fileobj.read(BLOCKSIZE)
+ # Fetch the next header
+ next = self.next()
- tarinfo = TarInfo.frombuf(buf)
- tarinfo.offset = self.offset
- self.offset += BLOCKSIZE
- tarinfo.offset_data = self.offset
- tarinfo.name = name or tarinfo.name
- tarinfo.linkname = linkname or tarinfo.linkname
+ next.offset = tarinfo.offset
+ if tarinfo.type == GNUTYPE_LONGNAME:
+ next.name = nts(buf)
+ elif tarinfo.type == GNUTYPE_LONGLINK:
+ next.linkname = nts(buf)
- if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
- # Skip the following data blocks.
- self.offset += self._block(tarinfo.size)
- return tarinfo
+ return next
def proc_sparse(self, tarinfo):
"""Analyze a GNU sparse header plus extra headers.
@@ -1709,6 +1698,8 @@ class TarFile(object):
tarinfo.offset_data = self.offset
self.offset += self._block(tarinfo.size)
tarinfo.size = origsize
+
+ self._record_member(tarinfo)
return tarinfo
# The type mapping for the next() method. The keys are single character
@@ -1745,6 +1736,12 @@ class TarFile(object):
if name == self.membernames[i]:
return self.members[i]
+ def _record_member(self, tarinfo):
+ """Record a tarinfo object in the internal datastructures.
+ """
+ self.members.append(tarinfo)
+ self.membernames.append(tarinfo.name)
+
def _load(self):
"""Read through the entire archive file and look for readable
members.
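
For context, getmember() above searches membernames and indexes into members, so the two
lists must stay parallel; _record_member() centralises that bookkeeping. A tiny
illustration of the invariant, using plain lists and dicts rather than the real classes:

    members, membernames = [], []

    def record_member(info):                  # mirrors TarFile._record_member()
        members.append(info)
        membernames.append(info["name"])

    record_member({"name": "README", "size": 42})
    assert members[membernames.index("README")]["size"] == 42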