summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Lars Gustäbel <lars@gustaebel.de> 2008-04-14 10:05:48 (GMT)
committer Lars Gustäbel <lars@gustaebel.de> 2008-04-14 10:05:48 (GMT)
commit c2ea8c6c3ace398ed757f104d59b32ecad046281 (patch)
tree d872662366247b986ae63e1a787b434937682576
parent 13d4a61075d4340cd7aa1308b2a0b25aac2cc353 (diff)
download cpython-c2ea8c6c3ace398ed757f104d59b32ecad046281.zip
cpython-c2ea8c6c3ace398ed757f104d59b32ecad046281.tar.gz
cpython-c2ea8c6c3ace398ed757f104d59b32ecad046281.tar.bz2
Issue #2058: Remove the buf attribute and add __slots__ to the
TarInfo class in order to reduce tarfile's memory usage.
-rw-r--r-- Lib/tarfile.py 77
-rw-r--r-- Misc/NEWS 3
2 files changed, 46 insertions, 34 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index b789cca..c744951 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -767,7 +767,7 @@ class ExFileObject(object):
self.fileobj = _FileInFile(tarfile.fileobj,
tarinfo.offset_data,
tarinfo.size,
- getattr(tarinfo, "sparse", None))
+ tarinfo.sparse)
self.name = tarinfo.name
self.mode = "r"
self.closed = False
@@ -906,6 +906,12 @@ class TarInfo(object):
usually created internally.
"""
+ __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
+ "chksum", "type", "linkname", "uname", "gname",
+ "devmajor", "devminor",
+ "offset", "offset_data", "pax_headers", "sparse",
+ "tarfile", "_sparse_structs", "_link_target")
+
def __init__(self, name=""):
"""Construct a TarInfo object. name is the optional name
of the member.
@@ -927,6 +933,7 @@ class TarInfo(object):
self.offset = 0 # the tar header starts here
self.offset_data = 0 # the file's data starts here
+ self.sparse = None # sparse member information
self.pax_headers = {} # pax header information
# In pax headers the "name" and "linkname" field are called
@@ -1181,7 +1188,6 @@ class TarInfo(object):
raise HeaderError("bad checksum")
obj = cls()
- obj.buf = buf
obj.name = nts(buf[0:100], encoding, errors)
obj.mode = nti(buf[100:108])
obj.uid = nti(buf[108:116])
@@ -1202,6 +1208,24 @@ class TarInfo(object):
if obj.type == AREGTYPE and obj.name.endswith("/"):
obj.type = DIRTYPE
+ # The old GNU sparse format occupies some of the unused
+ # space in the buffer for up to 4 sparse structures.
+ # Save them for later processing in _proc_sparse().
+ if obj.type == GNUTYPE_SPARSE:
+ pos = 386
+ structs = []
+ for i in range(4):
+ try:
+ offset = nti(buf[pos:pos + 12])
+ numbytes = nti(buf[pos + 12:pos + 24])
+ except ValueError:
+ break
+ structs.append((offset, numbytes))
+ pos += 24
+ isextended = bool(buf[482])
+ origsize = nti(buf[483:495])
+ obj._sparse_structs = (structs, isextended, origsize)
+
# Remove redundant slashes from directories.
if obj.isdir():
obj.name = obj.name.rstrip("/")
@@ -1288,31 +1312,11 @@ class TarInfo(object):
def _proc_sparse(self, tarfile):
"""Process a GNU sparse header plus extra headers.
"""
- buf = self.buf
- sp = _ringbuffer()
- pos = 386
- lastpos = 0
- realpos = 0
- # There are 4 possible sparse structs in the
- # first header.
- for i in range(4):
- try:
- offset = nti(buf[pos:pos + 12])
- numbytes = nti(buf[pos + 12:pos + 24])
- except ValueError:
- break
- if offset > lastpos:
- sp.append(_hole(lastpos, offset - lastpos))
- sp.append(_data(offset, numbytes, realpos))
- realpos += numbytes
- lastpos = offset + numbytes
- pos += 24
-
- isextended = bool(buf[482])
- origsize = nti(buf[483:495])
+ # We already collected some sparse structures in frombuf().
+ structs, isextended, origsize = self._sparse_structs
+ del self._sparse_structs
- # If the isextended flag is given,
- # there are extra headers to process.
+ # Collect sparse structures from extended header blocks.
while isextended:
buf = tarfile.fileobj.read(BLOCKSIZE)
pos = 0
@@ -1322,18 +1326,23 @@ class TarInfo(object):
numbytes = nti(buf[pos + 12:pos + 24])
except ValueError:
break
- if offset > lastpos:
- sp.append(_hole(lastpos, offset - lastpos))
- sp.append(_data(offset, numbytes, realpos))
- realpos += numbytes
- lastpos = offset + numbytes
+ structs.append((offset, numbytes))
pos += 24
isextended = bool(buf[504])
+ # Transform the sparse structures to something we can use
+ # in ExFileObject.
+ self.sparse = _ringbuffer()
+ lastpos = 0
+ realpos = 0
+ for offset, numbytes in structs:
+ if offset > lastpos:
+ self.sparse.append(_hole(lastpos, offset - lastpos))
+ self.sparse.append(_data(offset, numbytes, realpos))
+ realpos += numbytes
+ lastpos = offset + numbytes
if lastpos < origsize:
- sp.append(_hole(lastpos, origsize - lastpos))
-
- self.sparse = sp
+ self.sparse.append(_hole(lastpos, origsize - lastpos))
self.offset_data = tarfile.fileobj.tell()
tarfile.offset = self.offset_data + self._block(self.size)
diff --git a/Misc/NEWS b/Misc/NEWS
index 40de496..58524ee 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -29,6 +29,9 @@ Extension Modules
Library
-------
+- Issue #2058: Remove the buf attribute and add __slots__ to the TarInfo
+ class in order to reduce tarfile's memory usage.
+
- Bug #2606: Avoid calling .sort() on a dict_keys object.
- The bundled libffi copy is now in sync with the recently released