1 files changed, 729 insertions, 368 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index aac6a5d..98ae12f 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -33,7 +33,7 @@
 __version__ = "$Revision$"
 # $Source$
 
-version     = "0.8.0"
+version     = "0.9.0"
 __author__  = "Lars Gustäbel (lars@gustaebel.de)"
 __date__    = "$Date$"
 __cvsid__   = "$Id$"
@@ -50,6 +50,8 @@ import errno
 import time
 import struct
 import copy
+import re
+import operator
 
 if sys.platform == 'mac':
     # This module needs work for MacOS9, especially in the area of pathname
@@ -69,42 +71,71 @@ __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
 #---------------------------------------------------------
 # tar constants
 #---------------------------------------------------------
-NUL        = "\0"               # the null character
-BLOCKSIZE  = 512                # length of processing blocks
+NUL = "\0"                      # the null character
+BLOCKSIZE = 512                 # length of processing blocks
 RECORDSIZE = BLOCKSIZE * 20     # length of records
-MAGIC      = "ustar"            # magic tar string
-VERSION    = "00"               # version number
+GNU_MAGIC = "ustar  \0"         # magic gnu tar string
+POSIX_MAGIC = "ustar\x0000"     # magic posix tar string
 
-LENGTH_NAME    = 100            # maximum length of a filename
-LENGTH_LINK    = 100            # maximum length of a linkname
-LENGTH_PREFIX  = 155            # maximum length of the prefix field
-MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits)
+LENGTH_NAME = 100               # maximum length of a filename
+LENGTH_LINK = 100               # maximum length of a linkname
+LENGTH_PREFIX = 155             # maximum length of the prefix field
 
-REGTYPE  = "0"                  # regular file
+REGTYPE = "0"                   # regular file
 AREGTYPE = "\0"                 # regular file
-LNKTYPE  = "1"                  # link (inside tarfile)
-SYMTYPE  = "2"                  # symbolic link
-CHRTYPE  = "3"                  # character special device
-BLKTYPE  = "4"                  # block special device
-DIRTYPE  = "5"                  # directory
+LNKTYPE = "1"                   # link (inside tarfile)
+SYMTYPE = "2"                   # symbolic link
+CHRTYPE = "3"                   # character special device
+BLKTYPE = "4"                   # block special device
+DIRTYPE = "5"                   # directory
 FIFOTYPE = "6"                  # fifo special device
 CONTTYPE = "7"                  # contiguous file
 
-GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
-GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
-GNUTYPE_SPARSE   = "S"          # GNU tar extension for sparse file
+GNUTYPE_LONGNAME = "L"          # GNU tar longname
+GNUTYPE_LONGLINK = "K"          # GNU tar longlink
+GNUTYPE_SPARSE = "S"            # GNU tar sparse file
+
+XHDTYPE = "x"                   # POSIX.1-2001 extended header
+XGLTYPE = "g"                   # POSIX.1-2001 global header
+SOLARIS_XHDTYPE = "X"           # Solaris extended header
+
+USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
+GNU_FORMAT = 1                  # GNU tar format
+PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
+DEFAULT_FORMAT = GNU_FORMAT
 
 #---------------------------------------------------------
 # tarfile constants
 #---------------------------------------------------------
-SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
-                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
+# File types that tarfile supports:
+SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
+                   SYMTYPE, DIRTYPE, FIFOTYPE,
                    CONTTYPE, CHRTYPE, BLKTYPE,
                    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                    GNUTYPE_SPARSE)
 
-REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
-                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files
+# File types that will be treated as a regular file.
+REGULAR_TYPES = (REGTYPE, AREGTYPE,
+                 CONTTYPE, GNUTYPE_SPARSE)
+
+# File types that are part of the GNU tar format.
+GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
+             GNUTYPE_SPARSE)
+
+# Fields from a pax header that override a TarInfo attribute.
+PAX_FIELDS = ("path", "linkpath", "size", "mtime",
+              "uid", "gid", "uname", "gname")
+
+# Fields in a pax header that are numbers, all other fields
+# are treated as strings.
+PAX_NUMBER_FIELDS = {
+    "atime": float,
+    "ctime": float,
+    "mtime": float,
+    "uid": int,
+    "gid": int,
+    "size": int
+}
 
 #---------------------------------------------------------
 # Bits used in the mode field, values in octal.
@@ -131,6 +162,13 @@ TOWRITE = 0002           # write by other
 TOEXEC  = 0001           # execute/search by other
 
 #---------------------------------------------------------
+# initialization
+#---------------------------------------------------------
+ENCODING = sys.getfilesystemencoding()
+if ENCODING is None:
+    ENCODING = sys.getdefaultencoding()
+
+#---------------------------------------------------------
 # Some useful functions
 #---------------------------------------------------------
 
@@ -154,7 +192,10 @@ def nti(s):
     # There are two possible encodings for a number field, see
     # itn() below.
     if s[0] != chr(0200):
-        n = int(nts(s) or "0", 8)
+        try:
+            n = int(nts(s) or "0", 8)
+        except ValueError:
+            raise HeaderError("invalid header")
     else:
         n = 0L
         for i in xrange(len(s) - 1):
@@ -162,7 +203,7 @@ def nti(s):
             n += ord(s[i + 1])
     return n
 
-def itn(n, digits=8, posix=False):
+def itn(n, digits=8, format=DEFAULT_FORMAT):
     """Convert a python number to a number field.
     """
     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
@@ -174,7 +215,7 @@ def itn(n, digits=8, posix=False):
     if 0 <= n < 8 ** (digits - 1):
         s = "%0*o" % (digits - 1, n) + NUL
     else:
-        if posix:
+        if format != GNU_FORMAT or n >= 256 ** (digits - 1):
             raise ValueError("overflow in number field")
 
         if n < 0:
@@ -189,6 +230,26 @@ def itn(n, digits=8, posix=False):
         s = chr(0200) + s
     return s
 
+def uts(s, encoding, errors):
+    """Convert a unicode object to a string.
+    """
+    if errors == "utf-8":
+        # An extra error handler similar to the -o invalid=UTF-8 option
+        # in POSIX.1-2001. Replace untranslatable characters with their
+        # UTF-8 representation.
+        try:
+            return s.encode(encoding, "strict")
+        except UnicodeEncodeError:
+            x = []
+            for c in s:
+                try:
+                    x.append(c.encode(encoding, "strict"))
+                except UnicodeEncodeError:
+                    x.append(c.encode("utf8"))
+            return "".join(x)
+    else:
+        return s.encode(encoding, errors)
+
 def calc_chksums(buf):
     """Calculate the checksum for a member's header by summing up all
        characters except for the chksum field which is treated as if
@@ -289,6 +350,9 @@ class CompressionError(TarError):
 class StreamError(TarError):
     """Exception for unsupported operations on stream-like TarFiles."""
     pass
+class HeaderError(TarError):
+    """Exception for invalid headers."""
+    pass
 
 #---------------------------
 # internal stream interface
@@ -306,7 +370,7 @@ class _LowLevelFile:
         }[mode]
         if hasattr(os, "O_BINARY"):
             mode |= os.O_BINARY
-        self.fd = os.open(name, mode)
+        self.fd = os.open(name, mode, 0666)
 
     def close(self):
         os.close(self.fd)
@@ -357,7 +421,7 @@ class _Stream:
             except ImportError:
                 raise CompressionError("zlib module is not available")
             self.zlib = zlib
-            self.crc = zlib.crc32("")
+            self.crc = zlib.crc32("") & 0xffffffffL
             if mode == "r":
                 self._init_read_gz()
             else:
@@ -395,7 +459,7 @@ class _Stream:
         """Write string s to the stream.
         """
         if self.comptype == "gz":
-            self.crc = self.zlib.crc32(s, self.crc)
+            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
         self.pos += len(s)
         if self.comptype != "tar":
             s = self.cmp.compress(s)
@@ -517,7 +581,10 @@ class _Stream:
             buf = self.__read(self.bufsize)
             if not buf:
                 break
-            buf = self.cmp.decompress(buf)
+            try:
+                buf = self.cmp.decompress(buf)
+            except IOError:
+                raise ReadError("invalid compressed data")
             t.append(buf)
             c += len(buf)
         t = "".join(t)
@@ -578,6 +645,7 @@ class _BZ2Proxy(object):
     def __init__(self, fileobj, mode):
         self.fileobj = fileobj
         self.mode = mode
+        self.name = getattr(self.fileobj, "name", None)
         self.init()
 
     def init(self):
@@ -627,7 +695,6 @@ class _BZ2Proxy(object):
         if self.mode == "w":
             raw = self.bz2obj.flush()
             self.fileobj.write(raw)
-        self.fileobj.close()
 # class _BZ2Proxy
 
 #------------------------
@@ -852,8 +919,8 @@ class TarInfo(object):
         """Construct a TarInfo object. name is the optional name
            of the member.
         """
-        self.name = name        # member name (dirnames must end with '/')
-        self.mode = 0666        # file permissions
+        self.name = name        # member name
+        self.mode = 0644        # file permissions
         self.uid = 0            # user id
         self.gid = 0            # group id
         self.size = 0           # file size
@@ -861,147 +928,524 @@ class TarInfo(object):
         self.chksum = 0         # header checksum
         self.type = REGTYPE     # member type
         self.linkname = ""      # link name
-        self.uname = "user"     # user name
-        self.gname = "group"    # group name
+        self.uname = "root"     # user name
+        self.gname = "root"     # group name
         self.devmajor = 0       # device major number
         self.devminor = 0       # device minor number
 
         self.offset = 0         # the tar header starts here
         self.offset_data = 0    # the file's data starts here
 
+        self.pax_headers = {}   # pax header information
+
+    # In pax headers the "name" and "linkname" field are called
+    # "path" and "linkpath".
+    def _getpath(self):
+        return self.name
+    def _setpath(self, name):
+        self.name = name
+    path = property(_getpath, _setpath)
+
+    def _getlinkpath(self):
+        return self.linkname
+    def _setlinkpath(self, linkname):
+        self.linkname = linkname
+    linkpath = property(_getlinkpath, _setlinkpath)
+
     def __repr__(self):
         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
 
-    @classmethod
-    def frombuf(cls, buf):
-        """Construct a TarInfo object from a 512 byte string buffer.
+    def get_info(self, encoding, errors):
+        """Return the TarInfo's attributes as a dictionary.
+        """
+        info = {
+            "name":     normpath(self.name),
+            "mode":     self.mode & 07777,
+            "uid":      self.uid,
+            "gid":      self.gid,
+            "size":     self.size,
+            "mtime":    self.mtime,
+            "chksum":   self.chksum,
+            "type":     self.type,
+            "linkname": normpath(self.linkname) if self.linkname else "",
+            "uname":    self.uname,
+            "gname":    self.gname,
+            "devmajor": self.devmajor,
+            "devminor": self.devminor
+        }
+
+        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
+            info["name"] += "/"
+
+        for key in ("name", "linkname", "uname", "gname"):
+            if type(info[key]) is unicode:
+                info[key] = info[key].encode(encoding, errors)
+
+        return info
+
+    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
+        """Return a tar header as a string of 512 byte blocks.
         """
-        if len(buf) != BLOCKSIZE:
-            raise ValueError("truncated header")
-        if buf.count(NUL) == BLOCKSIZE:
-            raise ValueError("empty header")
-
-        tarinfo = cls()
-        tarinfo.buf = buf
-        tarinfo.name = nts(buf[0:100])
-        tarinfo.mode = nti(buf[100:108])
-        tarinfo.uid = nti(buf[108:116])
-        tarinfo.gid = nti(buf[116:124])
-        tarinfo.size = nti(buf[124:136])
-        tarinfo.mtime = nti(buf[136:148])
-        tarinfo.chksum = nti(buf[148:156])
-        tarinfo.type = buf[156:157]
-        tarinfo.linkname = nts(buf[157:257])
-        tarinfo.uname = nts(buf[265:297])
-        tarinfo.gname = nts(buf[297:329])
-        tarinfo.devmajor = nti(buf[329:337])
-        tarinfo.devminor = nti(buf[337:345])
-        prefix = nts(buf[345:500])
+        info = self.get_info(encoding, errors)
+
+        if format == USTAR_FORMAT:
+            return self.create_ustar_header(info)
+        elif format == GNU_FORMAT:
+            return self.create_gnu_header(info)
+        elif format == PAX_FORMAT:
+            return self.create_pax_header(info, encoding, errors)
+        else:
+            raise ValueError("invalid format")
 
-        if prefix and not tarinfo.issparse():
-            tarinfo.name = prefix + "/" + tarinfo.name
+    def create_ustar_header(self, info):
+        """Return the object as a ustar header block.
+        """
+        info["magic"] = POSIX_MAGIC
 
-        if tarinfo.chksum not in calc_chksums(buf):
-            raise ValueError("invalid header")
-        return tarinfo
+        if len(info["linkname"]) > LENGTH_LINK:
+            raise ValueError("linkname is too long")
 
-    def tobuf(self, posix=False):
-        """Return a tar header as a string of 512 byte blocks.
+        if len(info["name"]) > LENGTH_NAME:
+            info["prefix"], info["name"] = self._posix_split_name(info["name"])
+
+        return self._create_header(info, USTAR_FORMAT)
+
+    def create_gnu_header(self, info):
+        """Return the object as a GNU header block sequence.
         """
+        info["magic"] = GNU_MAGIC
+
         buf = ""
-        type = self.type
-        prefix = ""
+        if len(info["linkname"]) > LENGTH_LINK:
+            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
 
-        if self.name.endswith("/"):
-            type = DIRTYPE
+        if len(info["name"]) > LENGTH_NAME:
+            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
 
-        if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
-            # Prevent "././@LongLink" from being normalized.
-            name = self.name
-        else:
-            name = normpath(self.name)
+        return buf + self._create_header(info, GNU_FORMAT)
 
-        if type == DIRTYPE:
-            # directories should end with '/'
-            name += "/"
+    def create_pax_header(self, info, encoding, errors):
+        """Return the object as a ustar header block. If it cannot be
+           represented this way, prepend a pax extended header sequence
+           with supplement information.
+        """
+        info["magic"] = POSIX_MAGIC
+        pax_headers = self.pax_headers.copy()
 
-        linkname = self.linkname
-        if linkname:
-            # if linkname is empty we end up with a '.'
-            linkname = normpath(linkname)
+        # Test string fields for values that exceed the field length or cannot
+        # be represented in ASCII encoding.
+        for name, hname, length in (
+                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
+                ("uname", "uname", 32), ("gname", "gname", 32)):
 
-        if posix:
-            if self.size > MAXSIZE_MEMBER:
-                raise ValueError("file is too large (>= 8 GB)")
+            if hname in pax_headers:
+                # The pax header has priority.
+                continue
 
-            if len(self.linkname) > LENGTH_LINK:
-                raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
+            val = info[name].decode(encoding, errors)
 
-            if len(name) > LENGTH_NAME:
-                prefix = name[:LENGTH_PREFIX + 1]
-                while prefix and prefix[-1] != "/":
-                    prefix = prefix[:-1]
+            # Try to encode the string as ASCII.
+            try:
+                val.encode("ascii")
+            except UnicodeEncodeError:
+                pax_headers[hname] = val
+                continue
+
+            if len(info[name]) > length:
+                pax_headers[hname] = val
+
+        # Test number fields for values that exceed the field limit or values
+        # that like to be stored as float.
+        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
+            if name in pax_headers:
+                # The pax header has priority. Avoid overflow.
+                info[name] = 0
+                continue
+
+            val = info[name]
+            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
+                pax_headers[name] = unicode(val)
+                info[name] = 0
+
+        # Create a pax extended header if necessary.
+        if pax_headers:
+            buf = self._create_pax_generic_header(pax_headers)
+        else:
+            buf = ""
 
-                name = name[len(prefix):]
-                prefix = prefix[:-1]
+        return buf + self._create_header(info, USTAR_FORMAT)
 
-                if not prefix or len(name) > LENGTH_NAME:
-                    raise ValueError("name is too long")
+    @classmethod
+    def create_pax_global_header(cls, pax_headers):
+        """Return the object as a pax global header block sequence.
+        """
+        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
 
-        else:
-            if len(self.linkname) > LENGTH_LINK:
-                buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
+    def _posix_split_name(self, name):
+        """Split a name longer than 100 chars into a prefix
+           and a name part.
+        """
+        prefix = name[:LENGTH_PREFIX + 1]
+        while prefix and prefix[-1] != "/":
+            prefix = prefix[:-1]
+
+        name = name[len(prefix):]
+        prefix = prefix[:-1]
 
-            if len(name) > LENGTH_NAME:
-                buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
+        if not prefix or len(name) > LENGTH_NAME:
+            raise ValueError("name is too long")
+        return prefix, name
 
+    @staticmethod
+    def _create_header(info, format):
+        """Return a header block. info is a dictionary with file
+           information, format must be one of the *_FORMAT constants.
+        """
         parts = [
-            stn(name, 100),
-            itn(self.mode & 07777, 8, posix),
-            itn(self.uid, 8, posix),
-            itn(self.gid, 8, posix),
-            itn(self.size, 12, posix),
-            itn(self.mtime, 12, posix),
+            stn(info.get("name", ""), 100),
+            itn(info.get("mode", 0) & 07777, 8, format),
+            itn(info.get("uid", 0), 8, format),
+            itn(info.get("gid", 0), 8, format),
+            itn(info.get("size", 0), 12, format),
+            itn(info.get("mtime", 0), 12, format),
             "        ", # checksum field
-            type,
-            stn(self.linkname, 100),
-            stn(MAGIC, 6),
-            stn(VERSION, 2),
-            stn(self.uname, 32),
-            stn(self.gname, 32),
-            itn(self.devmajor, 8, posix),
-            itn(self.devminor, 8, posix),
-            stn(prefix, 155)
+            info.get("type", REGTYPE),
+            stn(info.get("linkname", ""), 100),
+            stn(info.get("magic", POSIX_MAGIC), 8),
+            stn(info.get("uname", "root"), 32),
+            stn(info.get("gname", "root"), 32),
+            itn(info.get("devmajor", 0), 8, format),
+            itn(info.get("devminor", 0), 8, format),
+            stn(info.get("prefix", ""), 155)
         ]
 
-        buf += "".join(parts).ljust(BLOCKSIZE, NUL)
+        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
         chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
         buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
-        self.buf = buf
         return buf
 
-    def _create_gnulong(self, name, type):
-        """Create a GNU longname/longlink header from name.
-           It consists of an extended tar header, with the length
-           of the longname as size, followed by data blocks,
-           which contain the longname as a null terminated string.
+    @staticmethod
+    def _create_payload(payload):
+        """Return the string payload filled with zero bytes
+           up to the next 512 byte border.
+        """
+        blocks, remainder = divmod(len(payload), BLOCKSIZE)
+        if remainder > 0:
+            payload += (BLOCKSIZE - remainder) * NUL
+        return payload
+
+    @classmethod
+    def _create_gnu_long_header(cls, name, type):
+        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
+           for name.
         """
         name += NUL
 
-        tarinfo = self.__class__()
-        tarinfo.name = "././@LongLink"
-        tarinfo.type = type
-        tarinfo.mode = 0
-        tarinfo.size = len(name)
-
-        # create extended header
-        buf = tarinfo.tobuf()
-        # create name blocks
-        buf += name
-        blocks, remainder = divmod(len(name), BLOCKSIZE)
-        if remainder > 0:
-            buf += (BLOCKSIZE - remainder) * NUL
-        return buf
+        info = {}
+        info["name"] = "././@LongLink"
+        info["type"] = type
+        info["size"] = len(name)
+        info["magic"] = GNU_MAGIC
+
+        # create extended header + name blocks.
+        return cls._create_header(info, USTAR_FORMAT) + \
+                cls._create_payload(name)
+
+    @classmethod
+    def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
+        """Return a POSIX.1-2001 extended or global header sequence
+           that contains a list of keyword, value pairs. The values
+           must be unicode objects.
+        """
+        records = []
+        for keyword, value in pax_headers.iteritems():
+            keyword = keyword.encode("utf8")
+            value = value.encode("utf8")
+            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
+            n = p = 0
+            while True:
+                n = l + len(str(p))
+                if n == p:
+                    break
+                p = n
+            records.append("%d %s=%s\n" % (p, keyword, value))
+        records = "".join(records)
+
+        # We use a hardcoded "././@PaxHeader" name like star does
+        # instead of the one that POSIX recommends.
+        info = {}
+        info["name"] = "././@PaxHeader"
+        info["type"] = type
+        info["size"] = len(records)
+        info["magic"] = POSIX_MAGIC
+
+        # Create pax header + record blocks.
+        return cls._create_header(info, USTAR_FORMAT) + \
+                cls._create_payload(records)
+
+    @classmethod
+    def frombuf(cls, buf):
+        """Construct a TarInfo object from a 512 byte string buffer.
+        """
+        if len(buf) != BLOCKSIZE:
+            raise HeaderError("truncated header")
+        if buf.count(NUL) == BLOCKSIZE:
+            raise HeaderError("empty header")
+
+        chksum = nti(buf[148:156])
+        if chksum not in calc_chksums(buf):
+            raise HeaderError("bad checksum")
+
+        obj = cls()
+        obj.buf = buf
+        obj.name = nts(buf[0:100])
+        obj.mode = nti(buf[100:108])
+        obj.uid = nti(buf[108:116])
+        obj.gid = nti(buf[116:124])
+        obj.size = nti(buf[124:136])
+        obj.mtime = nti(buf[136:148])
+        obj.chksum = chksum
+        obj.type = buf[156:157]
+        obj.linkname = nts(buf[157:257])
+        obj.uname = nts(buf[265:297])
+        obj.gname = nts(buf[297:329])
+        obj.devmajor = nti(buf[329:337])
+        obj.devminor = nti(buf[337:345])
+        prefix = nts(buf[345:500])
+
+        # Old V7 tar format represents a directory as a regular
+        # file with a trailing slash.
+        if obj.type == AREGTYPE and obj.name.endswith("/"):
+            obj.type = DIRTYPE
+
+        # Remove redundant slashes from directories.
+        if obj.isdir():
+            obj.name = obj.name.rstrip("/")
+
+        # Reconstruct a ustar longname.
+        if prefix and obj.type not in GNU_TYPES:
+            obj.name = prefix + "/" + obj.name
+        return obj
+
+    @classmethod
+    def fromtarfile(cls, tarfile):
+        """Return the next TarInfo object from TarFile object
+           tarfile.
+        """
+        buf = tarfile.fileobj.read(BLOCKSIZE)
+        if not buf:
+            return
+        obj = cls.frombuf(buf)
+        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
+        return obj._proc_member(tarfile)
+
+    #--------------------------------------------------------------------------
+    # The following are methods that are called depending on the type of a
+    # member. The entry point is _proc_member() which can be overridden in a
+    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
+    # implement the following
+    # operations:
+    # 1. Set self.offset_data to the position where the data blocks begin,
+    #    if there is data that follows.
+    # 2. Set tarfile.offset to the position where the next member's header will
+    #    begin.
+    # 3. Return self or another valid TarInfo object.
+    def _proc_member(self, tarfile):
+        """Choose the right processing method depending on
+           the type and call it.
+        """
+        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
+            return self._proc_gnulong(tarfile)
+        elif self.type == GNUTYPE_SPARSE:
+            return self._proc_sparse(tarfile)
+        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
+            return self._proc_pax(tarfile)
+        else:
+            return self._proc_builtin(tarfile)
+
+    def _proc_builtin(self, tarfile):
+        """Process a builtin type or an unknown type which
+           will be treated as a regular file.
+        """
+        self.offset_data = tarfile.fileobj.tell()
+        offset = self.offset_data
+        if self.isreg() or self.type not in SUPPORTED_TYPES:
+            # Skip the following data blocks.
+            offset += self._block(self.size)
+        tarfile.offset = offset
+
+        # Patch the TarInfo object with saved global
+        # header information.
+        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
+
+        return self
+
+    def _proc_gnulong(self, tarfile):
+        """Process the blocks that hold a GNU longname
+           or longlink member.
+        """
+        buf = tarfile.fileobj.read(self._block(self.size))
+
+        # Fetch the next header and process it.
+        next = self.fromtarfile(tarfile)
+        if next is None:
+            raise HeaderError("missing subsequent header")
+
+        # Patch the TarInfo object from the next header with
+        # the longname information.
+        next.offset = self.offset
+        if self.type == GNUTYPE_LONGNAME:
+            next.name = nts(buf)
+        elif self.type == GNUTYPE_LONGLINK:
+            next.linkname = nts(buf)
+
+        return next
+
+    def _proc_sparse(self, tarfile):
+        """Process a GNU sparse header plus extra headers.
+        """
+        buf = self.buf
+        sp = _ringbuffer()
+        pos = 386
+        lastpos = 0L
+        realpos = 0L
+        # There are 4 possible sparse structs in the
+        # first header.
+        for i in xrange(4):
+            try:
+                offset = nti(buf[pos:pos + 12])
+                numbytes = nti(buf[pos + 12:pos + 24])
+            except ValueError:
+                break
+            if offset > lastpos:
+                sp.append(_hole(lastpos, offset - lastpos))
+            sp.append(_data(offset, numbytes, realpos))
+            realpos += numbytes
+            lastpos = offset + numbytes
+            pos += 24
+
+        isextended = ord(buf[482])
+        origsize = nti(buf[483:495])
+
+        # If the isextended flag is given,
+        # there are extra headers to process.
+        while isextended == 1:
+            buf = tarfile.fileobj.read(BLOCKSIZE)
+            pos = 0
+            for i in xrange(21):
+                try:
+                    offset = nti(buf[pos:pos + 12])
+                    numbytes = nti(buf[pos + 12:pos + 24])
+                except ValueError:
+                    break
+                if offset > lastpos:
+                    sp.append(_hole(lastpos, offset - lastpos))
+                sp.append(_data(offset, numbytes, realpos))
+                realpos += numbytes
+                lastpos = offset + numbytes
+                pos += 24
+            isextended = ord(buf[504])
+
+        if lastpos < origsize:
+            sp.append(_hole(lastpos, origsize - lastpos))
+
+        self.sparse = sp
+
+        self.offset_data = tarfile.fileobj.tell()
+        tarfile.offset = self.offset_data + self._block(self.size)
+        self.size = origsize
+
+        return self
+
+    def _proc_pax(self, tarfile):
+        """Process an extended or global header as described in
+           POSIX.1-2001.
+        """
+        # Read the header information.
+        buf = tarfile.fileobj.read(self._block(self.size))
+
+        # A pax header stores supplemental information for either
+        # the following file (extended) or all following files
+        # (global).
+        if self.type == XGLTYPE:
+            pax_headers = tarfile.pax_headers
+        else:
+            pax_headers = tarfile.pax_headers.copy()
+
+        # Parse pax header information. A record looks like that:
+        # "%d %s=%s\n" % (length, keyword, value). length is the size
+        # of the complete record including the length field itself and
+        # the newline. keyword and value are both UTF-8 encoded strings.
+        regex = re.compile(r"(\d+) ([^=]+)=", re.U)
+        pos = 0
+        while True:
+            match = regex.match(buf, pos)
+            if not match:
+                break
+
+            length, keyword = match.groups()
+            length = int(length)
+            value = buf[match.end(2) + 1:match.start(1) + length - 1]
+
+            keyword = keyword.decode("utf8")
+            value = value.decode("utf8")
+
+            pax_headers[keyword] = value
+            pos += length
+
+        # Fetch the next header.
+        next = self.fromtarfile(tarfile)
+
+        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
+            if next is None:
+                raise HeaderError("missing subsequent header")
+
+            # Patch the TarInfo object with the extended header info.
+            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
+            next.offset = self.offset
+
+            if "size" in pax_headers:
+                # If the extended header replaces the size field,
+                # we need to recalculate the offset where the next
+                # header starts.
+                offset = next.offset_data
+                if next.isreg() or next.type not in SUPPORTED_TYPES:
+                    offset += next._block(next.size)
+                tarfile.offset = offset
+
+        return next
+
+    def _apply_pax_info(self, pax_headers, encoding, errors):
+        """Replace fields with supplemental information from a previous
+           pax extended or global header.
+        """
+        for keyword, value in pax_headers.iteritems():
+            if keyword not in PAX_FIELDS:
+                continue
+
+            if keyword == "path":
+                value = value.rstrip("/")
+
+            if keyword in PAX_NUMBER_FIELDS:
+                try:
+                    value = PAX_NUMBER_FIELDS[keyword](value)
+                except ValueError:
+                    value = 0
+            else:
+                value = uts(value, encoding, errors)
+
+            setattr(self, keyword, value)
+
+        self.pax_headers = pax_headers.copy()
+
+    def _block(self, count):
+        """Round up a byte count by BLOCKSIZE and return it,
+           e.g. _block(834) => 1024.
+        """
+        blocks, remainder = divmod(count, BLOCKSIZE)
+        if remainder:
+            blocks += 1
+        return blocks * BLOCKSIZE
 
     def isreg(self):
         return self.type in REGULAR_TYPES
@@ -1041,12 +1485,19 @@ class TarFile(object):
                                 # messages (if debug >= 0). If > 0, errors
                                 # are passed to the caller as exceptions.
 
-    posix = False               # If True, generates POSIX.1-1990-compliant
-                                # archives (no GNU extensions!)
+    format = DEFAULT_FORMAT     # The format to use when creating an archive.
+
+    encoding = ENCODING         # Encoding for 8-bit character strings.
+
+    errors = None               # Error handler for unicode conversion.
+
+    tarinfo = TarInfo           # The default TarInfo class to use.
 
-    fileobject = ExFileObject
+    fileobject = ExFileObject   # The default ExFileObject class to use.
 
-    def __init__(self, name=None, mode="r", fileobj=None):
+    def __init__(self, name=None, mode="r", fileobj=None, format=None,
+            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
+            errors=None, pax_headers=None, debug=None, errorlevel=None):
         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
            read from an existing archive, 'a' to append data to an existing
            file or 'w' to create a new file overwriting an existing one. `mode'
@@ -1057,22 +1508,55 @@ class TarFile(object):
         """
         if len(mode) > 1 or mode not in "raw":
             raise ValueError("mode must be 'r', 'a' or 'w'")
-        self._mode = mode
-        self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
+        self.mode = mode
+        self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
 
         if not fileobj:
-            fileobj = file(name, self.mode)
+            if self.mode == "a" and not os.path.exists(name):
+                # Create nonexistent files in append mode.
+                self.mode = "w"
+                self._mode = "wb"
+            fileobj = bltn_open(name, self._mode)
             self._extfileobj = False
         else:
             if name is None and hasattr(fileobj, "name"):
                 name = fileobj.name
             if hasattr(fileobj, "mode"):
-                self.mode = fileobj.mode
+                self._mode = fileobj.mode
             self._extfileobj = True
         self.name = os.path.abspath(name) if name else None
         self.fileobj = fileobj
 
-        # Init datastructures
+        # Init attributes.
+        if format is not None:
+            self.format = format
+        if tarinfo is not None:
+            self.tarinfo = tarinfo
+        if dereference is not None:
+            self.dereference = dereference
+        if ignore_zeros is not None:
+            self.ignore_zeros = ignore_zeros
+        if encoding is not None:
+            self.encoding = encoding
+
+        if errors is not None:
+            self.errors = errors
+        elif mode == "r":
+            self.errors = "utf-8"
+        else:
+            self.errors = "strict"
+
+        if pax_headers is not None and self.format == PAX_FORMAT:
+            self.pax_headers = pax_headers
+        else:
+            self.pax_headers = {}
+
+        if debug is not None:
+            self.debug = debug
+        if errorlevel is not None:
+            self.errorlevel = errorlevel
+
+        # Init datastructures.
         self.closed = False
         self.members = []       # list of members as TarInfo objects
         self._loaded = False    # flag if all members have been read
@@ -1081,26 +1565,45 @@ class TarFile(object):
         self.inodes = {}        # dictionary caching the inodes of
                                 # archive members already added
 
-        if self._mode == "r":
-            self.firstmember = None
-            self.firstmember = self.next()
-
-        if self._mode == "a":
-            # Move to the end of the archive,
-            # before the first empty block.
-            self.firstmember = None
-            while True:
-                try:
-                    tarinfo = self.next()
-                except ReadError:
-                    self.fileobj.seek(0)
-                    break
-                if tarinfo is None:
-                    self.fileobj.seek(- BLOCKSIZE, 1)
-                    break
-
-        if self._mode in "aw":
-            self._loaded = True
+        try:
+            if self.mode == "r":
+                self.firstmember = None
+                self.firstmember = self.next()
+
+            if self.mode == "a":
+                # Move to the end of the archive,
+                # before the first empty block.
+                self.firstmember = None
+                while True:
+                    if self.next() is None:
+                        if self.offset > 0:
+                            self.fileobj.seek(- BLOCKSIZE, 1)
+                        break
+
+            if self.mode in "aw":
+                self._loaded = True
+
+                if self.pax_headers:
+                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
+                    self.fileobj.write(buf)
+                    self.offset += len(buf)
+        except:
+            if not self._extfileobj:
+                self.fileobj.close()
+            self.closed = True
+            raise
+
+    def _getposix(self):
+        return self.format == USTAR_FORMAT
+    def _setposix(self, value):
+        import warnings
+        warnings.warn("use the format attribute instead", DeprecationWarning,
+                      2)
+        if value:
+            self.format = USTAR_FORMAT
+        else:
+            self.format = GNU_FORMAT
+    posix = property(_getposix, _setposix)
 
     #--------------------------------------------------------------------------
     # Below are the classmethods which act as alternate constructors to the
@@ -1114,7 +1617,7 @@ class TarFile(object):
     # by adding it to the mapping in OPEN_METH.
 
     @classmethod
-    def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
+    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
         """Open a tar archive for reading, writing or appending. Return
            an appropriate TarFile class.
 
@@ -1123,7 +1626,7 @@ class TarFile(object):
            'r:'         open for reading exclusively uncompressed
            'r:gz'       open for reading with gzip compression
            'r:bz2'      open for reading with bzip2 compression
-           'a' or 'a:'  open for appending
+           'a' or 'a:'  open for appending, creating the file if necessary
            'w' or 'w:'  open for writing without compression
            'w:gz'       open for writing with gzip compression
            'w:bz2'      open for writing with bzip2 compression
@@ -1147,8 +1650,8 @@ class TarFile(object):
                 if fileobj is not None:
                     saved_pos = fileobj.tell()
                 try:
-                    return func(name, "r", fileobj)
-                except (ReadError, CompressionError):
+                    return func(name, "r", fileobj, **kwargs)
+                except (ReadError, CompressionError), e:
                     if fileobj is not None:
                         fileobj.seek(saved_pos)
                     continue
@@ -1165,7 +1668,7 @@ class TarFile(object):
                 func = getattr(cls, cls.OPEN_METH[comptype])
             else:
                 raise CompressionError("unknown compression type %r" % comptype)
-            return func(name, filemode, fileobj)
+            return func(name, filemode, fileobj, **kwargs)
 
         elif "|" in mode:
             filemode, comptype = mode.split("|", 1)
@@ -1176,25 +1679,26 @@ class TarFile(object):
                 raise ValueError("mode must be 'r' or 'w'")
 
             t = cls(name, filemode,
-                    _Stream(name, filemode, comptype, fileobj, bufsize))
+                    _Stream(name, filemode, comptype, fileobj, bufsize),
+                    **kwargs)
             t._extfileobj = False
             return t
 
         elif mode in "aw":
-            return cls.taropen(name, mode, fileobj)
+            return cls.taropen(name, mode, fileobj, **kwargs)
 
         raise ValueError("undiscernible mode")
 
     @classmethod
-    def taropen(cls, name, mode="r", fileobj=None):
+    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
         """Open uncompressed tar archive name for reading or writing.
         """
         if len(mode) > 1 or mode not in "raw":
             raise ValueError("mode must be 'r', 'a' or 'w'")
-        return cls(name, mode, fileobj)
+        return cls(name, mode, fileobj, **kwargs)
 
     @classmethod
-    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
+    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
         """Open gzip compressed tar archive name for reading or writing.
            Appending is not allowed.
         """
@@ -1208,18 +1712,19 @@ class TarFile(object):
             raise CompressionError("gzip module is not available")
 
         if fileobj is None:
-            fileobj = file(name, mode + "b")
+            fileobj = bltn_open(name, mode + "b")
 
         try:
             t = cls.taropen(name, mode,
-                gzip.GzipFile(name, mode, compresslevel, fileobj))
+                gzip.GzipFile(name, mode, compresslevel, fileobj),
+                **kwargs)
         except IOError:
             raise ReadError("not a gzip file")
         t._extfileobj = False
         return t
 
     @classmethod
-    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
+    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
         """Open bzip2 compressed tar archive name for reading or writing.
            Appending is not allowed.
         """
@@ -1237,7 +1742,7 @@ class TarFile(object):
             fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
 
         try:
-            t = cls.taropen(name, mode, fileobj)
+            t = cls.taropen(name, mode, fileobj, **kwargs)
         except IOError:
             raise ReadError("not a bzip2 file")
         t._extfileobj = False
@@ -1260,7 +1765,7 @@ class TarFile(object):
         if self.closed:
             return
 
-        if self._mode in "aw":
+        if self.mode in "aw":
             self.fileobj.write(NUL * (BLOCKSIZE * 2))
             self.offset += (BLOCKSIZE * 2)
             # fill up the end with zero-blocks
@@ -1276,7 +1781,7 @@ class TarFile(object):
     def getmember(self, name):
         """Return a TarInfo object for member `name'. If `name' can not be
            found in the archive, KeyError is raised. If a member occurs more
-           than once in the archive, its last occurence is assumed to be the
+           than once in the archive, its last occurrence is assumed to be the
            most up-to-date version.
         """
         tarinfo = self._getmember(name)
@@ -1326,7 +1831,8 @@ class TarFile(object):
 
         # Now, fill the TarInfo object with
         # information specific for the file.
-        tarinfo = TarInfo()
+        tarinfo = self.tarinfo()
+        tarinfo.tarfile = self
 
         # Use os.stat or os.lstat, depending on platform
         # and if symlinks shall be resolved.
@@ -1342,8 +1848,8 @@ class TarFile(object):
         stmd = statres.st_mode
         if stat.S_ISREG(stmd):
             inode = (statres.st_ino, statres.st_dev)
-            if not self.dereference and \
-                    statres.st_nlink > 1 and inode in self.inodes:
+            if not self.dereference and statres.st_nlink > 1 and \
+                    inode in self.inodes and arcname != self.inodes[inode]:
                 # Is it a hardlink to an already
                 # archived file?
                 type = LNKTYPE
@@ -1356,8 +1862,6 @@ class TarFile(object):
                     self.inodes[inode] = arcname
         elif stat.S_ISDIR(stmd):
             type = DIRTYPE
-            if arcname[-1:] != "/":
-                arcname += "/"
         elif stat.S_ISFIFO(stmd):
             type = FIFOTYPE
         elif stat.S_ISLNK(stmd):
@@ -1376,7 +1880,7 @@ class TarFile(object):
         tarinfo.mode = stmd
         tarinfo.uid = statres.st_uid
         tarinfo.gid = statres.st_gid
-        if stat.S_ISREG(stmd):
+        if type == REGTYPE:
             tarinfo.size = statres.st_size
         else:
             tarinfo.size = 0L
@@ -1420,7 +1924,7 @@ class TarFile(object):
                 print "%d-%02d-%02d %02d:%02d:%02d" \
                       % time.localtime(tarinfo.mtime)[:6],
 
-            print tarinfo.name,
+            print tarinfo.name + ("/" if tarinfo.isdir() else ""),
 
             if verbose:
                 if tarinfo.issym():
@@ -1429,18 +1933,24 @@ class TarFile(object):
                     print "link to", tarinfo.linkname,
             print
 
-    def add(self, name, arcname=None, recursive=True):
+    def add(self, name, arcname=None, recursive=True, exclude=None):
         """Add the file `name' to the archive. `name' may be any type of file
            (directory, fifo, symbolic link, etc.). If given, `arcname'
            specifies an alternative name for the file in the archive.
            Directories are added recursively by default. This can be avoided by
-           setting `recursive' to False.
+           setting `recursive' to False. `exclude' is a function that should
+           return True for each filename to be excluded.
         """
         self._check("aw")
 
         if arcname is None:
             arcname = name
 
+        # Exclude pathnames.
+        if exclude is not None and exclude(name):
+            self._dbg(2, "tarfile: Excluded %r" % name)
+            return
+
         # Skip if somebody tries to archive the archive...
         if self.name is not None and os.path.abspath(name) == self.name:
             self._dbg(2, "tarfile: Skipped %r" % name)
@@ -1452,8 +1962,8 @@ class TarFile(object):
             if recursive:
                 if arcname == ".":
                     arcname = ""
-                for f in os.listdir("."):
-                    self.add(f, os.path.join(arcname, f))
+                for f in os.listdir(name):
+                    self.add(f, os.path.join(arcname, f), recursive, exclude)
             return
 
         self._dbg(1, name)
@@ -1467,7 +1977,7 @@ class TarFile(object):
 
         # Append the tar header and data to the archive.
         if tarinfo.isreg():
-            f = file(name, "rb")
+            f = bltn_open(name, "rb")
             self.addfile(tarinfo, f)
             f.close()
 
@@ -1475,7 +1985,7 @@ class TarFile(object):
             self.addfile(tarinfo)
             if recursive:
                 for f in os.listdir(name):
-                    self.add(os.path.join(name, f), os.path.join(arcname, f))
+                    self.add(os.path.join(name, f), os.path.join(arcname, f), recursive, exclude)
 
         else:
             self.addfile(tarinfo)
@@ -1491,7 +2001,7 @@ class TarFile(object):
 
         tarinfo = copy.copy(tarinfo)
 
-        buf = tarinfo.tobuf(self.posix)
+        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
         self.fileobj.write(buf)
         self.offset += len(buf)
 
@@ -1527,7 +2037,7 @@ class TarFile(object):
             self.extract(tarinfo, path)
 
         # Reverse sort directories.
-        directories.sort(lambda a, b: cmp(a.name, b.name))
+        directories.sort(key=operator.attrgetter('name'))
         directories.reverse()
 
         # Set correct owner, mtime and filemode on directories.
@@ -1551,10 +2061,10 @@ class TarFile(object):
         """
         self._check("r")
 
-        if isinstance(member, TarInfo):
-            tarinfo = member
-        else:
+        if isinstance(member, basestring):
             tarinfo = self.getmember(member)
+        else:
+            tarinfo = member
 
         # Prepare the link target for makelink().
         if tarinfo.islnk():
@@ -1587,10 +2097,10 @@ class TarFile(object):
         """
         self._check("r")
 
-        if isinstance(member, TarInfo):
-            tarinfo = member
-        else:
+        if isinstance(member, basestring):
             tarinfo = self.getmember(member)
+        else:
+            tarinfo = member
 
         if tarinfo.isreg():
             return self.fileobject(self, tarinfo)
@@ -1678,7 +2188,7 @@ class TarFile(object):
         """Make a file called targetpath.
         """
         source = self.extractfile(tarinfo)
-        target = file(targetpath, "wb")
+        target = bltn_open(targetpath, "wb")
         copyfileobj(source, target)
         source.close()
         target.close()
@@ -1783,10 +2293,6 @@ class TarFile(object):
         """
         if not hasattr(os, 'utime'):
             return
-        if sys.platform == "win32" and tarinfo.isdir():
-            # According to msdn.microsoft.com, it is an error (EACCES)
-            # to use utime() on directories.
-            return
         try:
             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
         except EnvironmentError, e:
@@ -1807,177 +2313,28 @@ class TarFile(object):
         # Read the next block.
         self.fileobj.seek(self.offset)
         while True:
-            buf = self.fileobj.read(BLOCKSIZE)
-            if not buf:
-                return None
-
             try:
-                tarinfo = TarInfo.frombuf(buf)
-
-                # Set the TarInfo object's offset to the current position of the
-                # TarFile and set self.offset to the position where the data blocks
-                # should begin.
-                tarinfo.offset = self.offset
-                self.offset += BLOCKSIZE
-
-                tarinfo = self.proc_member(tarinfo)
+                tarinfo = self.tarinfo.fromtarfile(self)
+                if tarinfo is None:
+                    return
+                self.members.append(tarinfo)
 
-            except ValueError, e:
+            except HeaderError, e:
                 if self.ignore_zeros:
-                    self._dbg(2, "0x%X: empty or invalid block: %s" %
-                              (self.offset, e))
+                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                     self.offset += BLOCKSIZE
                     continue
                 else:
                     if self.offset == 0:
-                        raise ReadError("empty, unreadable or compressed "
-                                        "file: %s" % e)
+                        raise ReadError(str(e))
                     return None
             break
 
-        # Some old tar programs represent a directory as a regular
-        # file with a trailing slash.
-        if tarinfo.isreg() and tarinfo.name.endswith("/"):
-            tarinfo.type = DIRTYPE
-
-        # Directory names should have a '/' at the end.
-        if tarinfo.isdir() and not tarinfo.name.endswith("/"):
-            tarinfo.name += "/"
-
-        self.members.append(tarinfo)
-        return tarinfo
-
-    #--------------------------------------------------------------------------
-    # The following are methods that are called depending on the type of a
-    # member. The entry point is proc_member() which is called with a TarInfo
-    # object created from the header block from the current offset. The
-    # proc_member() method can be overridden in a subclass to add custom
-    # proc_*() methods. A proc_*() method MUST implement the following
-    # operations:
-    # 1. Set tarinfo.offset_data to the position where the data blocks begin,
-    #    if there is data that follows.
-    # 2. Set self.offset to the position where the next member's header will
-    #    begin.
-    # 3. Return tarinfo or another valid TarInfo object.
-    def proc_member(self, tarinfo):
-        """Choose the right processing method for tarinfo depending
-           on its type and call it.
-        """
-        if tarinfo.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
-            return self.proc_gnulong(tarinfo)
-        elif tarinfo.type == GNUTYPE_SPARSE:
-            return self.proc_sparse(tarinfo)
-        else:
-            return self.proc_builtin(tarinfo)
-
-    def proc_builtin(self, tarinfo):
-        """Process a builtin type member or an unknown member
-           which will be treated as a regular file.
-        """
-        tarinfo.offset_data = self.offset
-        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
-            # Skip the following data blocks.
-            self.offset += self._block(tarinfo.size)
-        return tarinfo
-
-    def proc_gnulong(self, tarinfo):
-        """Process the blocks that hold a GNU longname
-           or longlink member.
-        """
-        buf = ""
-        count = tarinfo.size
-        while count > 0:
-            block = self.fileobj.read(BLOCKSIZE)
-            buf += block
-            self.offset += BLOCKSIZE
-            count -= BLOCKSIZE
-
-        # Fetch the next header and process it.
-        b = self.fileobj.read(BLOCKSIZE)
-        t = TarInfo.frombuf(b)
-        t.offset = self.offset
-        self.offset += BLOCKSIZE
-        next = self.proc_member(t)
-
-        # Patch the TarInfo object from the next header with
-        # the longname information.
-        next.offset = tarinfo.offset
-        if tarinfo.type == GNUTYPE_LONGNAME:
-            next.name = nts(buf)
-        elif tarinfo.type == GNUTYPE_LONGLINK:
-            next.linkname = nts(buf)
-
-        return next
-
-    def proc_sparse(self, tarinfo):
-        """Process a GNU sparse header plus extra headers.
-        """
-        buf = tarinfo.buf
-        sp = _ringbuffer()
-        pos = 386
-        lastpos = 0L
-        realpos = 0L
-        # There are 4 possible sparse structs in the
-        # first header.
-        for i in xrange(4):
-            try:
-                offset = nti(buf[pos:pos + 12])
-                numbytes = nti(buf[pos + 12:pos + 24])
-            except ValueError:
-                break
-            if offset > lastpos:
-                sp.append(_hole(lastpos, offset - lastpos))
-            sp.append(_data(offset, numbytes, realpos))
-            realpos += numbytes
-            lastpos = offset + numbytes
-            pos += 24
-
-        isextended = ord(buf[482])
-        origsize = nti(buf[483:495])
-
-        # If the isextended flag is given,
-        # there are extra headers to process.
-        while isextended == 1:
-            buf = self.fileobj.read(BLOCKSIZE)
-            self.offset += BLOCKSIZE
-            pos = 0
-            for i in xrange(21):
-                try:
-                    offset = nti(buf[pos:pos + 12])
-                    numbytes = nti(buf[pos + 12:pos + 24])
-                except ValueError:
-                    break
-                if offset > lastpos:
-                    sp.append(_hole(lastpos, offset - lastpos))
-                sp.append(_data(offset, numbytes, realpos))
-                realpos += numbytes
-                lastpos = offset + numbytes
-                pos += 24
-            isextended = ord(buf[504])
-
-        if lastpos < origsize:
-            sp.append(_hole(lastpos, origsize - lastpos))
-
-        tarinfo.sparse = sp
-
-        tarinfo.offset_data = self.offset
-        self.offset += self._block(tarinfo.size)
-        tarinfo.size = origsize
-
         return tarinfo
 
     #--------------------------------------------------------------------------
     # Little helper methods:
 
-    def _block(self, count):
-        """Round up a byte count by BLOCKSIZE and return it,
-           e.g. _block(834) => 1024.
-        """
-        blocks, remainder = divmod(count, BLOCKSIZE)
-        if remainder:
-            blocks += 1
-        return blocks * BLOCKSIZE
-
     def _getmember(self, name, tarinfo=None):
         """Find an archive member by name from bottom to top.
            If tarinfo is given, it is used as the starting point.
@@ -2010,8 +2367,8 @@ class TarFile(object):
         """
         if self.closed:
             raise IOError("%s is closed" % self.__class__.__name__)
-        if mode is not None and self._mode not in mode:
-            raise IOError("bad operation for mode %r" % self._mode)
+        if mode is not None and self.mode not in mode:
+            raise IOError("bad operation for mode %r" % self.mode)
 
     def __iter__(self):
         """Provide an iterator object.
@@ -2117,6 +2474,9 @@ class TarFileCompat:
        ZipFile class.
     """
     def __init__(self, file, mode="r", compression=TAR_PLAIN):
+        from warnings import warnpy3k
+        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
+                stacklevel=2)
         if compression == TAR_PLAIN:
             self.tarfile = TarFile.taropen(file, mode)
         elif compression == TAR_GZIPPED:
@@ -2150,10 +2510,10 @@ class TarFileCompat:
         except ImportError:
             from StringIO import StringIO
         import calendar
-        zinfo.name = zinfo.filename
-        zinfo.size = zinfo.file_size
-        zinfo.mtime = calendar.timegm(zinfo.date_time)
-        self.tarfile.addfile(zinfo, StringIO(bytes))
+        tinfo = TarInfo(zinfo.filename)
+        tinfo.size = len(bytes)
+        tinfo.mtime = calendar.timegm(zinfo.date_time)
+        self.tarfile.addfile(tinfo, StringIO(bytes))
     def close(self):
         self.tarfile.close()
 #class TarFileCompat
@@ -2172,4 +2532,5 @@ def is_tarfile(name):
     except TarError:
         return False
 
+bltn_open = open
 open = TarFile.open