summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeal Norwitz <nnorwitz@gmail.com>2003-01-05 23:19:43 (GMT)
committerNeal Norwitz <nnorwitz@gmail.com>2003-01-05 23:19:43 (GMT)
commitb9ef4aea5eac30cfdd56578f7225e6e6907b0055 (patch)
tree4f89d18e82250f6ddf99e6f1ad76345ec6fb00c2
parent6d23b170cf7c849e61195ebc2ede3bcd3428a909 (diff)
downloadcpython-b9ef4aea5eac30cfdd56578f7225e6e6907b0055.zip
cpython-b9ef4aea5eac30cfdd56578f7225e6e6907b0055.tar.gz
cpython-b9ef4aea5eac30cfdd56578f7225e6e6907b0055.tar.bz2
SF #651082, tarfile module implementation from Lars Gustäbel
-rw-r--r--Doc/Makefile.deps1
-rw-r--r--Doc/lib/lib.tex1
-rw-r--r--Doc/lib/libtarfile.tex450
-rw-r--r--Lib/tarfile.py1927
-rw-r--r--Lib/test/test_tarfile.py253
-rw-r--r--Lib/test/testtar.tarbin0 -> 112640 bytes
-rw-r--r--Misc/ACKS1
-rw-r--r--Misc/NEWS4
8 files changed, 2637 insertions, 0 deletions
diff --git a/Doc/Makefile.deps b/Doc/Makefile.deps
index c8e0642..5654b6c 100644
--- a/Doc/Makefile.deps
+++ b/Doc/Makefile.deps
@@ -329,6 +329,7 @@ LIBFILES= $(MANSTYLES) $(INDEXSTYLES) $(COMMONTEX) \
lib/libmmap.tex \
lib/tkinter.tex \
lib/libturtle.tex \
+ lib/libtarfile.tex \
lib/libcfgparser.tex
# LaTeX source files for Macintosh Library Modules.
diff --git a/Doc/lib/lib.tex b/Doc/lib/lib.tex
index 0d202ac..ff4ff98 100644
--- a/Doc/lib/lib.tex
+++ b/Doc/lib/lib.tex
@@ -128,6 +128,7 @@ and how to embed it in other applications.
\input{libcfgparser}
\input{libfileinput}
\input{libxreadlines}
+\input{libtarfile}
\input{libcalendar}
\input{libcmd}
\input{libshlex}
diff --git a/Doc/lib/libtarfile.tex b/Doc/lib/libtarfile.tex
new file mode 100644
index 0000000..a05a9f4
--- /dev/null
+++ b/Doc/lib/libtarfile.tex
@@ -0,0 +1,450 @@
+\section{\module{tarfile} --- Read and write tar archive files}
+
+\declaremodule{standard}{tarfile}
+\modulesynopsis{Read and write tar-format archive files.}
+\versionadded{2.3}
+
+\moduleauthor{Lars Gust\"abel}{lars@gustaebel.de}
+\sectionauthor{Lars Gust\"abel}{lars@gustaebel.de}
+
+The \module{tarfile} module makes it possible to read and create tar archives.
+Some facts and figures:
+
+\begin{itemize}
+\item reads and writes \module{gzip} and \module{bzip2} compressed archives.
+\item creates POSIX 1003.1-1990 compliant or GNU tar compatible archives.
+\item reads GNU tar extensions \emph{longname}, \emph{longlink} and
+ \emph{sparse}.
+\item stores pathnames of unlimited length using GNU tar extensions.
+\item handles directories, regular files, hardlinks, symbolic links, fifos,
+ character devices and block devices and is able to acquire and
+ restore file information like timestamp, access permissions and owner.
+\item can handle tape devices.
+\end{itemize}
+
+\begin{funcdesc}{open}{\optional{name\optional{, mode
+ \optional{, fileobj\optional{, bufsize}}}}}
+ Return a \class{TarFile} object for the pathname \var{name}.
+ For detailed information on \class{TarFile} objects,
+ see \citetitle{TarFile Objects} (section \ref{tarfile-objects}).
+
+ \var{mode} has to be a string of the form \code{'filemode[:compression]'},
+ it defaults to \code{'r'}. Here is a full list of mode combinations:
+
+ \begin{tableii}{c|l}{code}{mode}{action}
+ \lineii{'r'}{Open for reading with transparent compression (recommended).}
+ \lineii{'r:'}{Open for reading exclusively without compression.}
+ \lineii{'r:gz'}{Open for reading with gzip compression.}
+ \lineii{'r:bz2'}{Open for reading with bzip2 compression.}
+ \lineii{'a' or 'a:'}{Open for appending with no compression.}
+ \lineii{'w' or 'w:'}{Open for uncompressed writing.}
+ \lineii{'w:gz'}{Open for gzip compressed writing.}
+ \lineii{'w:bz2'}{Open for bzip2 compressed writing.}
+ \end{tableii}
+
+ Note that \code{'a:gz'} or \code{'a:bz2'} is not possible.
+ If \var{mode} is not suitable to open a certain (compressed) file for
+ reading, \exception{ReadError} is raised. Use \var{mode} \code{'r'} to
+ avoid this. If a compression method is not supported,
+ \exception{CompressionError} is raised.
+
+ If \var{fileobj} is specified, it is used as an alternative to
+ a file object opened for \var{name}.
+
+ For special purposes, there is a second format for \var{mode}:
+ \code{'filemode|[compression]'}. \code{open} will return a \class{TarFile}
+ object that processes its data as a stream of blocks. No random
+ seeking will be done on the file. If given, \var{fileobj} may be any
+ object that has a \code{read()} resp. \code{write()} method.
+ \var{bufsize} specifies the blocksize and defaults to \code{20 * 512}
+ bytes. Use this variant in combination with e.g. \code{sys.stdin}, a socket
+ file object or a tape device.
+ However, such a \class{TarFile} object is limited in that it does not allow
+ to be accessed randomly, see \citetitle{Examples} (section
+ \ref{tar-examples}).
+ The currently possible modes:
+
+ \begin{tableii}{c|l}{code}{mode}{action}
+ \lineii{'r|'}{Open a \emph{stream} of uncompressed tar blocks for reading.}
+ \lineii{'r|gz'}{Open a gzip compressed \emph{stream} for reading.}
+ \lineii{'r|bz2'}{Open a bzip2 compressed \emph{stream} for reading.}
+ \lineii{'w|'}{Open an uncompressed \emph{stream} for writing.}
+ \lineii{'w|gz'}{Open an gzip compressed \emph{stream} for writing.}
+ \lineii{'w|bz2'}{Open an bzip2 compressed \emph{stream} for writing.}
+ \end{tableii}
+\end{funcdesc}
+
+\begin{classdesc*}{TarFile}
+ Class for reading and writing tar archives. Do not use this
+ class directly, better use \function{open()} instead.
+ See \citetitle{TarFile Objects} (section \ref{tarfile-objects}).
+\end{classdesc*}
+
+\begin{funcdesc}{is_tarfile}{name}
+ Return \code{True} if \var{name} is a tar archive file, that the
+ \module{tarfile} module can read.
+\end{funcdesc}
+
+\begin{classdesc}{TarFileCompat}{filename\optional{, mode\optional{,
+ compression}}}
+
+ Class for limited access to tar archives with a \code{zipfile}-like
+ interface. Please consult the documentation of \code{zipfile} for more
+ details.
+ \code{compression} must be one of the following constants:
+ \begin{datadesc}{TAR_PLAIN}
+ Constant for an uncompressed tar archive.
+ \end{datadesc}
+ \begin{datadesc}{TAR_GZIPPED}
+ Constant for a \code{gzip} compressed tar archive.
+ \end{datadesc}
+\end{classdesc}
+
+\begin{excdesc}{TarError}
+ Base class for all \module{tarfile} exceptions.
+\end{excdesc}
+
+\begin{excdesc}{ReadError}
+ Is raised when a tar archive is opened, that either cannot be handled by
+ the \module{tarfile} module or is somehow invalid.
+\end{excdesc}
+
+\begin{excdesc}{CompressionError}
+ Is raised when a compression method is not supported or when the data
+ cannot be decoded properly.
+\end{excdesc}
+
+\begin{excdesc}{StreamError}
+ Is raised for the limitations that are typical for stream-like
+ \class{TarFile} objects.
+\end{excdesc}
+
+\begin{excdesc}{ExtractError}
+ Is raised for \emph{non-fatal} errors when using \method{extract()}, but
+ only if \member{TarFile.errorlevel}\code{ == 2}.
+\end{excdesc}
+
+\begin{seealso}
+ \seemodule[module-zipfile]{zipfile}{Documentation of the \code{zipfile}
+ standard module.}
+
+ \seetitle[http://www.gnu.org/manual/tar/html_chapter/tar_8.html\#SEC118]
+ {GNU tar manual, Standard Section}{Documentation for tar archive files,
+ including GNU tar extensions.}
+\end{seealso}
+
+%-----------------
+% TarFile Objects
+%-----------------
+
+\subsection{TarFile Objects \label{tarfile-objects}}
+
+The \class{TarFile} object provides an interface to a tar archive. A tar
+archive is a sequence of blocks. An archive member (a stored file) is made up
+of a header block followed by data blocks. It is possible, to store a file in a
+tar archive several times. Each archive member is represented by a
+\class{TarInfo} object, see \citetitle{TarInfo Objects} (section
+\ref{tarinfo-objects}) for details.
+
+\begin{classdesc}{TarFile}{\optional{name
+ \optional{, mode\optional{, fileobj}}}}
+ Open an \emph{(uncompressed)} tar archive \var{name}.
+ \var{mode} is either \code{'r'} to read from an existing archive,
+ \code{'a'} to append data to an existing file or \code{'w'} to create a new
+ file overwriting an existing one. \var{mode} defaults to \code{'r'}.
+
+ If \var{fileobj} is given, it is used for reading or writing data.
+ If it can be determined, \var{mode} is overridden by \var{fileobj}'s mode.
+ \begin{notice}
+ \var{fileobj} is not closed, when \class{TarFile} is closed.
+ \end{notice}
+\end{classdesc}
+
+\begin{methoddesc}{open}{...}
+ Alternative constructor. The \function{open()} function on module level is
+ actually a shortcut to this classmethod. See section \ref{module-tarfile}
+ for details.
+\end{methoddesc}
+
+\begin{methoddesc}{getmember}{name}
+ Return a \class{TarInfo} object for member \var{name}. If \var{name} can
+ not be found in the archive, \exception{KeyError} is raised.
+ \begin{notice}
+ If a member occurs more than once in the archive, its last
+ occurence is assumed to be the most up-to-date version.
+ \end{notice}
+\end{methoddesc}
+
+\begin{methoddesc}{getmembers}{}
+ Return the members of the archive as a list of \class{TarInfo} objects.
+ The list has the same order as the members in the archive.
+\end{methoddesc}
+
+\begin{methoddesc}{getnames}{}
+ Return the members as a list of their names. It has the same order as
+ the list returned by \method{getmembers()}.
+\end{methoddesc}
+
+\begin{methoddesc}{list}{verbose=True}
+ Print a table of contents to \code{sys.stdout}. If \var{verbose} is
+ \code{False}, only the names of the members are printed. If it is
+ \code{True}, an \code{"ls -l"}-like output is produced.
+\end{methoddesc}
+
+\begin{methoddesc}{next}{}
+ Return the next member of the archive as a \class{TarInfo} object, when
+ \class{TarFile} is opened for reading. Return \code{None} if there is no
+ more available.
+\end{methoddesc}
+
+\begin{methoddesc}{extract}{member\optional{, path}}
+ Extract a member from the archive to the current working directory,
+ using its full name. Its file information is extracted as accurately as
+ possible.
+ \var{member} may be a filename or a \class{TarInfo} object.
+ You can specify a different directory using \var{path}.
+\end{methoddesc}
+
+\begin{methoddesc}{extractfile}{member}
+ Extract a member from the archive as a file object.
+ \var{member} may be a filename or a \class{TarInfo} object.
+ If \var{member} is a regular file, a file-like object is returned.
+ If \var{member} is a link, a file-like object is constructed from the
+ link's target.
+ If \var{member} is none of the above, \code{None} is returned.
+ \begin{notice}
+ The file-like object is read-only and provides the following methods:
+ \method{read()}, \method{readline()}, \method{readlines()},
+ \method{seek()}, \method{tell()}.
+ \end{notice}
+\end{methoddesc}
+
+\begin{methoddesc}{add}{name\optional{, arcname\optional{, recursive=True}}}
+ Add the file \var{name} to the archive. \var{name} may be any type
+ of file (directory, fifo, symbolic link, etc.).
+ If given, \var{arcname} specifies an alternative name for the file in the
+ archive. Directories are added recursively by default.
+ This can be avoided by setting \var{recursive} to \code{False}.
+\end{methoddesc}
+
+\begin{methoddesc}{addfile}{tarinfo\optional{, fileobj}}
+ Add the \class{TarInfo} object \var{tarinfo} to the archive.
+ If \var{fileobj} is given, \code{tarinfo.size} bytes are read
+ from it and added to the archive. You can create \class{TarInfo} objects
+ using \method{gettarinfo()}.
+ \begin{notice}
+ On Windows platforms, \var{fileobj} should always be opened with mode
+ \code{'rb'} to avoid irritation about the file size.
+ \end{notice}
+\end{methoddesc}
+
+\begin{methoddesc}{gettarinfo}{\optional{name\optional{, arcname
+ \optional{, fileobj}}}}
+ Create a \class{TarInfo} object for either the file \var{name} or the
+ file object \var{fileobj} (using \code{os.fstat()} on its file descriptor).
+ You can modify some of the \class{TarInfo}'s attributes before you add it
+ using \method{addfile()}.
+ If given, \var{arcname} specifies an alternative name for the file in the
+ archive.
+\end{methoddesc}
+
+\begin{methoddesc}{close}{}
+ Close the \class{TarFile}. In write-mode, two finishing zero blocks are
+ appended to the archive.
+\end{methoddesc}
+
+\begin{memberdesc}{posix=True}
+ If \code{True}, create a POSIX 1003.1-1990 compliant archive. GNU
+ extensions are not used, because they are not part of the POSIX standard.
+ This limits the length of filenames to at most 256 and linknames to 100
+ characters. A \exception{ValueError} is raised, if a pathname exceeds this
+ limit.
+ If \code{False}, create a GNU tar compatible archive. It will not be POSIX
+ compliant, but can store pathnames of unlimited length.
+\end{memberdesc}
+
+\begin{memberdesc}{dereference=False}
+ If \code{False}, add symbolic and hard links to archive. If \code{True},
+ add the content of the target files to the archive. This has no effect on
+ systems that do not support links.
+\end{memberdesc}
+
+\begin{memberdesc}{ignore_zeros=False}
+ If \code{False}, treat an empty block as the end of the archive. If
+ \code{True}, skip empty (and invalid) blocks and try to get as many
+ members as possible. This is only useful for concatenated or damaged
+ archives.
+\end{memberdesc}
+
+\begin{memberdesc}{debug=0}
+ To be set from \code{0}(no debug messages) up to \code{3}(all debug
+ messages). The messages are written to \code{sys.stdout}.
+\end{memberdesc}
+
+\begin{memberdesc}{errorlevel=0}
+ If \code{0}, all errors are ignored when using \method{extract()}.
+ Nevertheless, they appear as error messages in the debug output, when
+ debugging is enabled.
+ If \code{1}, all \emph{fatal} errors are raised as \exception{OSError}
+ or \exception{IOError} exceptions.
+ If \code{2}, all \emph{non-fatal} errors are raised as \exception{TarError}
+ exceptions as well.
+\end{memberdesc}
+
+%-----------------
+% TarInfo Objects
+%-----------------
+
+\subsection{TarInfo Objects \label{tarinfo-objects}}
+
+A \class{TarInfo} object represents one member in a \class{TarFile}. Aside from
+storing all required attributes of a file (like file type, size, time,
+permissions, owner etc.), it provides some useful methods to determine its
+type. It does \emph{not} contain the file's data itself.
+
+\class{TarInfo} objects are returned by \code{TarFile}'s methods
+\code{getmember()}, \code{getmembers()} and \code{gettarinfo()}.
+
+\begin{classdesc}{TarInfo}{\optional{name}}
+ Create a \class{TarInfo} object.
+\end{classdesc}
+
+\begin{methoddesc}{frombuf}{}
+ Create and return a \class{TarInfo} object from a string buffer.
+\end{methoddesc}
+
+\begin{methoddesc}{tobuf}{}
+ Create a string buffer from a \class{TarInfo} object.
+\end{methoddesc}
+
+A \code{TarInfo} object has the following public data attributes:
+\begin{memberdesc}{name}
+ Name of the archive member.
+\end{memberdesc}
+
+\begin{memberdesc}{size}
+ Size in bytes.
+\end{memberdesc}
+
+\begin{memberdesc}{mtime}
+ Time of last modification.
+\end{memberdesc}
+
+\begin{memberdesc}{mode}
+ Permission bits.
+\end{memberdesc}
+
+\begin{memberdesc}{type}
+ File type.
+ \var{type} is usually one of these constants:
+ \code{REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, CONTTYPE,
+ CHRTYPE, BLKTYPE, GNUTYPE_SPARSE}.
+ To determine the type of a \class{TarInfo} object more conveniently, use
+ the \code{is_*()} methods below.
+\end{memberdesc}
+
+\begin{memberdesc}{linkname}
+ Name of the target file name, which is only present in \class{TarInfo}
+ objects of type LNKTYPE and SYMTYPE.
+\end{memberdesc}
+
+\begin{memberdesc}{uid, gid}
+ User and group ID of who originally stored this member.
+\end{memberdesc}
+
+\begin{memberdesc}{uname, gname}
+ User and group name.
+\end{memberdesc}
+
+A \class{TarInfo} object also provides some convenient query methods:
+\begin{methoddesc}{isfile}{}
+ Return \code{True} if the \class{Tarinfo} object is a regular file.
+\end{methoddesc}
+
+\begin{methoddesc}{isreg}{}
+ Same as \method{isfile()}.
+\end{methoddesc}
+
+\begin{methoddesc}{isdir}{}
+ Return \code{True} if it is a directory.
+\end{methoddesc}
+
+\begin{methoddesc}{issym}{}
+ Return \code{True} if it is a symbolic link.
+\end{methoddesc}
+
+\begin{methoddesc}{islnk}{}
+ Return \code{True} if it is a hard link.
+\end{methoddesc}
+
+\begin{methoddesc}{ischr}{}
+ Return \code{True} if it is a character device.
+\end{methoddesc}
+
+\begin{methoddesc}{isblk}{}
+ Return \code{True} if it is a block device.
+\end{methoddesc}
+
+\begin{methoddesc}{isfifo}{}
+ Return \code{True} if it is a FIFO.
+\end{methoddesc}
+
+\begin{methoddesc}{isdev}{}
+ Return \code{True} if it is one of character device, block device or FIFO.
+\end{methoddesc}
+
+%------------------------
+% Examples
+%------------------------
+
+\subsection{Examples \label{tar-examples}}
+
+How to create an uncompressed tar archive from a list of filenames:
+\begin{verbatim}
+import tarfile
+tar = tarfile.open("sample.tar", "w")
+for name in ["foo", "bar", "quux"]:
+ tar.add(name)
+tar.close()
+\end{verbatim}
+
+How to read a gzip compressed tar archive and display some member information:
+\begin{verbatim}
+import tarfile
+tar = tarfile.open("sample.tar.gz", "r:gz")
+for tarinfo in tar:
+ print tarinfo.name, "is", tarinfo.size, "bytes in size and is",
+ if tarinfo.isreg():
+ print "a regular file."
+ elif tarinfo.isdir():
+ print "a directory."
+ else:
+ print "something else."
+tar.close()
+\end{verbatim}
+
+How to create a tar archive with faked information:
+\begin{verbatim}
+import tarfile
+tar = tarfile.open("sample.tar.gz", "w:gz")
+for name in namelist:
+ tarinfo = tar.gettarinfo(name, "fakeproj-1.0/" + name)
+ tarinfo.uid = 123
+ tarinfo.gid = 456
+ tarinfo.uname = "johndoe"
+ tarinfo.gname = "fake"
+ tar.addfile(tarinfo, file(name))
+tar.close()
+\end{verbatim}
+
+The \emph{only} way to extract an uncompressed tar stream from
+\code{sys.stdin}:
+\begin{verbatim}
+import sys
+import tarfile
+tar = tarfile.open(mode="r|", fileobj=sys.stdin)
+for tarinfo in tar:
+ tar.extract(tarinfo)
+tar.close()
+\end{verbatim}
+
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
new file mode 100644
index 0000000..3e334be
--- /dev/null
+++ b/Lib/tarfile.py
@@ -0,0 +1,1927 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+#-------------------------------------------------------------------
+# tarfile.py
+#-------------------------------------------------------------------
+# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
+# All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+"""Read from and write to tar format archives.
+"""
+
+__version__ = "$Revision$"
+# $Source$
+
+version = "0.6.4"
+__author__ = "Lars Gustäbel (lars@gustaebel.de)"
+__date__ = "$Date$"
+__cvsid__ = "$Id$"
+__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
+
+#---------
+# Imports
+#---------
+import sys
+import os
+import shutil
+import stat
+import errno
+import time
+import struct
+
+try:
+ import grp, pwd
+except ImportError:
+ grp = pwd = None
+
+# from tarfile import *
+__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
+
+#---------------------------------------------------------
+# tar constants
+#---------------------------------------------------------
+NUL = "\0" # the null character
+BLOCKSIZE = 512 # length of processing blocks
+RECORDSIZE = BLOCKSIZE * 20 # length of records
+MAGIC = "ustar" # magic tar string
+VERSION = "00" # version number
+
+LENGTH_NAME = 100 # maximum length of a filename
+LENGTH_LINK = 100 # maximum length of a linkname
+LENGTH_PREFIX = 155 # maximum length of the prefix field
+MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits)
+
+REGTYPE = "0" # regular file
+AREGTYPE = "\0" # regular file
+LNKTYPE = "1" # link (inside tarfile)
+SYMTYPE = "2" # symbolic link
+CHRTYPE = "3" # character special device
+BLKTYPE = "4" # block special device
+DIRTYPE = "5" # directory
+FIFOTYPE = "6" # fifo special device
+CONTTYPE = "7" # contiguous file
+
+GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
+GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
+GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
+
+#---------------------------------------------------------
+# tarfile constants
+#---------------------------------------------------------
+SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
+ SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
+ CONTTYPE, CHRTYPE, BLKTYPE,
+ GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
+ GNUTYPE_SPARSE)
+
+REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
+ CONTTYPE, GNUTYPE_SPARSE) # represent regular files
+
+#---------------------------------------------------------
+# Bits used in the mode field, values in octal.
+#---------------------------------------------------------
+S_IFLNK = 0120000 # symbolic link
+S_IFREG = 0100000 # regular file
+S_IFBLK = 0060000 # block device
+S_IFDIR = 0040000 # directory
+S_IFCHR = 0020000 # character device
+S_IFIFO = 0010000 # fifo
+
+TSUID = 04000 # set UID on execution
+TSGID = 02000 # set GID on execution
+TSVTX = 01000 # reserved
+
+TUREAD = 0400 # read by owner
+TUWRITE = 0200 # write by owner
+TUEXEC = 0100 # execute/search by owner
+TGREAD = 0040 # read by group
+TGWRITE = 0020 # write by group
+TGEXEC = 0010 # execute/search by group
+TOREAD = 0004 # read by other
+TOWRITE = 0002 # write by other
+TOEXEC = 0001 # execute/search by other
+
+#---------------------------------------------------------
+# Some useful functions
+#---------------------------------------------------------
+def nts(s):
+ """Convert a null-terminated string buffer to a python string.
+ """
+ return s.split(NUL, 1)[0]
+
+def calc_chksum(buf):
+ """Calculate the checksum for a member's header. It's a simple addition
+ of all bytes, treating the chksum field as if filled with spaces.
+ buf is a 512 byte long string buffer which holds the header.
+ """
+ chk = 256 # chksum field is treated as blanks,
+ # so the initial value is 8 * ord(" ")
+ for c in buf[:148]: chk += ord(c) # sum up all bytes before chksum
+ for c in buf[156:]: chk += ord(c) # sum up all bytes after chksum
+ return chk
+
+def copyfileobj(src, dst, length=None):
+ """Copy length bytes from fileobj src to fileobj dst.
+ If length is None, copy the entire content.
+ """
+ if length == 0:
+ return
+ if length is None:
+ shutil.copyfileobj(src, dst)
+ return
+
+ BUFSIZE = 16 * 1024
+ blocks, remainder = divmod(length, BUFSIZE)
+ for b in xrange(blocks):
+ buf = src.read(BUFSIZE)
+ if len(buf) < BUFSIZE:
+ raise IOError, "end of file reached"
+ dst.write(buf)
+
+ if remainder != 0:
+ buf = src.read(remainder)
+ if len(buf) < remainder:
+ raise IOError, "end of file reached"
+ dst.write(buf)
+ return
+
+filemode_table = (
+ (S_IFLNK, "l",
+ S_IFREG, "-",
+ S_IFBLK, "b",
+ S_IFDIR, "d",
+ S_IFCHR, "c",
+ S_IFIFO, "p"),
+ (TUREAD, "r"),
+ (TUWRITE, "w"),
+ (TUEXEC, "x", TSUID, "S", TUEXEC|TSUID, "s"),
+ (TGREAD, "r"),
+ (TGWRITE, "w"),
+ (TGEXEC, "x", TSGID, "S", TGEXEC|TSGID, "s"),
+ (TOREAD, "r"),
+ (TOWRITE, "w"),
+ (TOEXEC, "x", TSVTX, "T", TOEXEC|TSVTX, "t"))
+
+def filemode(mode):
+ """Convert a file's mode to a string of the form
+ -rwxrwxrwx.
+ Used by TarFile.list()
+ """
+ s = ""
+ for t in filemode_table:
+ while True:
+ if mode & t[0] == t[0]:
+ s += t[1]
+ elif len(t) > 2:
+ t = t[2:]
+ continue
+ else:
+ s += "-"
+ break
+ return s
+
+if os.sep != "/":
+ normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
+else:
+ normpath = os.path.normpath
+
+class TarError(Exception):
+ """Base exception."""
+ pass
+class ExtractError(TarError):
+ """General exception for extract errors."""
+ pass
+class ReadError(TarError):
+ """Exception for unreadble tar archives."""
+ pass
+class CompressionError(TarError):
+ """Exception for unavailable compression methods."""
+ pass
+class StreamError(TarError):
+ """Exception for unsupported operations on stream-like TarFiles."""
+ pass
+
+#---------------------------
+# internal stream interface
+#---------------------------
+class _LowLevelFile:
+ """Low-level file object. Supports reading and writing.
+ It is used instead of a regular file object for streaming
+ access.
+ """
+
+ def __init__(self, name, mode):
+ mode = {
+ "r": os.O_RDONLY,
+ "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
+ }[mode]
+ if hasattr(os, "O_BINARY"):
+ mode |= os.O_BINARY
+ self.fd = os.open(name, mode)
+
+ def close(self):
+ os.close(self.fd)
+
+ def read(self, size):
+ return os.read(self.fd, size)
+
+ def write(self, s):
+ os.write(self.fd, s)
+
+class _Stream:
+ """Class that serves as an adapter between TarFile and
+ a stream-like object. The stream-like object only
+ needs to have a read() or write() method and is accessed
+ blockwise. Use of gzip or bzip2 compression is possible.
+ A stream-like object could be for example: sys.stdin,
+ sys.stdout, a socket, a tape device etc.
+
+ _Stream is intended to be used only internally.
+ """
+
+ def __init__(self, name, mode, type, fileobj, bufsize):
+ """Construct a _Stream object.
+ """
+ self._extfileobj = True
+ if fileobj is None:
+ fileobj = _LowLevelFile(name, mode)
+ self._extfileobj = False
+
+ self.name = name or ""
+ self.mode = mode
+ self.type = type
+ self.fileobj = fileobj
+ self.bufsize = bufsize
+ self.buf = ""
+ self.pos = 0L
+ self.closed = False
+
+ if type == "gz":
+ try:
+ import zlib
+ except ImportError:
+ raise CompressionError, "zlib module is not available"
+ self.zlib = zlib
+ self.crc = zlib.crc32("")
+ if mode == "r":
+ self._init_read_gz()
+ else:
+ self._init_write_gz()
+
+ if type == "bz2":
+ try:
+ import bz2
+ except ImportError:
+ raise CompressionError, "bz2 module is not available"
+ if mode == "r":
+ self.dbuf = ""
+ self.cmp = bz2.BZ2Decompressor()
+ else:
+ self.cmp = bz2.BZ2Compressor()
+
+ def __del__(self):
+ if not self.closed:
+ self.close()
+
+ def _init_write_gz(self):
+ """Initialize for writing with gzip compression.
+ """
+ self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
+ -self.zlib.MAX_WBITS,
+ self.zlib.DEF_MEM_LEVEL,
+ 0)
+ timestamp = struct.pack("<L", long(time.time()))
+ self.__write("\037\213\010\010%s\002\377" % timestamp)
+ if self.name.endswith(".gz"):
+ self.name = self.name[:-3]
+ self.__write(self.name + NUL)
+
+ def write(self, s):
+ """Write string s to the stream.
+ """
+ if self.type == "gz":
+ self.crc = self.zlib.crc32(s, self.crc)
+ self.pos += len(s)
+ if self.type != "tar":
+ s = self.cmp.compress(s)
+ self.__write(s)
+
+ def __write(self, s):
+ """Write string s to the stream if a whole new block
+ is ready to be written.
+ """
+ self.buf += s
+ while len(self.buf) > self.bufsize:
+ self.fileobj.write(self.buf[:self.bufsize])
+ self.buf = self.buf[self.bufsize:]
+
+ def close(self):
+ """Close the _Stream object. No operation should be
+ done on it afterwards.
+ """
+ if self.closed:
+ return
+
+ if self.mode == "w" and self.buf:
+ if self.type != "tar":
+ self.buf += self.cmp.flush()
+ self.fileobj.write(self.buf)
+ self.buf = ""
+ if self.type == "gz":
+ self.fileobj.write(struct.pack("<l", self.crc))
+ self.fileobj.write(struct.pack("<L", self.pos))
+
+ if not self._extfileobj:
+ self.fileobj.close()
+
+ self.closed = True
+
+ def _init_read_gz(self):
+ """Initialize for reading a gzip compressed fileobj.
+ """
+ self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
+ self.dbuf = ""
+
+ # taken from gzip.GzipFile with some alterations
+ if self.__read(2) != "\037\213":
+ raise ReadError, "not a gzip file"
+ if self.__read(1) != "\010":
+ raise CompressionError, "unsupported compression method"
+
+ flag = ord(self.__read(1))
+ self.__read(6)
+
+ if flag & 4:
+ xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
+ self.read(xlen)
+ if flag & 8:
+ while True:
+ s = self.__read(1)
+ if not s or s == NUL:
+ break
+ if flag & 16:
+ while True:
+ s = self.__read(1)
+ if not s or s == NUL:
+ break
+ if flag & 2:
+ self.__read(2)
+
+ def tell(self):
+ """Return the stream's file pointer position.
+ """
+ return self.pos
+
+ def seek(self, pos=0):
+ """Set the stream's file pointer to pos. Negative seeking
+ is forbidden.
+ """
+ if pos - self.pos >= 0:
+ blocks, remainder = divmod(pos - self.pos, self.bufsize)
+ for i in xrange(blocks):
+ self.read(self.bufsize)
+ self.read(remainder)
+ else:
+ raise StreamError, "seeking backwards is not allowed"
+ return self.pos
+
+ def read(self, size=None):
+ """Return the next size number of bytes from the stream.
+ If size is not defined, return all bytes of the stream
+ up to EOF.
+ """
+ if size is None:
+ t = []
+ while True:
+ buf = self._read(self.bufsize)
+ if not buf:
+ break
+ t.append(buf)
+ buf = "".join(t)
+ else:
+ buf = self._read(size)
+ self.pos += len(buf)
+ return buf
+
+ def _read(self, size):
+ """Return size bytes from the stream.
+ """
+ if self.type == "tar":
+ return self.__read(size)
+
+ c = len(self.dbuf)
+ t = [self.dbuf]
+ while c < size:
+ buf = self.__read(self.bufsize)
+ if not buf:
+ break
+ buf = self.cmp.decompress(buf)
+ t.append(buf)
+ c += len(buf)
+ t = "".join(t)
+ self.dbuf = t[size:]
+ return t[:size]
+
+ def __read(self, size):
+ """Return size bytes from stream. If internal buffer is empty,
+ read another block from the stream.
+ """
+ c = len(self.buf)
+ t = [self.buf]
+ while c < size:
+ buf = self.fileobj.read(self.bufsize)
+ if not buf:
+ break
+ t.append(buf)
+ c += len(buf)
+ t = "".join(t)
+ self.buf = t[size:]
+ return t[:size]
+# class _Stream
+
+#------------------------
+# Extraction file object
+#------------------------
+class ExFileObject(object):
+ """File-like object for reading an archive member.
+ Is returned by TarFile.extractfile(). Support for
+ sparse files included.
+ """
+
+ def __init__(self, tarfile, tarinfo):
+ self.fileobj = tarfile.fileobj
+ self.name = tarinfo.name
+ self.mode = "r"
+ self.closed = False
+ self.offset = tarinfo.offset_data
+ self.size = tarinfo.size
+ self.pos = 0L
+ self.linebuffer = ""
+ if tarinfo.issparse():
+ self.sparse = tarinfo.sparse
+ self.read = self._readsparse
+ else:
+ self.read = self._readnormal
+
+ def __read(self, size):
+ """Overloadable read method.
+ """
+ return self.fileobj.read(size)
+
+ def readline(self, size=-1):
+ """Read a line with approx. size. If size is negative,
+ read a whole line. readline() and read() must not
+ be mixed up (!).
+ """
+ if size < 0:
+ size = sys.maxint
+
+ nl = self.linebuffer.find("\n")
+ if nl >= 0:
+ nl = min(nl, size)
+ else:
+ size -= len(self.linebuffer)
+ while nl < 0:
+ buf = self.read(min(size, 100))
+ if not buf:
+ break
+ self.linebuffer += buf
+ size -= len(buf)
+ if size <= 0:
+ break
+ nl = self.linebuffer.find("\n")
+ if nl == -1:
+ s = self.linebuffer
+ self.linebuffer = ""
+ return s
+ buf = self.linebuffer[:nl]
+ self.linebuffer = self.linebuffer[nl + 1:]
+ while buf[-1:] == "\r":
+ buf = buf[:-1]
+ return buf + "\n"
+
+ def readlines(self):
+ """Return a list with all (following) lines.
+ """
+ result = []
+ while True:
+ line = self.readline()
+ if not line: break
+ result.append(line)
+ return result
+
+ def _readnormal(self, size=None):
+ """Read operation for regular files.
+ """
+ if self.closed:
+ raise ValueError, "file is closed"
+ self.fileobj.seek(self.offset + self.pos)
+ bytesleft = self.size - self.pos
+ if size is None:
+ bytestoread = bytesleft
+ else:
+ bytestoread = min(size, bytesleft)
+ self.pos += bytestoread
+ return self.__read(bytestoread)
+
+ def _readsparse(self, size=None):
+ """Read operation for sparse files.
+ """
+ if self.closed:
+ raise ValueError, "file is closed"
+
+ if size is None:
+ size = self.size - self.pos
+
+ data = []
+ while size > 0:
+ buf = self._readsparsesection(size)
+ if not buf:
+ break
+ size -= len(buf)
+ data.append(buf)
+ return "".join(data)
+
+ def _readsparsesection(self, size):
+ """Read a single section of a sparse file.
+ """
+ section = self.sparse.find(self.pos)
+
+ if section is None:
+ return ""
+
+ toread = min(size, section.offset + section.size - self.pos)
+ if isinstance(section, _data):
+ realpos = section.realpos + self.pos - section.offset
+ self.pos += toread
+ self.fileobj.seek(self.offset + realpos)
+ return self.__read(toread)
+ else:
+ self.pos += toread
+ return NUL * toread
+
+ def tell(self):
+ """Return the current file position.
+ """
+ return self.pos
+
+ def seek(self, pos, whence=0):
+ """Seek to a position in the file.
+ """
+ self.linebuffer = ""
+ if whence == 0:
+ self.pos = min(max(pos, 0), self.size)
+ if whence == 1:
+ if pos < 0:
+ self.pos = max(self.pos + pos, 0)
+ else:
+ self.pos = min(self.pos + pos, self.size)
+ if whence == 2:
+ self.pos = max(min(self.size + pos, self.size), 0)
+
+ def close(self):
+ """Close the file object.
+ """
+ self.closed = True
+#class ExFileObject
+
+#------------------
+# Exported Classes
+#------------------
+class TarInfo(object):
+ """Informational class which holds the details about an
+ archive member given by a tar header block.
+ TarInfo objects are returned by TarFile.getmember(),
+ TarFile.getmembers() and TarFile.gettarinfo() and are
+ usually created internally.
+ """
+
+ def __init__(self, name=""):
+ """Construct a TarInfo object. name is the optional name
+ of the member.
+ """
+
+ self.name = name # member name (dirnames must end with '/')
+ self.mode = 0666 # file permissions
+ self.uid = 0 # user id
+ self.gid = 0 # group id
+ self.size = 0 # file size
+ self.mtime = 0 # modification time
+ self.chksum = 0 # header checksum
+ self.type = REGTYPE # member type
+ self.linkname = "" # link name
+ self.uname = "user" # user name
+ self.gname = "group" # group name
+ self.devmajor = 0 #-
+ self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
+ self.prefix = "" # prefix to filename or holding information
+ # about sparse files
+
+ self.offset = 0 # the tar header starts here
+ self.offset_data = 0 # the file's data starts here
+
+ def __repr__(self):
+ return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
+
+ def frombuf(cls, buf):
+ """Construct a TarInfo object from a 512 byte string buffer.
+ """
+ tarinfo = cls()
+ tarinfo.name = nts(buf[0:100])
+ tarinfo.mode = int(buf[100:108], 8)
+ tarinfo.uid = int(buf[108:116],8)
+ tarinfo.gid = int(buf[116:124],8)
+ tarinfo.size = long(buf[124:136], 8)
+ tarinfo.mtime = long(buf[136:148], 8)
+ tarinfo.chksum = int(buf[148:156], 8)
+ tarinfo.type = buf[156:157]
+ tarinfo.linkname = nts(buf[157:257])
+ tarinfo.uname = nts(buf[265:297])
+ tarinfo.gname = nts(buf[297:329])
+ try:
+ tarinfo.devmajor = int(buf[329:337], 8)
+ tarinfo.devminor = int(buf[337:345], 8)
+ except ValueError:
+ tarinfo.devmajor = tarinfo.devmajor = 0
+
+ # The prefix field is used for filenames > 100 in
+ # the POSIX standard.
+ # name = prefix + "/" + name
+ prefix = buf[345:500]
+ while prefix and prefix[-1] == NUL:
+ prefix = prefix[:-1]
+ if len(prefix.split(NUL)) == 1:
+ tarinfo.prefix = prefix
+ tarinfo.name = normpath(os.path.join(tarinfo.prefix, tarinfo.name))
+ else:
+ tarinfo.prefix = buf[345:500]
+
+ # Directory names should have a '/' at the end.
+ if tarinfo.isdir() and tarinfo.name[-1:] != "/":
+ tarinfo.name += "/"
+ return tarinfo
+
+ frombuf = classmethod(frombuf)
+
+ def tobuf(self):
+ """Return a tar header block as a 512 byte string.
+ """
+ name = self.name
+
+ # The following code was contributed by Detlef Lannert.
+ parts = []
+ for value, fieldsize in (
+ (name, 100),
+ ("%07o" % (self.mode & 07777), 8),
+ ("%07o" % self.uid, 8),
+ ("%07o" % self.gid, 8),
+ ("%011o" % self.size, 12),
+ ("%011o" % self.mtime, 12),
+ (" ", 8),
+ (self.type, 1),
+ (self.linkname, 100),
+ (MAGIC, 6),
+ (VERSION, 2),
+ (self.uname, 32),
+ (self.gname, 32),
+ ("%07o" % self.devmajor, 8),
+ ("%07o" % self.devminor, 8),
+ (self.prefix, 155)
+ ):
+ l = len(value)
+ parts.append(value + (fieldsize - l) * NUL)
+
+ buf = "".join(parts)
+ chksum = calc_chksum(buf)
+ buf = buf[:148] + "%06o\0" % chksum + buf[155:]
+ buf += (BLOCKSIZE - len(buf)) * NUL
+ self.buf = buf
+ return buf
+
+ def isreg(self):
+ return self.type in REGULAR_TYPES
+ def isfile(self):
+ return self.isreg()
+ def isdir(self):
+ return self.type == DIRTYPE
+ def issym(self):
+ return self.type == SYMTYPE
+ def islnk(self):
+ return self.type == LNKTYPE
+ def ischr(self):
+ return self.type == CHRTYPE
+ def isblk(self):
+ return self.type == BLKTYPE
+ def isfifo(self):
+ return self.type == FIFOTYPE
+ def issparse(self):
+ return self.type == GNUTYPE_SPARSE
+ def isdev(self):
+ return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
+# class TarInfo
+
+class TarFile(object):
+ """The TarFile Class provides an interface to tar archives.
+ """
+
+ debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
+
+ dereference = False # If true, add content of linked file to the
+ # tar file, else the link.
+
+ ignore_zeros = False # If true, skips empty or invalid blocks and
+ # continues processing.
+
+ errorlevel = 0 # If 0, fatal errors only appear in debug
+ # messages (if debug >= 0). If > 0, errors
+ # are passed to the caller as exceptions.
+
+ posix = True # If True, generates POSIX.1-1990-compliant
+ # archives (no GNU extensions!)
+
+ fileobject = ExFileObject
+
+ def __init__(self, name=None, mode="r", fileobj=None):
+ """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
+ read from an existing archive, 'a' to append data to an existing
+ file or 'w' to create a new file overwriting an existing one. `mode'
+ defaults to 'r'.
+ If `fileobj' is given, it is used for reading or writing data. If it
+ can be determined, `mode' is overridden by `fileobj's mode.
+ `fileobj' is not closed, when TarFile is closed.
+ """
+ self.name = name
+
+ if len(mode) > 1 or mode not in "raw":
+ raise ValueError, "mode must be 'r', 'a' or 'w'"
+ self._mode = mode
+ self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
+
+ if not fileobj:
+ fileobj = file(self.name, self.mode)
+ self._extfileobj = False
+ else:
+ if self.name is None and hasattr(fileobj, "name"):
+ self.name = fileobj.name
+ if hasattr(fileobj, "mode"):
+ self.mode = fileobj.mode
+ self._extfileobj = True
+ self.fileobj = fileobj
+
+ # Init datastructures
+ self.closed = False
+ self.members = [] # list of members as TarInfo objects
+ self.membernames = [] # names of members
+ self.chunks = [0] # chunk cache
+ self._loaded = False # flag if all members have been read
+ self.offset = 0L # current position in the archive file
+ self.inodes = {} # dictionary caching the inodes of
+ # archive members already added
+
+ if self._mode == "r":
+ self.firstmember = None
+ self.firstmember = self.next()
+
+ if self._mode == "a":
+ # Move to the end of the archive,
+ # before the first empty block.
+ self.firstmember = None
+ while True:
+ try:
+ tarinfo = self.next()
+ except ReadError:
+ self.fileobj.seek(0)
+ break
+ if tarinfo is None:
+ self.fileobj.seek(- BLOCKSIZE, 1)
+ break
+
+ if self._mode in "aw":
+ self._loaded = True
+
+ #--------------------------------------------------------------------------
+ # Below are the classmethods which act as alternate constructors to the
+ # TarFile class. The open() method is the only one that is needed for
+ # public use; it is the "super"-constructor and is able to select an
+ # adequate "sub"-constructor for a particular compression using the mapping
+ # from OPEN_METH.
+ #
+ # This concept allows one to subclass TarFile without losing the comfort of
+ # the super-constructor. A sub-constructor is registered and made available
+ # by adding it to the mapping in OPEN_METH.
+
+ def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
+ """Open a tar archive for reading, writing or appending. Return
+ an appropriate TarFile class.
+
+ mode:
+ 'r' open for reading with transparent compression
+ 'r:' open for reading exclusively uncompressed
+ 'r:gz' open for reading with gzip compression
+ 'r:bz2' open for reading with bzip2 compression
+ 'a' or 'a:' open for appending
+ 'w' or 'w:' open for writing without compression
+ 'w:gz' open for writing with gzip compression
+ 'w:bz2' open for writing with bzip2 compression
+ 'r|' open an uncompressed stream of tar blocks for reading
+ 'r|gz' open a gzip compressed stream of tar blocks
+ 'r|bz2' open a bzip2 compressed stream of tar blocks
+ 'w|' open an uncompressed stream for writing
+ 'w|gz' open a gzip compressed stream for writing
+ 'w|bz2' open a bzip2 compressed stream for writing
+ """
+
+ if not name and not fileobj:
+ raise ValueError, "nothing to open"
+
+ if ":" in mode:
+ filemode, comptype = mode.split(":", 1)
+ filemode = filemode or "r"
+ comptype = comptype or "tar"
+
+ # Select the *open() function according to
+ # given compression.
+ if comptype in cls.OPEN_METH:
+ func = getattr(cls, cls.OPEN_METH[comptype])
+ else:
+ raise CompressionError, "unknown compression type %r" % comptype
+ return func(name, filemode, fileobj)
+
+ elif "|" in mode:
+ filemode, comptype = mode.split("|", 1)
+ filemode = filemode or "r"
+ comptype = comptype or "tar"
+
+ if filemode not in "rw":
+ raise ValueError, "mode must be 'r' or 'w'"
+
+ t = cls(name, filemode,
+ _Stream(name, filemode, comptype, fileobj, bufsize))
+ t._extfileobj = False
+ return t
+
+ elif mode == "r":
+ # Find out which *open() is appropriate for opening the file.
+ for comptype in cls.OPEN_METH:
+ func = getattr(cls, cls.OPEN_METH[comptype])
+ try:
+ return func(name, "r", fileobj)
+ except (ReadError, CompressionError):
+ continue
+ raise ReadError, "file could not be opened successfully"
+
+ elif mode in "aw":
+ return cls.taropen(name, mode, fileobj)
+
+ raise ValueError, "undiscernible mode"
+
+ open = classmethod(open)
+
+ def taropen(cls, name, mode="r", fileobj=None):
+ """Open uncompressed tar archive name for reading or writing.
+ """
+ if len(mode) > 1 or mode not in "raw":
+ raise ValueError, "mode must be 'r', 'a' or 'w'"
+ return cls(name, mode, fileobj)
+
+ taropen = classmethod(taropen)
+
+ def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
+ """Open gzip compressed tar archive name for reading or writing.
+ Appending is not allowed.
+ """
+ if len(mode) > 1 or mode not in "rw":
+ raise ValueError, "mode must be 'r' or 'w'"
+
+ try:
+ import gzip
+ except ImportError:
+ raise CompressionError, "gzip module is not available"
+
+ pre, ext = os.path.splitext(name)
+ pre = os.path.basename(pre)
+ if ext == ".tgz":
+ ext = ".tar"
+ if ext == ".gz":
+ ext = ""
+ tarname = pre + ext
+
+ if fileobj is None:
+ fileobj = file(name, mode + "b")
+
+ if mode != "r":
+ name = tarname
+
+ try:
+ t = cls.taropen(tarname, mode,
+ gzip.GzipFile(name, mode, compresslevel, fileobj)
+ )
+ except IOError:
+ raise ReadError, "not a gzip file"
+ t._extfileobj = False
+ return t
+
+ gzopen = classmethod(gzopen)
+
+ def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
+ """Open bzip2 compressed tar archive name for reading or writing.
+ Appending is not allowed.
+ """
+ if len(mode) > 1 or mode not in "rw":
+ raise ValueError, "mode must be 'r' or 'w'."
+
+ try:
+ import bz2
+ except ImportError:
+ raise CompressionError, "bz2 module is not available"
+
+ pre, ext = os.path.splitext(name)
+ pre = os.path.basename(pre)
+ if ext == ".tbz2":
+ ext = ".tar"
+ if ext == ".bz2":
+ ext = ""
+ tarname = pre + ext
+
+ if fileobj is not None:
+ raise ValueError, "no support for external file objects"
+
+ try:
+ t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
+ except IOError:
+ raise ReadError, "not a bzip2 file"
+ t._extfileobj = False
+ return t
+
+ bz2open = classmethod(bz2open)
+
+ # All *open() methods are registered here.
+ OPEN_METH = {
+ "tar": "taropen", # uncompressed tar
+ "gz": "gzopen", # gzip compressed tar
+ "bz2": "bz2open" # bzip2 compressed tar
+ }
+
+ #--------------------------------------------------------------------------
+ # The public methods which TarFile provides:
+
+ def close(self):
+ """Close the TarFile. In write-mode, two finishing zero blocks are
+ appended to the archive.
+ """
+ if self.closed:
+ return
+
+ if self._mode in "aw":
+ self.fileobj.write(NUL * (BLOCKSIZE * 2))
+ self.offset += (BLOCKSIZE * 2)
+ # fill up the end with zero-blocks
+ # (like option -b20 for tar does)
+ blocks, remainder = divmod(self.offset, RECORDSIZE)
+ if remainder > 0:
+ self.fileobj.write(NUL * (RECORDSIZE - remainder))
+
+ if not self._extfileobj:
+ self.fileobj.close()
+ self.closed = True
+
+ def getmember(self, name):
+ """Return a TarInfo object for member `name'. If `name' can not be
+ found in the archive, KeyError is raised. If a member occurs more
+ than once in the archive, its last occurence is assumed to be the
+ most up-to-date version.
+ """
+ self._check()
+ if name not in self.membernames and not self._loaded:
+ self._load()
+ if name not in self.membernames:
+ raise KeyError, "filename %r not found" % name
+ return self._getmember(name)
+
+ def getmembers(self):
+ """Return the members of the archive as a list of TarInfo objects. The
+ list has the same order as the members in the archive.
+ """
+ self._check()
+ if not self._loaded: # if we want to obtain a list of
+ self._load() # all members, we first have to
+ # scan the whole archive.
+ return self.members
+
+ def getnames(self):
+ """Return the members of the archive as a list of their names. It has
+ the same order as the list returned by getmembers().
+ """
+ self._check()
+ if not self._loaded:
+ self._load()
+ return self.membernames
+
+ def gettarinfo(self, name=None, arcname=None, fileobj=None):
+ """Create a TarInfo object for either the file `name' or the file
+ object `fileobj' (using os.fstat on its file descriptor). You can
+ modify some of the TarInfo's attributes before you add it using
+ addfile(). If given, `arcname' specifies an alternative name for the
+ file in the archive.
+ """
+ self._check("aw")
+
+ # When fileobj is given, replace name by
+ # fileobj's real name.
+ if fileobj is not None:
+ name = fileobj.name
+
+ # Building the name of the member in the archive.
+ # Backward slashes are converted to forward slashes,
+ # Absolute paths are turned to relative paths.
+ if arcname is None:
+ arcname = name
+ arcname = normpath(arcname)
+ drv, arcname = os.path.splitdrive(arcname)
+ while arcname[0:1] == "/":
+ arcname = arcname[1:]
+
+ # Now, fill the TarInfo object with
+ # information specific for the file.
+ tarinfo = TarInfo()
+
+ # Use os.stat or os.lstat, depending on platform
+ # and if symlinks shall be resolved.
+ if fileobj is None:
+ if hasattr(os, "lstat") and not self.dereference:
+ statres = os.lstat(name)
+ else:
+ statres = os.stat(name)
+ else:
+ statres = os.fstat(fileobj.fileno())
+ linkname = ""
+
+ stmd = statres.st_mode
+ if stat.S_ISREG(stmd):
+ inode = (statres.st_ino, statres.st_dev)
+ if inode in self.inodes and not self.dereference:
+ # Is it a hardlink to an already
+ # archived file?
+ type = LNKTYPE
+ linkname = self.inodes[inode]
+ else:
+ # The inode is added only if its valid.
+ # For win32 it is always 0.
+ type = REGTYPE
+ if inode[0]:
+ self.inodes[inode] = arcname
+ elif stat.S_ISDIR(stmd):
+ type = DIRTYPE
+ if arcname[-1:] != "/":
+ arcname += "/"
+ elif stat.S_ISFIFO(stmd):
+ type = FIFOTYPE
+ elif stat.S_ISLNK(stmd):
+ type = SYMTYPE
+ linkname = os.readlink(name)
+ elif stat.S_ISCHR(stmd):
+ type = CHRTYPE
+ elif stat.S_ISBLK(stmd):
+ type = BLKTYPE
+ else:
+ return None
+
+ # Fill the TarInfo object with all
+ # information we can get.
+ tarinfo.name = arcname
+ tarinfo.mode = stmd
+ tarinfo.uid = statres.st_uid
+ tarinfo.gid = statres.st_gid
+ tarinfo.size = statres.st_size
+ tarinfo.mtime = statres.st_mtime
+ tarinfo.type = type
+ tarinfo.linkname = linkname
+ if pwd:
+ try:
+ tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
+ except KeyError:
+ pass
+ if grp:
+ try:
+ tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
+ except KeyError:
+ pass
+
+ if type in (CHRTYPE, BLKTYPE):
+ if hasattr(os, "major") and hasattr(os, "minor"):
+ tarinfo.devmajor = os.major(statres.st_rdev)
+ tarinfo.devminor = os.minor(statres.st_rdev)
+ return tarinfo
+
+ def list(self, verbose=True):
+ """Print a table of contents to sys.stdout. If `verbose' is False, only
+ the names of the members are printed. If it is True, an `ls -l'-like
+ output is produced.
+ """
+ self._check()
+
+ for tarinfo in self:
+ if verbose:
+ print filemode(tarinfo.mode),
+ print "%s/%s" % (tarinfo.uname or tarinfo.uid,
+ tarinfo.gname or tarinfo.gid),
+ if tarinfo.ischr() or tarinfo.isblk():
+ print "%10s" % ("%d,%d" \
+ % (tarinfo.devmajor, tarinfo.devminor)),
+ else:
+ print "%10d" % tarinfo.size,
+ print "%d-%02d-%02d %02d:%02d:%02d" \
+ % time.localtime(tarinfo.mtime)[:6],
+
+ print tarinfo.name,
+
+ if verbose:
+ if tarinfo.issym():
+ print "->", tarinfo.linkname,
+ if tarinfo.islnk():
+ print "link to", tarinfo.linkname,
+ print
+
+ def add(self, name, arcname=None, recursive=True):
+ """Add the file `name' to the archive. `name' may be any type of file
+ (directory, fifo, symbolic link, etc.). If given, `arcname'
+ specifies an alternative name for the file in the archive.
+ Directories are added recursively by default. This can be avoided by
+ setting `recursive' to False.
+ """
+ self._check("aw")
+
+ if arcname is None:
+ arcname = name
+
+ # Skip if somebody tries to archive the archive...
+ if self.name is not None \
+ and os.path.abspath(name) == os.path.abspath(self.name):
+ self._dbg(2, "tarfile: Skipped %r" % name)
+ return
+
+ # Special case: The user wants to add the current
+ # working directory.
+ if name == ".":
+ if recursive:
+ if arcname == ".":
+ arcname = ""
+ for f in os.listdir("."):
+ self.add(f, os.path.join(arcname, f))
+ return
+
+ self._dbg(1, name)
+
+ # Create a TarInfo object from the file.
+ tarinfo = self.gettarinfo(name, arcname)
+
+ if tarinfo is None:
+ self._dbg(1, "tarfile: Unsupported type %r" % name)
+ return
+
+ # Append the tar header and data to the archive.
+ if tarinfo.isreg():
+ f = file(name, "rb")
+ self.addfile(tarinfo, f)
+ f.close()
+
+ if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
+ tarinfo.size = 0L
+ self.addfile(tarinfo)
+
+ if tarinfo.isdir():
+ self.addfile(tarinfo)
+ if recursive:
+ for f in os.listdir(name):
+ self.add(os.path.join(name, f), os.path.join(arcname, f))
+
+ def addfile(self, tarinfo, fileobj=None):
+ """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
+ given, tarinfo.size bytes are read from it and added to the archive.
+ You can create TarInfo objects using gettarinfo().
+ On Windows platforms, `fileobj' should always be opened with mode
+ 'rb' to avoid irritation about the file size.
+ """
+ self._check("aw")
+
+ tarinfo.name = normpath(tarinfo.name)
+ if tarinfo.isdir():
+ # directories should end with '/'
+ tarinfo.name += "/"
+
+ if tarinfo.linkname:
+ tarinfo.linkname = normpath(tarinfo.linkname)
+
+ if tarinfo.size > MAXSIZE_MEMBER:
+ raise ValueError, "file is too large (>8GB)"
+
+ if len(tarinfo.linkname) > LENGTH_LINK:
+ if self.posix:
+ raise ValueError, "linkname is too long (>%d)" \
+ % (LENGTH_LINK)
+ else:
+ self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
+ tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
+ self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")
+
+ if len(tarinfo.name) > LENGTH_NAME:
+ if self.posix:
+ prefix = tarinfo.name[:LENGTH_PREFIX + 1]
+ while prefix and prefix[-1] != "/":
+ prefix = prefix[:-1]
+
+ name = tarinfo.name[len(prefix):]
+ prefix = prefix[:-1]
+
+ if not prefix or len(name) > LENGTH_NAME:
+ raise ValueError, "name is too long (>%d)" \
+ % (LENGTH_NAME)
+
+ tarinfo.name = name
+ tarinfo.prefix = prefix
+ else:
+ self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
+ tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
+ self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")
+
+ self.fileobj.write(tarinfo.tobuf())
+ self.offset += BLOCKSIZE
+
+ # If there's data to follow, append it.
+ if fileobj is not None:
+ copyfileobj(fileobj, self.fileobj, tarinfo.size)
+ blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
+ if remainder > 0:
+ self.fileobj.write(NUL * (BLOCKSIZE - remainder))
+ blocks += 1
+ self.offset += blocks * BLOCKSIZE
+
+ self.members.append(tarinfo)
+ self.membernames.append(tarinfo.name)
+ self.chunks.append(self.offset)
+
+ def extract(self, member, path=""):
+ """Extract a member from the archive to the current working directory,
+ using its full name. Its file information is extracted as accurately
+ as possible. `member' may be a filename or a TarInfo object. You can
+ specify a different directory using `path'.
+ """
+ self._check("r")
+
+ if isinstance(member, TarInfo):
+ tarinfo = member
+ else:
+ tarinfo = self.getmember(member)
+
+ try:
+ self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
+ except EnvironmentError, e:
+ if self.errorlevel > 0:
+ raise
+ else:
+ if e.filename is None:
+ self._dbg(1, "tarfile: %s" % e.strerror)
+ else:
+ self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
+ except ExtractError, e:
+ if self.errorlevel > 1:
+ raise
+ else:
+ self._dbg(1, "tarfile: %s" % e)
+
+ def extractfile(self, member):
+ """Extract a member from the archive as a file object. `member' may be
+ a filename or a TarInfo object. If `member' is a regular file, a
+ file-like object is returned. If `member' is a link, a file-like
+ object is constructed from the link's target. If `member' is none of
+ the above, None is returned.
+ The file-like object is read-only and provides the following
+ methods: read(), readline(), readlines(), seek() and tell()
+ """
+ self._check("r")
+
+ if isinstance(member, TarInfo):
+ tarinfo = member
+ else:
+ tarinfo = self.getmember(member)
+
+ if tarinfo.isreg():
+ return self.fileobject(self, tarinfo)
+
+ elif tarinfo.type not in SUPPORTED_TYPES:
+ # If a member's type is unknown, it is treated as a
+ # regular file.
+ return self.fileobject(self, tarinfo)
+
+ elif tarinfo.islnk() or tarinfo.issym():
+ if isinstance(self.fileobj, _Stream):
+ # A small but ugly workaround for the case that someone tries
+ # to extract a (sym)link as a file-object from a non-seekable
+ # stream of tar blocks.
+ raise StreamError, "cannot extract (sym)link as file object"
+ else:
+ # A (sym)link's file object is it's target's file object.
+ return self.extractfile(self._getmember(tarinfo.linkname,
+ tarinfo))
+ else:
+ # If there's no data associated with the member (directory, chrdev,
+ # blkdev, etc.), return None instead of a file object.
+ return None
+
+ def _extract_member(self, tarinfo, targetpath):
+ """Extract the TarInfo object tarinfo to a physical
+ file called targetpath.
+ """
+ # Fetch the TarInfo object for the given name
+ # and build the destination pathname, replacing
+ # forward slashes to platform specific separators.
+ if targetpath[-1:] == "/":
+ targetpath = targetpath[:-1]
+ targetpath = os.path.normpath(targetpath)
+
+ # Create all upper directories.
+ upperdirs = os.path.dirname(targetpath)
+ if upperdirs and not os.path.exists(upperdirs):
+ ti = TarInfo()
+ ti.name = upperdirs
+ ti.type = DIRTYPE
+ ti.mode = 0777
+ ti.mtime = tarinfo.mtime
+ ti.uid = tarinfo.uid
+ ti.gid = tarinfo.gid
+ ti.uname = tarinfo.uname
+ ti.gname = tarinfo.gname
+ try:
+ self._extract_member(ti, ti.name)
+ except:
+ pass
+
+ if tarinfo.islnk() or tarinfo.issym():
+ self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
+ else:
+ self._dbg(1, tarinfo.name)
+
+ if tarinfo.isreg():
+ self.makefile(tarinfo, targetpath)
+ elif tarinfo.isdir():
+ self.makedir(tarinfo, targetpath)
+ elif tarinfo.isfifo():
+ self.makefifo(tarinfo, targetpath)
+ elif tarinfo.ischr() or tarinfo.isblk():
+ self.makedev(tarinfo, targetpath)
+ elif tarinfo.islnk() or tarinfo.issym():
+ self.makelink(tarinfo, targetpath)
+ elif tarinfo.type not in SUPPORTED_TYPES:
+ self.makeunknown(tarinfo, targetpath)
+ else:
+ self.makefile(tarinfo, targetpath)
+
+ self.chown(tarinfo, targetpath)
+ if not tarinfo.issym():
+ self.chmod(tarinfo, targetpath)
+ self.utime(tarinfo, targetpath)
+
+ #--------------------------------------------------------------------------
+ # Below are the different file methods. They are called via
+ # _extract_member() when extract() is called. They can be replaced in a
+ # subclass to implement other functionality.
+
+ def makedir(self, tarinfo, targetpath):
+ """Make a directory called targetpath.
+ """
+ try:
+ os.mkdir(targetpath)
+ except EnvironmentError, e:
+ if e.errno != errno.EEXIST:
+ raise
+
+ def makefile(self, tarinfo, targetpath):
+ """Make a file called targetpath.
+ """
+ source = self.extractfile(tarinfo)
+ target = file(targetpath, "wb")
+ copyfileobj(source, target)
+ source.close()
+ target.close()
+
+ def makeunknown(self, tarinfo, targetpath):
+ """Make a file from a TarInfo object with an unknown type
+ at targetpath.
+ """
+ self.makefile(tarinfo, targetpath)
+ self._dbg(1, "tarfile: Unknown file type %r, " \
+ "extracted as regular file." % tarinfo.type)
+
+ def makefifo(self, tarinfo, targetpath):
+ """Make a fifo called targetpath.
+ """
+ if hasattr(os, "mkfifo"):
+ os.mkfifo(targetpath)
+ else:
+ raise ExtractError, "fifo not supported by system"
+
+ def makedev(self, tarinfo, targetpath):
+ """Make a character or block device called targetpath.
+ """
+ if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
+ raise ExtractError, "special devices not supported by system"
+
+ mode = tarinfo.mode
+ if tarinfo.isblk():
+ mode |= stat.S_IFBLK
+ else:
+ mode |= stat.S_IFCHR
+
+ os.mknod(targetpath, mode,
+ os.makedev(tarinfo.devmajor, tarinfo.devminor))
+
+ def makelink(self, tarinfo, targetpath):
+ """Make a (symbolic) link called targetpath. If it cannot be created
+ (platform limitation), we try to make a copy of the referenced file
+ instead of a link.
+ """
+ linkpath = tarinfo.linkname
+ try:
+ if tarinfo.issym():
+ os.symlink(linkpath, targetpath)
+ else:
+ os.link(linkpath, targetpath)
+ except AttributeError:
+ if tarinfo.issym():
+ linkpath = os.path.join(os.path.dirname(tarinfo.name),
+ linkpath)
+ linkpath = normpath(linkpath)
+
+ try:
+ self._extract_member(self.getmember(linkpath), targetpath)
+ except (EnvironmentError, KeyError), e:
+ linkpath = os.path.normpath(linkpath)
+ try:
+ shutil.copy2(linkpath, targetpath)
+ except EnvironmentError, e:
+ raise IOError, "link could not be created"
+
+ def chown(self, tarinfo, targetpath):
+ """Set owner of targetpath according to tarinfo.
+ """
+ if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
+ # We have to be root to do so.
+ try:
+ g = grp.getgrnam(tarinfo.gname)[2]
+ except KeyError:
+ try:
+ g = grp.getgrgid(tarinfo.gid)[2]
+ except KeyError:
+ g = os.getgid()
+ try:
+ u = pwd.getpwnam(tarinfo.uname)[2]
+ except KeyError:
+ try:
+ u = pwd.getpwuid(tarinfo.uid)[2]
+ except KeyError:
+ u = os.getuid()
+ try:
+ if tarinfo.issym() and hasattr(os, "lchown"):
+ os.lchown(targetpath, u, g)
+ else:
+ os.chown(targetpath, u, g)
+ except EnvironmentError, e:
+ raise ExtractError, "could not change owner"
+
+ def chmod(self, tarinfo, targetpath):
+ """Set file permissions of targetpath according to tarinfo.
+ """
+ try:
+ os.chmod(targetpath, tarinfo.mode)
+ except EnvironmentError, e:
+ raise ExtractError, "could not change mode"
+
+ def utime(self, tarinfo, targetpath):
+ """Set modification time of targetpath according to tarinfo.
+ """
+ if sys.platform == "win32" and tarinfo.isdir():
+ # According to msdn.microsoft.com, it is an error (EACCES)
+ # to use utime() on directories.
+ return
+ try:
+ os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
+ except EnvironmentError, e:
+ raise ExtractError, "could not change modification time"
+
+ #--------------------------------------------------------------------------
+
+ def next(self):
+ """Return the next member of the archive as a TarInfo object, when
+ TarFile is opened for reading. Return None if there is no more
+ available.
+ """
+ self._check("ra")
+ if self.firstmember is not None:
+ m = self.firstmember
+ self.firstmember = None
+ return m
+
+ # Read the next block.
+ self.fileobj.seek(self.chunks[-1])
+ while True:
+ buf = self.fileobj.read(BLOCKSIZE)
+ if not buf:
+ return None
+ try:
+ tarinfo = TarInfo.frombuf(buf)
+ except ValueError:
+ if self.ignore_zeros:
+ if buf.count(NUL) == BLOCKSIZE:
+ adj = "empty"
+ else:
+ adj = "invalid"
+ self._dbg(2, "0x%X: %s block" % (self.offset, adj))
+ self.offset += BLOCKSIZE
+ continue
+ else:
+ # Block is empty or unreadable.
+ if self.chunks[-1] == 0:
+ # If the first block is invalid. That does not
+ # look like a tar archive we can handle.
+ raise ReadError,"empty, unreadable or compressed file"
+ return None
+ break
+
+ # We shouldn't rely on this checksum, because some tar programs
+ # calculate it differently and it is merely validating the
+ # header block. We could just as well skip this part, which would
+ # have a slight effect on performance...
+ if tarinfo.chksum != calc_chksum(buf):
+ self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)
+
+ # Set the TarInfo object's offset to the current position of the
+ # TarFile and set self.offset to the position where the data blocks
+ # should begin.
+ tarinfo.offset = self.offset
+ self.offset += BLOCKSIZE
+
+ # Check if the TarInfo object has a typeflag for which a callback
+ # method is registered in the TYPE_METH. If so, then call it.
+ if tarinfo.type in self.TYPE_METH:
+ tarinfo = self.TYPE_METH[tarinfo.type](self, tarinfo)
+ else:
+ tarinfo.offset_data = self.offset
+ if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
+ # Skip the following data blocks.
+ self.offset += self._block(tarinfo.size)
+
+ if tarinfo.isreg() and tarinfo.name[:-1] == "/":
+ # some old tar programs don't know DIRTYPE
+ tarinfo.type = DIRTYPE
+
+ self.members.append(tarinfo)
+ self.membernames.append(tarinfo.name)
+ self.chunks.append(self.offset)
+ return tarinfo
+
+ #--------------------------------------------------------------------------
+ # Below are some methods which are called for special typeflags in the
+ # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
+ # are registered in TYPE_METH below. You can register your own methods
+ # with this mapping.
+ # A registered method is called with a TarInfo object as only argument.
+ #
+ # During its execution the method MUST perform the following tasks:
+ # 1. set tarinfo.offset_data to the position where the data blocks begin,
+ # if there is data to follow.
+ # 2. set self.offset to the position where the next member's header will
+ # begin.
+ # 3. return a valid TarInfo object.
+
+ def proc_gnulong(self, tarinfo):
+ """Evaluate the blocks that hold a GNU longname
+ or longlink member.
+ """
+ buf = ""
+ name = None
+ linkname = None
+ count = tarinfo.size
+ while count > 0:
+ block = self.fileobj.read(BLOCKSIZE)
+ buf += block
+ self.offset += BLOCKSIZE
+ count -= BLOCKSIZE
+
+ if tarinfo.type == GNUTYPE_LONGNAME:
+ name = nts(buf)
+ if tarinfo.type == GNUTYPE_LONGLINK:
+ linkname = nts(buf)
+
+ buf = self.fileobj.read(BLOCKSIZE)
+
+ tarinfo = TarInfo.frombuf(buf)
+ tarinfo.offset = self.offset
+ self.offset += BLOCKSIZE
+ tarinfo.offset_data = self.offset
+ tarinfo.name = name or tarinfo.name
+ tarinfo.linkname = linkname or tarinfo.linkname
+
+ if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
+ # Skip the following data blocks.
+ self.offset += self._block(tarinfo.size)
+ return tarinfo
+
+ def proc_sparse(self, tarinfo):
+ """Analyze a GNU sparse header plus extra headers.
+ """
+ buf = tarinfo.tobuf()
+ sp = _ringbuffer()
+ pos = 386
+ lastpos = 0L
+ realpos = 0L
+ # There are 4 possible sparse structs in the
+ # first header.
+ for i in xrange(4):
+ try:
+ offset = int(buf[pos:pos + 12], 8)
+ numbytes = int(buf[pos + 12:pos + 24], 8)
+ except ValueError:
+ break
+ if offset > lastpos:
+ sp.append(_hole(lastpos, offset - lastpos))
+ sp.append(_data(offset, numbytes, realpos))
+ realpos += numbytes
+ lastpos = offset + numbytes
+ pos += 24
+
+ isextended = ord(buf[482])
+ origsize = int(buf[483:495], 8)
+
+ # If the isextended flag is given,
+ # there are extra headers to process.
+ while isextended == 1:
+ buf = self.fileobj.read(BLOCKSIZE)
+ self.offset += BLOCKSIZE
+ pos = 0
+ for i in xrange(21):
+ try:
+ offset = int(buf[pos:pos + 12], 8)
+ numbytes = int(buf[pos + 12:pos + 24], 8)
+ except ValueError:
+ break
+ if offset > lastpos:
+ sp.append(_hole(lastpos, offset - lastpos))
+ sp.append(_data(offset, numbytes, realpos))
+ realpos += numbytes
+ lastpos = offset + numbytes
+ pos += 24
+ isextended = ord(buf[504])
+
+ if lastpos < origsize:
+ sp.append(_hole(lastpos, origsize - lastpos))
+
+ tarinfo.sparse = sp
+
+ tarinfo.offset_data = self.offset
+ self.offset += self._block(tarinfo.size)
+ tarinfo.size = origsize
+ return tarinfo
+
+ # The type mapping for the next() method. The keys are single character
+ # strings, the typeflag. The values are methods which are called when
+ # next() encounters such a typeflag.
+ TYPE_METH = {
+ GNUTYPE_LONGNAME: proc_gnulong,
+ GNUTYPE_LONGLINK: proc_gnulong,
+ GNUTYPE_SPARSE: proc_sparse
+ }
+
+ #--------------------------------------------------------------------------
+ # Little helper methods:
+
+ def _block(self, count):
+ """Round up a byte count by BLOCKSIZE and return it,
+ e.g. _block(834) => 1024.
+ """
+ blocks, remainder = divmod(count, BLOCKSIZE)
+ if remainder:
+ blocks += 1
+ return blocks * BLOCKSIZE
+
+ def _getmember(self, name, tarinfo=None):
+ """Find an archive member by name from bottom to top.
+ If tarinfo is given, it is used as the starting point.
+ """
+ if tarinfo is None:
+ end = len(self.members)
+ else:
+ end = self.members.index(tarinfo)
+
+ for i in xrange(end - 1, -1, -1):
+ if name == self.membernames[i]:
+ return self.members[i]
+
+ def _load(self):
+ """Read through the entire archive file and look for readable
+ members.
+ """
+ while True:
+ tarinfo = self.next()
+ if tarinfo is None:
+ break
+ self._loaded = True
+
+ def _check(self, mode=None):
+ """Check if TarFile is still open, and if the operation's mode
+ corresponds to TarFile's mode.
+ """
+ if self.closed:
+ raise IOError, "%s is closed" % self.__class__.__name__
+ if mode is not None and self._mode not in mode:
+ raise IOError, "bad operation for mode %r" % self._mode
+
+ def __iter__(self):
+ """Provide an iterator object.
+ """
+ if self._loaded:
+ return iter(self.members)
+ else:
+ return TarIter(self)
+
+ def _create_gnulong(self, name, type):
+ """Write a GNU longname/longlink member to the TarFile.
+ It consists of an extended tar header, with the length
+ of the longname as size, followed by data blocks,
+ which contain the longname as a null terminated string.
+ """
+ tarinfo = TarInfo()
+ tarinfo.name = "././@LongLink"
+ tarinfo.type = type
+ tarinfo.mode = 0
+ tarinfo.size = len(name)
+
+ # write extended header
+ self.fileobj.write(tarinfo.tobuf())
+ # write name blocks
+ self.fileobj.write(name)
+ blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
+ if remainder > 0:
+ self.fileobj.write(NUL * (BLOCKSIZE - remainder))
+ blocks += 1
+ self.offset += blocks * BLOCKSIZE
+
+ def _dbg(self, level, msg):
+ """Write debugging output to sys.stderr.
+ """
+ if level <= self.debug:
+ print >> sys.stderr, msg
+# class TarFile
+
+class TarIter:
+ """Iterator Class.
+
+ for tarinfo in TarFile(...):
+ suite...
+ """
+
+ def __init__(self, tarfile):
+ """Construct a TarIter object.
+ """
+ self.tarfile = tarfile
+ def __iter__(self):
+ """Return iterator object.
+ """
+ return self
+ def next(self):
+ """Return the next item using TarFile's next() method.
+ When all members have been read, set TarFile as _loaded.
+ """
+ tarinfo = self.tarfile.next()
+ if not tarinfo:
+ self.tarfile._loaded = True
+ raise StopIteration
+ return tarinfo
+
+# Helper classes for sparse file support
+class _section:
+ """Base class for _data and _hole.
+ """
+ def __init__(self, offset, size):
+ self.offset = offset
+ self.size = size
+ def __contains__(self, offset):
+ return self.offset <= offset < self.offset + self.size
+
+class _data(_section):
+ """Represent a data section in a sparse file.
+ """
+ def __init__(self, offset, size, realpos):
+ _section.__init__(self, offset, size)
+ self.realpos = realpos
+
+class _hole(_section):
+ """Represent a hole section in a sparse file.
+ """
+ pass
+
+class _ringbuffer(list):
+ """Ringbuffer class which increases performance
+ over a regular list.
+ """
+ def __init__(self):
+ self.idx = 0
+ def find(self, offset):
+ idx = self.idx
+ while True:
+ item = self[idx]
+ if offset in item:
+ break
+ idx += 1
+ if idx == len(self):
+ idx = 0
+ if idx == self.idx:
+ # End of File
+ return None
+ self.idx = idx
+ return item
+
+#---------------------------------------------
+# zipfile compatible TarFile class
+#---------------------------------------------
+TAR_PLAIN = 0 # zipfile.ZIP_STORED
+TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED
+class TarFileCompat:
+ """TarFile class compatible with standard module zipfile's
+ ZipFile class.
+ """
+ def __init__(self, file, mode="r", compression=TAR_PLAIN):
+ if compression == TAR_PLAIN:
+ self.tarfile = TarFile.taropen(file, mode)
+ elif compression == TAR_GZIPPED:
+ self.tarfile = TarFile.gzopen(file, mode)
+ else:
+ raise ValueError, "unknown compression constant"
+ if mode[0:1] == "r":
+ members = self.tarfile.getmembers()
+ for i in xrange(len(members)):
+ m = members[i]
+ m.filename = m.name
+ m.file_size = m.size
+ m.date_time = time.gmtime(m.mtime)[:6]
+ def namelist(self):
+ return map(lambda m: m.name, self.infolist())
+ def infolist(self):
+ return filter(lambda m: m.type in REGULAR_TYPES,
+ self.tarfile.getmembers())
+ def printdir(self):
+ self.tarfile.list()
+ def testzip(self):
+ return
+ def getinfo(self, name):
+ return self.tarfile.getmember(name)
+ def read(self, name):
+ return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
+ def write(self, filename, arcname=None, compress_type=None):
+ self.tarfile.add(filename, arcname)
+ def writestr(self, zinfo, bytes):
+ import StringIO
+ import calendar
+ zinfo.name = zinfo.filename
+ zinfo.size = zinfo.file_size
+ zinfo.mtime = calendar.timegm(zinfo.date_time)
+ self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))
+ def close(self):
+ self.tarfile.close()
+#class TarFileCompat
+
+#--------------------
+# exported functions
+#--------------------
+def is_tarfile(name):
+ """Return True if name points to a tar archive that we
+ are able to handle, else return False.
+ """
+ try:
+ t = open(name)
+ t.close()
+ return True
+ except TarError:
+ return False
+
+open = TarFile.open
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
new file mode 100644
index 0000000..2dc06ea
--- /dev/null
+++ b/Lib/test/test_tarfile.py
@@ -0,0 +1,253 @@
+import sys
+import os
+import shutil
+
+import unittest
+import tarfile
+
+from test import test_support
+
+# Check for our compression modules.
+try:
+ import gzip
+except ImportError:
+ gzip = None
+try:
+ import bz2
+except ImportError:
+ bz2 = None
+
+def path(path):
+ return test_support.findfile(path)
+
+testtar = path("testtar.tar")
+tempdir = path("testtar.dir")
+tempname = path("testtar.tmp")
+membercount = 10
+
+def tarname(comp=""):
+ if not comp:
+ return testtar
+ return "%s.%s" % (testtar, comp)
+
+def dirname():
+ if not os.path.exists(tempdir):
+ os.mkdir(tempdir)
+ return tempdir
+
+def tmpname():
+ return tempname
+
+
+class BaseTest(unittest.TestCase):
+ comp = ''
+ mode = 'r'
+ sep = ':'
+
+ def setUp(self):
+ mode = self.mode + self.sep + self.comp
+ self.tar = tarfile.open(tarname(self.comp), mode)
+
+ def tearDown(self):
+ self.tar.close()
+
+class ReadTest(BaseTest):
+
+ def test(self):
+ """Test member extraction.
+ """
+ members = 0
+ for tarinfo in self.tar:
+ members += 1
+ if not tarinfo.isreg():
+ continue
+ f = self.tar.extractfile(tarinfo)
+ self.assert_(len(f.read()) == tarinfo.size,
+ "size read does not match expected size")
+ f.close()
+
+ self.assert_(members == membercount,
+ "could not find all members")
+
+ def test_sparse(self):
+ """Test sparse member extraction.
+ """
+ if self.sep != "|":
+ f1 = self.tar.extractfile("S-SPARSE")
+ f2 = self.tar.extractfile("S-SPARSE-WITH-NULLS")
+ self.assert_(f1.read() == f2.read(),
+ "_FileObject failed on sparse file member")
+
+ def test_readlines(self):
+ """Test readlines() method of _FileObject.
+ """
+ if self.sep != "|":
+ filename = "0-REGTYPE-TEXT"
+ self.tar.extract(filename, dirname())
+ lines1 = file(os.path.join(dirname(), filename), "r").readlines()
+ lines2 = self.tar.extractfile(filename).readlines()
+ self.assert_(lines1 == lines2,
+ "_FileObject.readline() does not work correctly")
+
+ def test_seek(self):
+ """Test seek() method of _FileObject, incl. random reading.
+ """
+ if self.sep != "|":
+ filename = "0-REGTYPE"
+ self.tar.extract(filename, dirname())
+ data = file(os.path.join(dirname(), filename), "rb").read()
+
+ tarinfo = self.tar.getmember(filename)
+ fobj = self.tar.extractfile(tarinfo)
+
+ text = fobj.read()
+ fobj.seek(0)
+ self.assert_(0 == fobj.tell(),
+ "seek() to file's start failed")
+ fobj.seek(2048, 0)
+ self.assert_(2048 == fobj.tell(),
+ "seek() to absolute position failed")
+ fobj.seek(-1024, 1)
+ self.assert_(1024 == fobj.tell(),
+ "seek() to negative relative position failed")
+ fobj.seek(1024, 1)
+ self.assert_(2048 == fobj.tell(),
+ "seek() to positive relative position failed")
+ s = fobj.read(10)
+ self.assert_(s == data[2048:2058],
+ "read() after seek failed")
+ fobj.seek(0, 2)
+ self.assert_(tarinfo.size == fobj.tell(),
+ "seek() to file's end failed")
+ self.assert_(fobj.read() == "",
+ "read() at file's end did not return empty string")
+ fobj.seek(-tarinfo.size, 2)
+ self.assert_(0 == fobj.tell(),
+ "relative seek() to file's start failed")
+ fobj.seek(512)
+ s1 = fobj.readlines()
+ fobj.seek(512)
+ s2 = fobj.readlines()
+ self.assert_(s1 == s2,
+ "readlines() after seek failed")
+ fobj.close()
+
+class ReadStreamTest(ReadTest):
+ sep = "|"
+
+ def test(self):
+ """Test member extraction, and for StreamError when
+ seeking backwards.
+ """
+ ReadTest.test(self)
+ tarinfo = self.tar.getmembers()[0]
+ f = self.tar.extractfile(tarinfo)
+ self.assertRaises(tarfile.StreamError, f.read)
+
+ def test_stream(self):
+ """Compare the normal tar and the stream tar.
+ """
+ stream = self.tar
+ tar = tarfile.open(tarname(), 'r')
+
+ while 1:
+ t1 = tar.next()
+ t2 = stream.next()
+ if t1 is None:
+ break
+ self.assert_(t2 is not None, "stream.next() failed.")
+
+ if t2.islnk() or t2.issym():
+ self.assertRaises(tarfile.StreamError, stream.extractfile, t2)
+ continue
+ v1 = tar.extractfile(t1)
+ v2 = stream.extractfile(t2)
+ if v1 is None:
+ continue
+ self.assert_(v2 is not None, "stream.extractfile() failed")
+ self.assert_(v1.read() == v2.read(), "stream extraction failed")
+
+ stream.close()
+
+class WriteTest(BaseTest):
+ mode = 'w'
+
+ def setUp(self):
+ mode = self.mode + self.sep + self.comp
+ self.src = tarfile.open(tarname(self.comp), 'r')
+ self.dst = tarfile.open(tmpname(), mode)
+
+ def tearDown(self):
+ self.src.close()
+ self.dst.close()
+
+ def test_posix(self):
+ self.dst.posix = 1
+ self._test()
+
+ def test_nonposix(self):
+ self.dst.posix = 0
+ self._test()
+
+ def _test(self):
+ for tarinfo in self.src:
+ if not tarinfo.isreg():
+ continue
+ f = self.src.extractfile(tarinfo)
+ if self.dst.posix and len(tarinfo.name) > tarfile.LENGTH_NAME:
+ self.assertRaises(ValueError, self.dst.addfile,
+ tarinfo, f)
+ else:
+ self.dst.addfile(tarinfo, f)
+
+class WriteStreamTest(WriteTest):
+ sep = '|'
+
+# Gzip TestCases
+class ReadTestGzip(ReadTest):
+ comp = "gz"
+class ReadStreamTestGzip(ReadStreamTest):
+ comp = "gz"
+class WriteTestGzip(WriteTest):
+ comp = "gz"
+class WriteStreamTestGzip(WriteStreamTest):
+ comp = "gz"
+
+if bz2:
+ # Bzip2 TestCases
+ class ReadTestBzip2(ReadTestGzip):
+ comp = "bz2"
+ class ReadStreamTestBzip2(ReadStreamTestGzip):
+ comp = "bz2"
+ class WriteTestBzip2(WriteTest):
+ comp = "bz2"
+ class WriteStreamTestBzip2(WriteStreamTestGzip):
+ comp = "bz2"
+
+# If importing gzip failed, discard the Gzip TestCases.
+if not gzip:
+ del ReadTestGzip
+ del ReadStreamTestGzip
+ del WriteTestGzip
+ del WriteStreamTestGzip
+
+if __name__ == "__main__":
+ if gzip:
+ # create testtar.tar.gz
+ gzip.open(tarname("gz"), "wb").write(file(tarname(), "rb").read())
+ if bz2:
+ # create testtar.tar.bz2
+ bz2.BZ2File(tarname("bz2"), "wb").write(file(tarname(), "rb").read())
+
+ try:
+ unittest.main()
+ finally:
+ if gzip:
+ os.remove(tarname("gz"))
+ if bz2:
+ os.remove(tarname("bz2"))
+ if os.path.exists(tempdir):
+ shutil.rmtree(tempdir)
+ if os.path.exists(tempname):
+ os.remove(tempname)
+
diff --git a/Lib/test/testtar.tar b/Lib/test/testtar.tar
new file mode 100644
index 0000000..40125b3
--- /dev/null
+++ b/Lib/test/testtar.tar
Binary files differ
diff --git a/Misc/ACKS b/Misc/ACKS
index 839f8ed..a9773d8 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -196,6 +196,7 @@ Eddy De Greef
Duncan Grisby
Dag Gruneau
Michael Guravage
+Lars Gustäbel
Barry Haddow
Paul ten Hagen
Rasmus Hahn
diff --git a/Misc/NEWS b/Misc/NEWS
index 8bc29a6..3051110 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -63,6 +63,10 @@ Library
It is also exported for ntpath, macpath, and os2emxpath.
See SF bug #659228.
+- New module tarfile from Lars Gustäbel provides a comprehensive interface
+ to tar archive files with transparent gzip and bzip2 compression.
+ See SF patch #651082.
+
Tools/Demos
-----------