summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Martin v. Löwis <martin@v.loewis.de> 2005-03-05 12:47:42 (GMT)
committer Martin v. Löwis <martin@v.loewis.de> 2005-03-05 12:47:42 (GMT)
commit 78be7df9e46ffff4ff97f25a0d68e6bb26e8fe3e (patch)
tree 0d3bd832a57847b4718940c70b5d5ec4124e2432
parent 409d8f2ebdf597837c593a40659949e5b15f457d (diff)
download cpython-78be7df9e46ffff4ff97f25a0d68e6bb26e8fe3e.zip
cpython-78be7df9e46ffff4ff97f25a0d68e6bb26e8fe3e.tar.gz
cpython-78be7df9e46ffff4ff97f25a0d68e6bb26e8fe3e.tar.bz2
Patch #918101: Add tarfile open mode r|* for auto-detection of the
stream compression; add, for symmetry reasons, r:* as a synonym of r.
-rw-r--r-- Doc/lib/libtarfile.tex 3
-rw-r--r-- Lib/tarfile.py 96
-rw-r--r-- Lib/test/test_tarfile.py 29
-rw-r--r-- Misc/NEWS 3
4 files changed, 98 insertions, 33 deletions
diff --git a/Doc/lib/libtarfile.tex b/Doc/lib/libtarfile.tex
index 018e8b9..9088473 100644
--- a/Doc/lib/libtarfile.tex
+++ b/Doc/lib/libtarfile.tex
@@ -32,7 +32,7 @@ Some facts and figures:
it defaults to \code{'r'}. Here is a full list of mode combinations:
\begin{tableii}{c|l}{code}{mode}{action}
- \lineii{'r'}{Open for reading with transparent compression (recommended).}
+ \lineii{'r' or 'r:*'}{Open for reading with transparent compression (recommended).}
\lineii{'r:'}{Open for reading exclusively without compression.}
\lineii{'r:gz'}{Open for reading with gzip compression.}
\lineii{'r:bz2'}{Open for reading with bzip2 compression.}
@@ -65,6 +65,7 @@ Some facts and figures:
(section~\ref{tar-examples}). The currently possible modes:
\begin{tableii}{c|l}{code}{Mode}{Action}
+ \lineii{'r|*'}{Open a \emph{stream} of tar blocks for reading with transparent compression.}
\lineii{'r|'}{Open a \emph{stream} of uncompressed tar blocks for reading.}
\lineii{'r|gz'}{Open a gzip compressed \emph{stream} for reading.}
\lineii{'r|bz2'}{Open a bzip2 compressed \emph{stream} for reading.}
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 8bce5d0..56cce03 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -274,7 +274,7 @@ class _Stream:
_Stream is intended to be used only internally.
"""
- def __init__(self, name, mode, type, fileobj, bufsize):
+ def __init__(self, name, mode, comptype, fileobj, bufsize):
"""Construct a _Stream object.
"""
self._extfileobj = True
@@ -282,16 +282,22 @@ class _Stream:
fileobj = _LowLevelFile(name, mode)
self._extfileobj = False
- self.name = name or ""
- self.mode = mode
- self.type = type
- self.fileobj = fileobj
- self.bufsize = bufsize
- self.buf = ""
- self.pos = 0L
- self.closed = False
-
- if type == "gz":
+ if comptype == '*':
+ # Enable transparent compression detection for the
+ # stream interface
+ fileobj = _StreamProxy(fileobj)
+ comptype = fileobj.getcomptype()
+
+ self.name = name or ""
+ self.mode = mode
+ self.comptype = comptype
+ self.fileobj = fileobj
+ self.bufsize = bufsize
+ self.buf = ""
+ self.pos = 0L
+ self.closed = False
+
+ if comptype == "gz":
try:
import zlib
except ImportError:
@@ -303,7 +309,7 @@ class _Stream:
else:
self._init_write_gz()
- if type == "bz2":
+ if comptype == "bz2":
try:
import bz2
except ImportError:
@@ -315,7 +321,7 @@ class _Stream:
self.cmp = bz2.BZ2Compressor()
def __del__(self):
- if not self.closed:
+ if hasattr(self, "closed") and not self.closed:
self.close()
def _init_write_gz(self):
@@ -334,10 +340,10 @@ class _Stream:
def write(self, s):
"""Write string s to the stream.
"""
- if self.type == "gz":
+ if self.comptype == "gz":
self.crc = self.zlib.crc32(s, self.crc)
self.pos += len(s)
- if self.type != "tar":
+ if self.comptype != "tar":
s = self.cmp.compress(s)
self.__write(s)
@@ -357,12 +363,16 @@ class _Stream:
if self.closed:
return
- if self.mode == "w" and self.type != "tar":
+ if self.mode == "w" and self.comptype != "tar":
self.buf += self.cmp.flush()
+
if self.mode == "w" and self.buf:
+ blocks, remainder = divmod(len(self.buf), self.bufsize)
+ if remainder > 0:
+ self.buf += NUL * (self.bufsize - remainder)
self.fileobj.write(self.buf)
self.buf = ""
- if self.type == "gz":
+ if self.comptype == "gz":
self.fileobj.write(struct.pack("<l", self.crc))
self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
@@ -441,7 +451,7 @@ class _Stream:
def _read(self, size):
"""Return size bytes from the stream.
"""
- if self.type == "tar":
+ if self.comptype == "tar":
return self.__read(size)
c = len(self.dbuf)
@@ -474,6 +484,30 @@ class _Stream:
return t[:size]
# class _Stream
+class _StreamProxy(object):
+ """Small proxy class that enables transparent compression
+ detection for the Stream interface (mode 'r|*').
+ """
+
+ def __init__(self, fileobj):
+ self.fileobj = fileobj
+ self.buf = self.fileobj.read(BLOCKSIZE)
+
+ def read(self, size):
+ self.read = self.fileobj.read
+ return self.buf
+
+ def getcomptype(self):
+ if self.buf.startswith("\037\213\010"):
+ return "gz"
+ if self.buf.startswith("BZh91"):
+ return "bz2"
+ return "tar"
+
+ def close(self):
+ self.fileobj.close()
+# class StreamProxy
+
#------------------------
# Extraction file object
#------------------------
@@ -879,7 +913,7 @@ class TarFile(object):
an appropriate TarFile class.
mode:
- 'r' open for reading with transparent compression
+ 'r' or 'r:*' open for reading with transparent compression
'r:' open for reading exclusively uncompressed
'r:gz' open for reading with gzip compression
'r:bz2' open for reading with bzip2 compression
@@ -887,6 +921,8 @@ class TarFile(object):
'w' or 'w:' open for writing without compression
'w:gz' open for writing with gzip compression
'w:bz2' open for writing with bzip2 compression
+
+ 'r|*' open a stream of tar blocks with transparent compression
'r|' open an uncompressed stream of tar blocks for reading
'r|gz' open a gzip compressed stream of tar blocks
'r|bz2' open a bzip2 compressed stream of tar blocks
@@ -898,7 +934,17 @@ class TarFile(object):
if not name and not fileobj:
raise ValueError, "nothing to open"
- if ":" in mode:
+ if mode in ("r", "r:*"):
+ # Find out which *open() is appropriate for opening the file.
+ for comptype in cls.OPEN_METH:
+ func = getattr(cls, cls.OPEN_METH[comptype])
+ try:
+ return func(name, "r", fileobj)
+ except (ReadError, CompressionError):
+ continue
+ raise ReadError, "file could not be opened successfully"
+
+ elif ":" in mode:
filemode, comptype = mode.split(":", 1)
filemode = filemode or "r"
comptype = comptype or "tar"
@@ -924,16 +970,6 @@ class TarFile(object):
t._extfileobj = False
return t
- elif mode == "r":
- # Find out which *open() is appropriate for opening the file.
- for comptype in cls.OPEN_METH:
- func = getattr(cls, cls.OPEN_METH[comptype])
- try:
- return func(name, "r", fileobj)
- except (ReadError, CompressionError):
- continue
- raise ReadError, "file could not be opened successfully"
-
elif mode in "aw":
return cls.taropen(name, mode, fileobj)
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index a6c4c4a..cc5e505 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -181,6 +181,18 @@ class ReadStreamTest(ReadTest):
stream.close()
+class ReadAsteriskTest(ReadTest):
+
+ def setUp(self):
+ mode = self.mode + self.sep + "*"
+ self.tar = tarfile.open(tarname(self.comp), mode)
+
+class ReadStreamAsteriskTest(ReadStreamTest):
+
+ def setUp(self):
+ mode = self.mode + self.sep + "*"
+ self.tar = tarfile.open(tarname(self.comp), mode)
+
class WriteTest(BaseTest):
mode = 'w'
@@ -336,6 +348,11 @@ class WriteTestGzip(WriteTest):
comp = "gz"
class WriteStreamTestGzip(WriteStreamTest):
comp = "gz"
+class ReadAsteriskTestGzip(ReadAsteriskTest):
+ comp = "gz"
+class ReadStreamAsteriskTestGzip(ReadStreamAsteriskTest):
+ comp = "gz"
+
# Filemode test cases
@@ -355,6 +372,10 @@ if bz2:
comp = "bz2"
class WriteStreamTestBzip2(WriteStreamTestGzip):
comp = "bz2"
+ class ReadAsteriskTestBzip2(ReadAsteriskTest):
+ comp = "bz2"
+ class ReadStreamAsteriskTestBzip2(ReadStreamAsteriskTest):
+ comp = "bz2"
# If importing gzip failed, discard the Gzip TestCases.
if not gzip:
@@ -375,6 +396,8 @@ def test_main():
FileModeTest,
ReadTest,
ReadStreamTest,
+ ReadAsteriskTest,
+ ReadStreamAsteriskTest,
WriteTest,
WriteStreamTest,
WriteGNULongTest,
@@ -386,13 +409,15 @@ def test_main():
if gzip:
tests.extend([
ReadTestGzip, ReadStreamTestGzip,
- WriteTestGzip, WriteStreamTestGzip
+ WriteTestGzip, WriteStreamTestGzip,
+ ReadAsteriskTestGzip, ReadStreamAsteriskTestGzip
])
if bz2:
tests.extend([
ReadTestBzip2, ReadStreamTestBzip2,
- WriteTestBzip2, WriteStreamTestBzip2
+ WriteTestBzip2, WriteStreamTestBzip2,
+ ReadAsteriskTestBzip2, ReadStreamAsteriskTestBzip2
])
try:
test_support.run_unittest(*tests)
diff --git a/Misc/NEWS b/Misc/NEWS
index 08a8f43..3db47a3 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -78,6 +78,9 @@ Extension Modules
Library
-------
+- Patch #918101: Add tarfile open mode r|* for auto-detection of the
+ stream compression; add, for symmetry reasons, r:* as a synonym of r.
+
- Patch #1043890: Add extractall method to tarfile.
- Patch #1075887: Don't require MSVC in distutils if there is nothing