diff options
-rw-r--r-- | Doc/lib/libtarfile.tex | 3 | ||||
-rw-r--r-- | Lib/tarfile.py | 96 | ||||
-rw-r--r-- | Lib/test/test_tarfile.py | 29 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
4 files changed, 98 insertions, 33 deletions
diff --git a/Doc/lib/libtarfile.tex b/Doc/lib/libtarfile.tex index 018e8b9..9088473 100644 --- a/Doc/lib/libtarfile.tex +++ b/Doc/lib/libtarfile.tex @@ -32,7 +32,7 @@ Some facts and figures: it defaults to \code{'r'}. Here is a full list of mode combinations: \begin{tableii}{c|l}{code}{mode}{action} - \lineii{'r'}{Open for reading with transparent compression (recommended).} + \lineii{'r' or 'r:*'}{Open for reading with transparent compression (recommended).} \lineii{'r:'}{Open for reading exclusively without compression.} \lineii{'r:gz'}{Open for reading with gzip compression.} \lineii{'r:bz2'}{Open for reading with bzip2 compression.} @@ -65,6 +65,7 @@ Some facts and figures: (section~\ref{tar-examples}). The currently possible modes: \begin{tableii}{c|l}{code}{Mode}{Action} + \lineii{'r|*'}{Open a \emph{stream} of tar blocks for reading with transparent compression.} \lineii{'r|'}{Open a \emph{stream} of uncompressed tar blocks for reading.} \lineii{'r|gz'}{Open a gzip compressed \emph{stream} for reading.} \lineii{'r|bz2'}{Open a bzip2 compressed \emph{stream} for reading.} diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 8bce5d0..56cce03 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -274,7 +274,7 @@ class _Stream: _Stream is intended to be used only internally. """ - def __init__(self, name, mode, type, fileobj, bufsize): + def __init__(self, name, mode, comptype, fileobj, bufsize): """Construct a _Stream object. """ self._extfileobj = True @@ -282,16 +282,22 @@ class _Stream: fileobj = _LowLevelFile(name, mode) self._extfileobj = False - self.name = name or "" - self.mode = mode - self.type = type - self.fileobj = fileobj - self.bufsize = bufsize - self.buf = "" - self.pos = 0L - self.closed = False - - if type == "gz": + if comptype == '*': + # Enable transparent compression detection for the + # stream interface + fileobj = _StreamProxy(fileobj) + comptype = fileobj.getcomptype() + + self.name = name or "" + self.mode = mode + self.comptype = comptype + self.fileobj = fileobj + self.bufsize = bufsize + self.buf = "" + self.pos = 0L + self.closed = False + + if comptype == "gz": try: import zlib except ImportError: @@ -303,7 +309,7 @@ class _Stream: else: self._init_write_gz() - if type == "bz2": + if comptype == "bz2": try: import bz2 except ImportError: @@ -315,7 +321,7 @@ class _Stream: self.cmp = bz2.BZ2Compressor() def __del__(self): - if not self.closed: + if hasattr(self, "closed") and not self.closed: self.close() def _init_write_gz(self): @@ -334,10 +340,10 @@ class _Stream: def write(self, s): """Write string s to the stream. """ - if self.type == "gz": + if self.comptype == "gz": self.crc = self.zlib.crc32(s, self.crc) self.pos += len(s) - if self.type != "tar": + if self.comptype != "tar": s = self.cmp.compress(s) self.__write(s) @@ -357,12 +363,16 @@ class _Stream: if self.closed: return - if self.mode == "w" and self.type != "tar": + if self.mode == "w" and self.comptype != "tar": self.buf += self.cmp.flush() + if self.mode == "w" and self.buf: + blocks, remainder = divmod(len(self.buf), self.bufsize) + if remainder > 0: + self.buf += NUL * (self.bufsize - remainder) self.fileobj.write(self.buf) self.buf = "" - if self.type == "gz": + if self.comptype == "gz": self.fileobj.write(struct.pack("<l", self.crc)) self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL)) @@ -441,7 +451,7 @@ class _Stream: def _read(self, size): """Return size bytes from the stream. """ - if self.type == "tar": + if self.comptype == "tar": return self.__read(size) c = len(self.dbuf) @@ -474,6 +484,30 @@ class _Stream: return t[:size] # class _Stream +class _StreamProxy(object): + """Small proxy class that enables transparent compression + detection for the Stream interface (mode 'r|*'). + """ + + def __init__(self, fileobj): + self.fileobj = fileobj + self.buf = self.fileobj.read(BLOCKSIZE) + + def read(self, size): + self.read = self.fileobj.read + return self.buf + + def getcomptype(self): + if self.buf.startswith("\037\213\010"): + return "gz" + if self.buf.startswith("BZh91"): + return "bz2" + return "tar" + + def close(self): + self.fileobj.close() +# class StreamProxy + #------------------------ # Extraction file object #------------------------ @@ -879,7 +913,7 @@ class TarFile(object): an appropriate TarFile class. mode: - 'r' open for reading with transparent compression + 'r' or 'r:*' open for reading with transparent compression 'r:' open for reading exclusively uncompressed 'r:gz' open for reading with gzip compression 'r:bz2' open for reading with bzip2 compression @@ -887,6 +921,8 @@ class TarFile(object): 'w' or 'w:' open for writing without compression 'w:gz' open for writing with gzip compression 'w:bz2' open for writing with bzip2 compression + + 'r|*' open a stream of tar blocks with transparent compression 'r|' open an uncompressed stream of tar blocks for reading 'r|gz' open a gzip compressed stream of tar blocks 'r|bz2' open a bzip2 compressed stream of tar blocks @@ -898,7 +934,17 @@ class TarFile(object): if not name and not fileobj: raise ValueError, "nothing to open" - if ":" in mode: + if mode in ("r", "r:*"): + # Find out which *open() is appropriate for opening the file. + for comptype in cls.OPEN_METH: + func = getattr(cls, cls.OPEN_METH[comptype]) + try: + return func(name, "r", fileobj) + except (ReadError, CompressionError): + continue + raise ReadError, "file could not be opened successfully" + + elif ":" in mode: filemode, comptype = mode.split(":", 1) filemode = filemode or "r" comptype = comptype or "tar" @@ -924,16 +970,6 @@ class TarFile(object): t._extfileobj = False return t - elif mode == "r": - # Find out which *open() is appropriate for opening the file. - for comptype in cls.OPEN_METH: - func = getattr(cls, cls.OPEN_METH[comptype]) - try: - return func(name, "r", fileobj) - except (ReadError, CompressionError): - continue - raise ReadError, "file could not be opened successfully" - elif mode in "aw": return cls.taropen(name, mode, fileobj) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index a6c4c4a..cc5e505 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -181,6 +181,18 @@ class ReadStreamTest(ReadTest): stream.close() +class ReadAsteriskTest(ReadTest): + + def setUp(self): + mode = self.mode + self.sep + "*" + self.tar = tarfile.open(tarname(self.comp), mode) + +class ReadStreamAsteriskTest(ReadStreamTest): + + def setUp(self): + mode = self.mode + self.sep + "*" + self.tar = tarfile.open(tarname(self.comp), mode) + class WriteTest(BaseTest): mode = 'w' @@ -336,6 +348,11 @@ class WriteTestGzip(WriteTest): comp = "gz" class WriteStreamTestGzip(WriteStreamTest): comp = "gz" +class ReadAsteriskTestGzip(ReadAsteriskTest): + comp = "gz" +class ReadStreamAsteriskTestGzip(ReadStreamAsteriskTest): + comp = "gz" + # Filemode test cases @@ -355,6 +372,10 @@ if bz2: comp = "bz2" class WriteStreamTestBzip2(WriteStreamTestGzip): comp = "bz2" + class ReadAsteriskTestBzip2(ReadAsteriskTest): + comp = "bz2" + class ReadStreamAsteriskTestBzip2(ReadStreamAsteriskTest): + comp = "bz2" # If importing gzip failed, discard the Gzip TestCases. if not gzip: @@ -375,6 +396,8 @@ def test_main(): FileModeTest, ReadTest, ReadStreamTest, + ReadAsteriskTest, + ReadStreamAsteriskTest, WriteTest, WriteStreamTest, WriteGNULongTest, @@ -386,13 +409,15 @@ def test_main(): if gzip: tests.extend([ ReadTestGzip, ReadStreamTestGzip, - WriteTestGzip, WriteStreamTestGzip + WriteTestGzip, WriteStreamTestGzip, + ReadAsteriskTestGzip, ReadStreamAsteriskTestGzip ]) if bz2: tests.extend([ ReadTestBzip2, ReadStreamTestBzip2, - WriteTestBzip2, WriteStreamTestBzip2 + WriteTestBzip2, WriteStreamTestBzip2, + ReadAsteriskTestBzip2, ReadStreamAsteriskTestBzip2 ]) try: test_support.run_unittest(*tests) @@ -78,6 +78,9 @@ Extension Modules Library ------- +- Patch #918101: Add tarfile open mode r|* for auto-detection of the + stream compression; add, for symmetry reasons, r:* as a synonym of r. + - Patch #1043890: Add extractall method to tarfile. - Patch #1075887: Don't require MSVC in distutils if there is nothing |