summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Walter Dörwald <walter@livinglogic.de> 2007-06-06 16:43:59 (GMT)
committer Walter Dörwald <walter@livinglogic.de> 2007-06-06 16:43:59 (GMT)
commit 5b1284d0b757f28d97fb21d487b4fe19a858c88f (patch)
tree e95b77d4e44a31dd6826e1ba81d8d8659cc8af62
parent 3a77c7ab16d737a19cfb3fae4bd0f92517abe149 (diff)
download cpython-5b1284d0b757f28d97fb21d487b4fe19a858c88f.zip
cpython-5b1284d0b757f28d97fb21d487b4fe19a858c88f.tar.gz
cpython-5b1284d0b757f28d97fb21d487b4fe19a858c88f.tar.bz2
Fix gzip.py: Use bytes where 8bit strings have been used formerly.
(The filename gets written in utf-8 encoded form which probably isn't correct.) Fix the test.
-rw-r--r-- Lib/gzip.py 42
-rw-r--r-- Lib/test/test_gzip.py 18
2 files changed, 31 insertions, 29 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py
index fd72b9e..b6cc80e 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -104,7 +104,7 @@ class GzipFile:
self.mode = READ
# Set flag indicating start of a new member
self._new_member = True
- self.extrabuf = ""
+ self.extrabuf = b""
self.extrasize = 0
self.name = filename
# Starts small, scales exponentially
@@ -147,20 +147,21 @@ class GzipFile:
self.bufsize = 0
def _write_gzip_header(self):
- self.fileobj.write('\037\213') # magic header
- self.fileobj.write('\010') # compression method
+ self.fileobj.write(b'\037\213') # magic header
+ self.fileobj.write(b'\010') # compression method
fname = self.name
if fname.endswith(".gz"):
fname = fname[:-3]
flags = 0
if fname:
flags = FNAME
- self.fileobj.write(chr(flags))
+ self.fileobj.write(chr(flags).encode('latin-1'))
write32u(self.fileobj, int(time.time()))
- self.fileobj.write('\002')
- self.fileobj.write('\377')
+ self.fileobj.write(b'\002')
+ self.fileobj.write(b'\377')
if fname:
- self.fileobj.write(fname + '\000')
+ # XXX: Ist utf-8 the correct encoding?
+ self.fileobj.write(fname.encode('utf-8') + b'\000')
def _init_read(self):
self.crc = zlib.crc32("")
@@ -168,7 +169,7 @@ class GzipFile:
def _read_gzip_header(self):
magic = self.fileobj.read(2)
- if magic != '\037\213':
+ if magic != b'\037\213':
raise IOError, 'Not a gzipped file'
method = ord( self.fileobj.read(1) )
if method != 8:
@@ -188,13 +189,13 @@ class GzipFile:
# Read and discard a null-terminated string containing the filename
while True:
s = self.fileobj.read(1)
- if not s or s=='\000':
+ if not s or s==b'\000':
break
if flag & FCOMMENT:
# Read and discard a null-terminated string containing a comment
while True:
s = self.fileobj.read(1)
- if not s or s=='\000':
+ if not s or s==b'\000':
break
if flag & FHCRC:
self.fileobj.read(2) # Read & discard the 16-bit header CRC
@@ -219,7 +220,7 @@ class GzipFile:
raise IOError(errno.EBADF, "read() on write-only GzipFile object")
if self.extrasize <= 0 and self.fileobj is None:
- return ''
+ return b''
readsize = 1024
if size < 0: # get the whole thing
@@ -278,7 +279,7 @@ class GzipFile:
# If the EOF has been reached, flush the decompression object
# and mark this object as finished.
- if buf == "":
+ if buf == b"":
uncompress = self.decompress.flush()
self._read_eof()
self._add_read_data( uncompress )
@@ -287,7 +288,7 @@ class GzipFile:
uncompress = self.decompress.decompress(buf)
self._add_read_data( uncompress )
- if self.decompress.unused_data != "":
+ if self.decompress.unused_data != b"":
# Ending case: we've come to the end of a member in the file,
# so seek back to the start of the unused data, finish up
# this member, and read a new gzip header.
@@ -375,7 +376,7 @@ class GzipFile:
raise IOError("Can't rewind in write mode")
self.fileobj.seek(0)
self._new_member = True
- self.extrabuf = ""
+ self.extrabuf = b""
self.extrasize = 0
self.offset = 0
@@ -389,9 +390,10 @@ class GzipFile:
if offset < self.offset:
raise IOError('Negative seek in write mode')
count = offset - self.offset
+ chunk = bytes(1024)
for i in range(count // 1024):
- self.write(1024 * '\0')
- self.write((count % 1024) * '\0')
+ self.write(chunk)
+ self.write(bytes(count % 1024))
elif self.mode == READ:
if offset < self.offset:
# for negative seek, rewind and do positive seek
@@ -410,7 +412,7 @@ class GzipFile:
bufs = []
while size != 0:
c = self.read(readsize)
- i = c.find('\n')
+ i = c.find(b'\n')
# We set i=size to break out of the loop under two
# conditions: 1) there's no newline, and the chunk is
@@ -419,7 +421,7 @@ class GzipFile:
if (size <= i) or (i == -1 and len(c) > size):
i = size - 1
- if i >= 0 or c == '':
+ if i >= 0 or c == b'':
bufs.append(c[:i + 1]) # Add portion of last chunk
self._unread(c[i + 1:]) # Push back rest of chunk
break
@@ -430,7 +432,7 @@ class GzipFile:
readsize = min(size, readsize * 2)
if readsize > self.min_readsize:
self.min_readsize = min(readsize, self.min_readsize * 2, 512)
- return ''.join(bufs) # Return resulting line
+ return b''.join(bufs) # Return resulting line
def readlines(self, sizehint=0):
# Negative numbers result in reading all the lines
@@ -439,7 +441,7 @@ class GzipFile:
L = []
while sizehint > 0:
line = self.readline()
- if line == "":
+ if line == b"":
break
L.append(line)
sizehint = sizehint - len(line)
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index 124a469..31000df 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -8,14 +8,14 @@ import sys, os
import gzip
-data1 = """ int length=DEFAULTALLOC, err = Z_OK;
+data1 = b""" int length=DEFAULTALLOC, err = Z_OK;
PyObject *RetVal;
int flushmode = Z_FINISH;
unsigned long start_total_out;
"""
-data2 = """/* zlibmodule.c -- gzip-compatible data compression */
+data2 = b"""/* zlibmodule.c -- gzip-compatible data compression */
/* See http://www.gzip.org/zlib/
/* See http://www.winimage.com/zLibDll for Windows */
"""
@@ -63,22 +63,22 @@ class TestGzip(unittest.TestCase):
# many, many members. Create such a file and verify that reading it
# works.
f = gzip.open(self.filename, 'wb', 9)
- f.write('a')
+ f.write(b'a')
f.close()
- for i in range(0,200):
+ for i in range(0, 200):
f = gzip.open(self.filename, "ab", 9) # append
- f.write('a')
+ f.write(b'a')
f.close()
# Try reading the file
zgfile = gzip.open(self.filename, "rb")
- contents = ""
+ contents = b""
while 1:
ztxt = zgfile.read(8192)
contents += ztxt
if not ztxt: break
zgfile.close()
- self.assertEquals(contents, 'a'*201)
+ self.assertEquals(contents, b'a'*201)
def test_readline(self):
@@ -89,7 +89,7 @@ class TestGzip(unittest.TestCase):
line_length = 0
while 1:
L = f.readline(line_length)
- if L == "" and line_length != 0: break
+ if not L and line_length != 0: break
self.assert_(len(L) <= line_length)
line_length = (line_length + 1) % 50
f.close()
@@ -144,7 +144,7 @@ class TestGzip(unittest.TestCase):
f = gzip.GzipFile(self.filename, 'w')
for pos in range(0, 256, 16):
f.seek(pos)
- f.write('GZ\n')
+ f.write(b'GZ\n')
f.close()
def test_mode(self):