1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
#!/usr/bin/env python3
# Demo program for zlib; it compresses or decompresses files, but *doesn't*
# delete the original. This doesn't support all of gzip's options.
#
# The 'gzip' module in the standard library provides a more complete
# implementation of gzip-format files.
import zlib, sys, os
# Bit masks for the flag byte of a gzip header (RFC 1952, "FLG").
FTEXT = 1 << 0      # content is probably text (not used by this program)
FHCRC = 1 << 1      # a 16-bit header CRC follows the header
FEXTRA = 1 << 2     # an "extra" field is present
FNAME = 1 << 3      # a NUL-terminated original file name is present
FCOMMENT = 1 << 4   # a NUL-terminated comment is present
def write32(output, value):
    """Write *value* to *output* as a 32-bit little-endian integer.

    output -- binary stream with a write() method.
    value  -- integer to store; only its low 32 bits are written, so
              larger (or negative) values wrap modulo 2**32.
    """
    # Mask first so to_bytes() never overflows, then emit all four bytes
    # with a single write instead of four one-byte writes.
    output.write((value & 0xFFFFFFFF).to_bytes(4, 'little'))
def read32(input):
    """Read a 32-bit little-endian unsigned integer from *input*.

    input -- binary stream with a read() method, positioned at the first
             (least significant) byte of the value.

    Returns the decoded integer.  Raises EOFError if fewer than four
    bytes could be read.
    """
    raw = input.read(4)
    if len(raw) != 4:
        # Fail with a clear message instead of the TypeError that
        # ord(b'') would produce on a truncated stream.
        raise EOFError('Unexpected end of data while reading a 32-bit value')
    return int.from_bytes(raw, 'little')
def compress(filename, input, output):
    """Compress the data read from *input* and write it to *output* in
    gzip format.

    filename -- path of the file being compressed.  It is stat()ed for the
                modification time, and its base name is stored in the
                header's file-name field.
    input    -- binary stream the uncompressed data is read from.
    output   -- binary stream that receives the gzip header, the raw
                deflate data and the 8-byte trailer.
    """
    output.write(b'\037\213\010')            # Write the header, ...
    output.write(bytes([FNAME]))             # ... flag byte ...
    mtime = int(os.stat(filename).st_mtime)  # ... modification time ...
    write32(output, mtime)
    output.write(b'\002')                    # ... slowest compression alg. ...
    output.write(b'\377')                    # ... OS (=unknown) ...
    # The name field holds the file name only, without directory
    # components (RFC 1952), so strip any leading path.
    bfilename = os.fsencode(os.path.basename(filename))
    output.write(bfilename + b'\000')        # ... original filename ...

    crcval = zlib.crc32(b'')
    length = 0
    # Negative wbits selects a raw deflate stream: the gzip header and
    # trailer are written by hand in this function.
    compobj = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
                               zlib.DEF_MEM_LEVEL, zlib.Z_DEFAULT_STRATEGY)
    while True:
        data = input.read(1024)
        if not data:
            break
        # Count what was actually compressed rather than trusting the
        # size reported by stat(), which may not match the stream.
        length += len(data)
        crcval = zlib.crc32(data, crcval)
        output.write(compobj.compress(data))
    output.write(compobj.flush())
    write32(output, crcval)                  # ... the CRC ...
    write32(output, length)                  # and the file size.
def _read_exact(stream, count):
    """Return exactly *count* bytes from *stream*; raise EOFError if it ends early."""
    data = stream.read(count)
    if len(data) != count:
        raise EOFError('Unexpected end of gzip data')
    return data


def _skip_cstring(stream):
    """Consume bytes from *stream* up to and including the next NUL byte."""
    # _read_exact() raises at end of data, so a truncated header cannot
    # make this loop spin forever.
    while _read_exact(stream, 1) != b'\0':
        pass


def decompress(input, output):
    """Decompress the gzip data read from *input* and write it to *output*.

    input  -- binary stream positioned at the start of a gzip member.
    output -- binary stream that receives the decompressed data.

    Prints a message and exits with status 1 if the data does not start
    with the gzip magic number or uses an unknown compression method.
    Raises EOFError if the header, the compressed data or the trailer is
    truncated.  A CRC or length mismatch is reported with a printed
    message only.
    """
    if input.read(2) != b'\037\213':
        print('Not a gzipped file')
        sys.exit(1)
    if _read_exact(input, 1) != b'\010':
        print('Unknown compression method')
        sys.exit(1)
    flag = ord(_read_exact(input, 1))
    _read_exact(input, 4 + 1 + 1)       # Discard modification time,
                                        # extra flags, and OS byte.
    if flag & FEXTRA:
        # Read & discard the extra field, if present
        xlen = int.from_bytes(_read_exact(input, 2), 'little')
        _read_exact(input, xlen)
    if flag & FNAME:
        # Read and discard a null-terminated string containing the filename
        _skip_cstring(input)
    if flag & FCOMMENT:
        # Read and discard a null-terminated string containing a comment
        _skip_cstring(input)
    if flag & FHCRC:
        _read_exact(input, 2)           # Read & discard the 16-bit header CRC

    # Negative wbits: the payload is a raw deflate stream, the gzip
    # header having been parsed by hand above.
    decompobj = zlib.decompressobj(-zlib.MAX_WBITS)
    crcval = zlib.crc32(b'')
    length = 0
    # Stop feeding the decompressor as soon as it has seen the end of the
    # deflate stream; anything after that belongs to the trailer.
    while not decompobj.eof:
        data = input.read(1024)
        if not data:
            break
        decompdata = decompobj.decompress(data)
        output.write(decompdata)
        length += len(decompdata)
        crcval = zlib.crc32(decompdata, crcval)
    decompdata = decompobj.flush()
    output.write(decompdata)
    length += len(decompdata)
    crcval = zlib.crc32(decompdata, crcval)

    # The deflate stream is followed by 8 bytes holding the CRC and the
    # uncompressed size.  The bytes of the last chunk that the
    # decompressor did not consume are in unused_data; read the remainder
    # from the stream if that chunk ended before the trailer did.  This
    # avoids seeking, so it also works on non-seekable streams.
    trailer = decompobj.unused_data[:8]
    if len(trailer) < 8:
        trailer += input.read(8 - len(trailer))
    if len(trailer) < 8:
        raise EOFError('Unexpected end of gzip data')
    expected_crc = int.from_bytes(trailer[:4], 'little')
    isize = int.from_bytes(trailer[4:], 'little')
    if expected_crc != crcval:
        print('CRC check failed.')
    # The stored size is the uncompressed length modulo 2**32.
    if isize != (length & 0xFFFFFFFF):
        print('Incorrect length of data produced')
def main():
    """Command-line entry point: compress or decompress one file.

    Expects exactly one argument, the file name.  A name ending in '.gz'
    is decompressed into the same name without that suffix; any other
    file is compressed into '<name>.gz'.  The original file is kept.
    Prints a usage message and exits with status 2 when the argument
    count is wrong.
    """
    if len(sys.argv) != 2:
        print('Usage: minigzip.py <filename>')
        print(' The file will be compressed or decompressed.')
        # A usage error is a failure; report it through the exit status.
        sys.exit(2)

    filename = sys.argv[1]
    if filename.endswith('.gz'):
        compressing = False
        outputname = filename[:-3]
    else:
        compressing = True
        outputname = filename + '.gz'

    # The with-statement closes both files even when compression or
    # decompression raises or calls sys.exit().
    with open(filename, 'rb') as infile, open(outputname, 'wb') as outfile:
        if compressing:
            compress(filename, infile, outfile)
        else:
            decompress(infile, outfile)


if __name__ == '__main__':
    main()
|