1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
|
"""Generic MIME parser.

Classes:

    MimeParser - Generic MIME parser.

Exceptions:

    MimeError - Exception raised by MimeParser class.

XXX To do:

- Content-transfer-encoding issues
- Use Content-length header in rawbody()?
- Cache parts instead of reparsing each time
- The message strings in exceptions could use some work

"""
from types import * # Python types, not MIME types :-)
import string
import regex
import SubFile
import mimetools
# MimeError is a string exception (a plain string object, not a class).
# Code in this module raises it with the two-argument form
# "raise MimeError, message".
MimeError = "MimeParser.MimeError" # Exception raised by this class
class MimeParser:
"""Generic MIME parser.
This requires a seekable file.
"""
def __init__(self, fp):
"""Constructor: store the file pointer and parse the headers."""
self._fp = fp
self._start = fp.tell()
self._headers = h = mimetools.Message(fp)
self._bodystart = fp.tell()
self._multipart = h.getmaintype() == 'multipart'
def multipart(self):
"""Return whether this is a multipart message."""
return self._multipart
def headers(self):
"""Return the headers of the MIME message, as a Message object."""
return self._headers
def rawbody(self):
"""Return the raw body of the MIME message, as a file-like object.
This is a fairly low-level interface -- for a multipart
message, you'd have to parse the body yourself, and it doesn't
translate the Content-transfer-encoding.
"""
# XXX Use Content-length to set end if it exists?
return SubFile.SubFile(self._fp, self._bodystart)
def body(self):
"""Return the body of a 1-part MIME message, as a file-like object.
This should interpret the Content-transfer-encoding, if any
(XXX currently it doesn't).
"""
if self._multipart:
raise MimeError, "body() only works for 1-part messages"
return self.rawbody()
_re_content_length = regex.compile('content-length:[ \t]*\([0-9]+\)',
regex.casefold)
def rawparts(self):
"""Return the raw body parts of a multipart MIME message.
This returns a list of SubFile() objects corresponding to the
parts. Note that the phantom part before the first separator
is returned too, as list item 0. If the final part is not
followed by a terminator, it is ignored, and this error is not
reported. (XXX: the error should be raised).
"""
if not self._multipart:
raise MimeError, "[raw]parts() only works for multipart messages"
h = self._headers
separator = h.getparam('boundary')
if not separator:
raise MimeError, "multipart boundary not specified"
separator = "--" + separator
terminator = separator + "--"
ns = len(separator)
list = []
f = self._fp
start = f.tell()
clength = -1
bodystart = -1
inheaders = 0
while 1:
end = f.tell()
line = f.readline()
if not line:
break
if line[:2] != "--" or line[:ns] != separator:
if inheaders:
re = self._re_content_length
if re.match(line) > 0:
try:
clength = string.atoi(re.group(1))
except string.atoi_error:
pass
if not string.strip(line):
inheaders = 0
bodystart = f.tell()
if clength > 0:
# Skip binary data
f.read(clength)
continue
line = string.strip(line)
if line == terminator or line == separator:
if clength >= 0:
# The Content-length header determines the subfile size
end = bodystart + clength
else:
# The final newline is not part of the content
end = end-1
list.append(SubFile.SubFile(f, start, end))
start = f.tell()
clength = -1
inheaders = 1
if line == terminator:
break
return list
def parts(self):
"""Return the parsed body parts of a multipart MIME message.
This returns a list of MimeParser() instances corresponding to
the parts. The phantom part before the first separator is not
included.
"""
return map(MimeParser, self.rawparts()[1:])
def getsubpartbyposition(self, indices):
part = self
for i in indices:
part = part.parts()[i]
return part
def getsubpartbyid(self, id):
h = self._headers
cid = h.getheader('content-id')
if cid and cid == id:
return self
if self._multipart:
for part in self.parts():
parser = MimeParser(part)
hit = parser.getsubpartbyid(id)
if hit:
return hit
return None
def index(self):
"""Return an index of the MIME file.
This parses the entire file and returns index information
about it, in the form of a tuple
(ctype, headers, body)
where 'ctype' is the content type string of the message
(e.g. `text/plain' or `multipart/mixed') and 'headers' is a
Message instance containing the message headers (which should
be treated as read-only).
The 'body' item depends on the content type:
- If it is an atomic message (anything except for content type
multipart/*), it is the file-like object returned by
self.body().
- For a content type of multipart/*, it is the list of
MimeParser() objects returned by self.parts().
"""
if self._multipart:
body = self.parts()
else:
body = self.body()
return self._headers.gettype(), self._headers, body
def _show(parser, level=0):
"""Helper for _test()."""
ctype, headers, body = parser.index()
print ctype,
if type(body) == ListType:
nparts = len(body)
print "(%d part%s):" % (nparts, nparts != 1 and "s" or "")
n = 0
for part in body:
n = n+1
print "%*d." % (4*level+2, n),
_show(part, level+1)
else:
bodylines = body.readlines()
print "(%d header lines, %d body lines)" % (
len(headers.headers), len(bodylines))
for line in headers.headers + ['\n'] + bodylines:
if line[-1:] == '\n': line = line[:-1]
print " "*level + line
def _test(args = None):
    """Test program invoked when run as a script.

    args is an optional list of command-line arguments; when it is
    empty or omitted, sys.argv[1:] is used instead.

    When a filename argument is specified, it reads from that file.
    When no arguments are present, it defaults to 'testkp.txt' if it
    exists, else it defaults to stdin.  The filename '-' also selects
    stdin.

    """
    # Import unconditionally.  A conditional "import sys" inside the
    # function still makes sys a local name, so the later sys.stdin
    # reference failed whenever args was supplied by the caller.
    import os
    import sys
    if not args:
        args = sys.argv[1:]
    if args:
        fn = args[0]
    elif os.path.exists('testkp.txt'):
        fn = 'testkp.txt'
    else:
        fn = '-'
    if fn == '-':
        _show(MimeParser(sys.stdin))
        return
    fp = open(fn)
    try:
        _show(MimeParser(fp))
    finally:
        # Don't leak the file we opened ourselves (stdin is left alone).
        fp.close()
# Script entry point: run the self-test driver when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    import sys
    _test()
|