summaryrefslogtreecommitdiffstats
path: root/Lib/dos-8x3/mimepars.py
blob: 46fe9ebb2034731bef2ebece3936231e86839eee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
"""Generic MIME parser.

Classes:

        MimeParser - Generic MIME parser.

Exceptions:

        MimeError - Exception raised by MimeParser class.

XXX To do:

- Content-transfer-encoding issues
- Use Content-length header in rawbody()?
- Cache parts instead of reparsing each time
- The message strings in exceptions could use some work

"""

from types import *                     # Python types, not MIME types :-)
import string
import regex
import SubFile
import mimetools


# Exception raised by the MimeParser class.  This is a string exception:
# handlers must refer to this MimeError object itself (string exceptions
# are matched by identity), not to an equal string literal.
MimeError = "MimeParser.MimeError"


class MimeParser:

    """Generic MIME parser.

    This requires a seekable file.

    """

    def __init__(self, fp):
        """Constructor: store the file pointer and parse the headers."""
        self._fp = fp
        self._start = fp.tell()
        self._headers = h = mimetools.Message(fp)
        self._bodystart = fp.tell()
        self._multipart = h.getmaintype() == 'multipart'

    def multipart(self):
        """Return whether this is a multipart message."""
        return self._multipart

    def headers(self):
        """Return the headers of the MIME message, as a Message object."""
        return self._headers

    def rawbody(self):
        """Return the raw body of the MIME message, as a file-like object.

        This is a fairly low-level interface -- for a multipart
        message, you'd have to parse the body yourself, and it doesn't
        translate the Content-transfer-encoding.
        
        """
        # XXX Use Content-length to set end if it exists?
        return SubFile.SubFile(self._fp, self._bodystart)

    def body(self):
        """Return the body of a 1-part MIME message, as a file-like object.

        This should interpret the Content-transfer-encoding, if any
        (XXX currently it doesn't).
        
        """
        if self._multipart:
            raise MimeError, "body() only works for 1-part messages"
        return self.rawbody()

    _re_content_length = regex.compile('content-length:[ \t]*\([0-9]+\)',
                                       regex.casefold)

    def rawparts(self):
        """Return the raw body parts of a multipart MIME message.

        This returns a list of SubFile() objects corresponding to the
        parts.  Note that the phantom part before the first separator
        is returned too, as list item 0.  If the final part is not
        followed by a terminator, it is ignored, and this error is not
        reported.  (XXX: the error should be raised).

        """
        if not self._multipart:
            raise MimeError, "[raw]parts() only works for multipart messages"
        h = self._headers
        separator = h.getparam('boundary')
        if not separator:
            raise MimeError, "multipart boundary not specified"
        separator = "--" + separator
        terminator = separator + "--"
        ns = len(separator)
        list = []
        f = self._fp
        start = f.tell()
        clength = -1
        bodystart = -1
        inheaders = 0
        while 1:
            end = f.tell()
            line = f.readline()
            if not line:
                break
            if line[:2] != "--" or line[:ns] != separator:
                if inheaders:
                    re = self._re_content_length
                    if re.match(line) > 0:
                        try:
                            clength = string.atoi(re.group(1))
                        except string.atoi_error:
                            pass
                    if not string.strip(line):
                        inheaders = 0
                        bodystart = f.tell()
                        if clength > 0:
                            # Skip binary data
                            f.read(clength)
                continue
            line = string.strip(line)
            if line == terminator or line == separator:
                if clength >= 0:
                    # The Content-length header determines the subfile size
                    end = bodystart + clength
                else:
                    # The final newline is not part of the content
                    end = end-1
                list.append(SubFile.SubFile(f, start, end))
                start = f.tell()
                clength = -1
                inheaders = 1
                if line == terminator:
                    break
        return list

    def parts(self):
        """Return the parsed body parts of a multipart MIME message.

        This returns a list of MimeParser() instances corresponding to
        the parts.  The phantom part before the first separator is not
        included.

        """
        return map(MimeParser, self.rawparts()[1:])

    def getsubpartbyposition(self, indices):
        part = self
        for i in indices:
            part = part.parts()[i]
        return part

    def getsubpartbyid(self, id):
        h = self._headers
        cid = h.getheader('content-id')
        if cid and cid == id:
            return self
        if self._multipart:
            for part in self.parts():
                parser = MimeParser(part)
                hit = parser.getsubpartbyid(id)
                if hit:
                    return hit
        return None

    def index(self):
        """Return an index of the MIME file.

        This parses the entire file and returns index information
        about it, in the form of a tuple

            (ctype, headers, body)

        where 'ctype' is the content type string of the message
        (e.g. `text/plain' or `multipart/mixed') and 'headers' is a
        Message instance containing the message headers (which should
        be treated as read-only).

        The 'body' item depends on the content type:

        - If it is an atomic message (anything except for content type
          multipart/*), it is the file-like object returned by
          self.body().

        - For a content type of multipart/*, it is the list of
          MimeParser() objects returned by self.parts().

        """
        if self._multipart:
            body = self.parts()
        else:
            body = self.body()
        return self._headers.gettype(), self._headers, body


def _show(parser, level=0):
    """Helper for _test()."""
    ctype, headers, body = parser.index()
    print ctype,
    if type(body) == ListType:
        nparts = len(body)
        print "(%d part%s):" % (nparts, nparts != 1 and "s" or "")
        n = 0
        for part in body:
            n = n+1
            print "%*d." % (4*level+2, n),
            _show(part, level+1)
    else:
        bodylines = body.readlines()
        print "(%d header lines, %d body lines)" % (
            len(headers.headers), len(bodylines))
        for line in headers.headers + ['\n'] + bodylines:
            if line[-1:] == '\n': line = line[:-1]
            print "    "*level + line

def _test(args = None):
    """Test program invoked when run as a script.

    'args' is a list of command line arguments; when it is empty or
    omitted, sys.argv[1:] is used instead.

    When a filename argument is specified, it reads from that file.
    When no arguments are present, it defaults to 'testkp.txt' if it
    exists, else it defaults to stdin.  The filename '-' also selects
    stdin.

    """
    # Import unconditionally: an import anywhere in the function makes
    # the name local, so a conditional import left 'sys' unbound when
    # the caller passed e.g. ['-'].
    import os
    import sys
    if not args:
        args = sys.argv[1:]
    if args:
        fn = args[0]
    else:
        fn = 'testkp.txt'
        if not os.path.exists(fn):
            fn = '-'
    if fn == '-':
        fp = sys.stdin
    else:
        fp = open(fn)
    try:
        mp = MimeParser(fp)
        _show(mp)
    finally:
        # Close only what was opened here; stdin belongs to the caller.
        if fp is not sys.stdin:
            fp.close()

if __name__ == '__main__':
    # Executed as a script: run the self-test.  _test() called without
    # arguments takes its input file name from sys.argv.
    import sys
    _test()