diff options
Diffstat (limited to 'Lib/cgi.py')
-rwxr-xr-x | Lib/cgi.py | 105 |
1 files changed, 14 insertions, 91 deletions
@@ -198,105 +198,28 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): DeprecationWarning, 2) return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing) -def parse_multipart(fp, pdict): +def parse_multipart(fp, pdict, encoding="utf-8"): """Parse multipart input. Arguments: fp : input file pdict: dictionary containing other parameters of content-type header + encoding: request encoding Returns a dictionary just like parse_qs(): keys are the field names, each - value is a list of values for that field. This is easy to use but not - much good if you are expecting megabytes to be uploaded -- in that case, - use the FieldStorage class instead which is much more flexible. Note - that content-type is the raw, unparsed contents of the content-type - header. - - XXX This does not parse nested multipart parts -- use FieldStorage for - that. - - XXX This should really be subsumed by FieldStorage altogether -- no - point in having two implementations of the same parsing algorithm. - Also, FieldStorage protects itself better against certain DoS attacks - by limiting the size of the data read in one chunk. The API here - does not support that kind of protection. This also affects parse() - since it can call parse_multipart(). - + value is a list of values for that field. For non-file fields, the value + is a list of strings. """ - import http.client - - boundary = b"" - if 'boundary' in pdict: - boundary = pdict['boundary'] - if not valid_boundary(boundary): - raise ValueError('Invalid boundary in multipart form: %r' - % (boundary,)) - - nextpart = b"--" + boundary - lastpart = b"--" + boundary + b"--" - partdict = {} - terminator = b"" - - while terminator != lastpart: - bytes = -1 - data = None - if terminator: - # At start of next part. Read headers first. - headers = http.client.parse_headers(fp) - clength = headers.get('content-length') - if clength: - try: - bytes = int(clength) - except ValueError: - pass - if bytes > 0: - if maxlen and bytes > maxlen: - raise ValueError('Maximum content length exceeded') - data = fp.read(bytes) - else: - data = b"" - # Read lines until end of part. - lines = [] - while 1: - line = fp.readline() - if not line: - terminator = lastpart # End outer loop - break - if line.startswith(b"--"): - terminator = line.rstrip() - if terminator in (nextpart, lastpart): - break - lines.append(line) - # Done with part. - if data is None: - continue - if bytes < 0: - if lines: - # Strip final line terminator - line = lines[-1] - if line[-2:] == b"\r\n": - line = line[:-2] - elif line[-1:] == b"\n": - line = line[:-1] - lines[-1] = line - data = b"".join(lines) - line = headers['content-disposition'] - if not line: - continue - key, params = parse_header(line) - if key != 'form-data': - continue - if 'name' in params: - name = params['name'] - else: - continue - if name in partdict: - partdict[name].append(data) - else: - partdict[name] = [data] - - return partdict - + # RFC 2026, Section 5.1 : The "multipart" boundary delimiters are always + # represented as 7bit US-ASCII. + boundary = pdict['boundary'].decode('ascii') + ctype = "multipart/form-data; boundary={}".format(boundary) + headers = Message() + headers.set_type(ctype) + headers['Content-Length'] = pdict['CONTENT-LENGTH'] + fs = FieldStorage(fp, headers=headers, encoding=encoding, + environ={'REQUEST_METHOD': 'POST'}) + return {k: fs.getlist(k) for k in fs} def _parseparam(s): while s[:1] == ';': |