summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPierre Quentel <pierre.quentel@gmail.com>2017-05-08 12:08:34 (GMT)
committerSenthil Kumaran <skumaran@gatech.edu>2017-05-08 12:08:34 (GMT)
commitcc3fa204d357be5fafc10eb8c2a80fe0bca998f1 (patch)
treebd84defa311575c96461db01238cd231d94c439f
parentf34c6850203a2406c4950af7a9c8a134145df4ea (diff)
downloadcpython-cc3fa204d357be5fafc10eb8c2a80fe0bca998f1.zip
cpython-cc3fa204d357be5fafc10eb8c2a80fe0bca998f1.tar.gz
cpython-cc3fa204d357be5fafc10eb8c2a80fe0bca998f1.tar.bz2
bpo-29979: Rewrite cgi.parse_multipart to make it consistent with FieldStorage (#991)
-rw-r--r--Doc/library/cgi.rst19
-rw-r--r--Doc/whatsnew/3.7.rst8
-rwxr-xr-xLib/cgi.py105
-rw-r--r--Lib/test/test_cgi.py4
-rw-r--r--Misc/NEWS4
5 files changed, 38 insertions, 102 deletions
diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
index 41219ee..b60e1cc 100644
--- a/Doc/library/cgi.rst
+++ b/Doc/library/cgi.rst
@@ -294,19 +294,20 @@ algorithms implemented in this module in other circumstances.
This function is deprecated in this module. Use :func:`urllib.parse.parse_qsl`
instead. It is maintained here only for backward compatibility.
-.. function:: parse_multipart(fp, pdict)
+.. function:: parse_multipart(fp, pdict, encoding="utf-8")
Parse input of type :mimetype:`multipart/form-data` (for file uploads).
- Arguments are *fp* for the input file and *pdict* for a dictionary containing
- other parameters in the :mailheader:`Content-Type` header.
+ Arguments are *fp* for the input file, *pdict* for a dictionary containing
+ other parameters in the :mailheader:`Content-Type` header, and *encoding*,
+ the request encoding.
- Returns a dictionary just like :func:`urllib.parse.parse_qs` keys are the field names, each
- value is a list of values for that field. This is easy to use but not much good
- if you are expecting megabytes to be uploaded --- in that case, use the
- :class:`FieldStorage` class instead which is much more flexible.
+ Returns a dictionary just like :func:`urllib.parse.parse_qs`: keys are the
+ field names, each value is a list of values for that field. For non-file
+ fields, the value is a list of strings.
- Note that this does not parse nested multipart parts --- use
- :class:`FieldStorage` for that.
+ This is easy to use but not much good if you are expecting megabytes to be
+ uploaded --- in that case, use the :class:`FieldStorage` class instead
+ which is much more flexible.
.. function:: parse_header(string)
diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
index 7edf4fc..b2dc995 100644
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@@ -95,6 +95,14 @@ New Modules
Improved Modules
================
+cgi
+---
+
+:func:`~cgi.parse_multipart` returns the same results as
+:class:`~cgi.FieldStorage`: for non-file fields, the value associated with a
+key is a list of strings, not bytes.
+(Contributed by Pierre Quentel in :issue:`29979`.)
+
binascii
--------
diff --git a/Lib/cgi.py b/Lib/cgi.py
index 14d15a6..f5e85aa 100755
--- a/Lib/cgi.py
+++ b/Lib/cgi.py
@@ -198,105 +198,28 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
DeprecationWarning, 2)
return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
-def parse_multipart(fp, pdict):
+def parse_multipart(fp, pdict, encoding="utf-8"):
"""Parse multipart input.
Arguments:
fp : input file
pdict: dictionary containing other parameters of content-type header
+ encoding: request encoding
Returns a dictionary just like parse_qs(): keys are the field names, each
- value is a list of values for that field. This is easy to use but not
- much good if you are expecting megabytes to be uploaded -- in that case,
- use the FieldStorage class instead which is much more flexible. Note
- that content-type is the raw, unparsed contents of the content-type
- header.
-
- XXX This does not parse nested multipart parts -- use FieldStorage for
- that.
-
- XXX This should really be subsumed by FieldStorage altogether -- no
- point in having two implementations of the same parsing algorithm.
- Also, FieldStorage protects itself better against certain DoS attacks
- by limiting the size of the data read in one chunk. The API here
- does not support that kind of protection. This also affects parse()
- since it can call parse_multipart().
-
+ value is a list of values for that field. For non-file fields, the value
+ is a list of strings.
"""
- import http.client
-
- boundary = b""
- if 'boundary' in pdict:
- boundary = pdict['boundary']
- if not valid_boundary(boundary):
- raise ValueError('Invalid boundary in multipart form: %r'
- % (boundary,))
-
- nextpart = b"--" + boundary
- lastpart = b"--" + boundary + b"--"
- partdict = {}
- terminator = b""
-
- while terminator != lastpart:
- bytes = -1
- data = None
- if terminator:
- # At start of next part. Read headers first.
- headers = http.client.parse_headers(fp)
- clength = headers.get('content-length')
- if clength:
- try:
- bytes = int(clength)
- except ValueError:
- pass
- if bytes > 0:
- if maxlen and bytes > maxlen:
- raise ValueError('Maximum content length exceeded')
- data = fp.read(bytes)
- else:
- data = b""
- # Read lines until end of part.
- lines = []
- while 1:
- line = fp.readline()
- if not line:
- terminator = lastpart # End outer loop
- break
- if line.startswith(b"--"):
- terminator = line.rstrip()
- if terminator in (nextpart, lastpart):
- break
- lines.append(line)
- # Done with part.
- if data is None:
- continue
- if bytes < 0:
- if lines:
- # Strip final line terminator
- line = lines[-1]
- if line[-2:] == b"\r\n":
- line = line[:-2]
- elif line[-1:] == b"\n":
- line = line[:-1]
- lines[-1] = line
- data = b"".join(lines)
- line = headers['content-disposition']
- if not line:
- continue
- key, params = parse_header(line)
- if key != 'form-data':
- continue
- if 'name' in params:
- name = params['name']
- else:
- continue
- if name in partdict:
- partdict[name].append(data)
- else:
- partdict[name] = [data]
-
- return partdict
-
+ # RFC 2046, Section 5.1 : The "multipart" boundary delimiters are always
+ # represented as 7bit US-ASCII.
+ boundary = pdict['boundary'].decode('ascii')
+ ctype = "multipart/form-data; boundary={}".format(boundary)
+ headers = Message()
+ headers.set_type(ctype)
+ headers['Content-Length'] = pdict['CONTENT-LENGTH']
+ fs = FieldStorage(fp, headers=headers, encoding=encoding,
+ environ={'REQUEST_METHOD': 'POST'})
+ return {k: fs.getlist(k) for k in fs}
def _parseparam(s):
while s[:1] == ';':
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
index 6373221..903d073 100644
--- a/Lib/test/test_cgi.py
+++ b/Lib/test/test_cgi.py
@@ -126,8 +126,8 @@ class CgiTests(unittest.TestCase):
env = {'boundary': BOUNDARY.encode('latin1'),
'CONTENT-LENGTH': '558'}
result = cgi.parse_multipart(fp, env)
- expected = {'submit': [b' Add '], 'id': [b'1234'],
- 'file': [b'Testing 123.\n'], 'title': [b'']}
+ expected = {'submit': [' Add '], 'id': ['1234'],
+ 'file': [b'Testing 123.\n'], 'title': ['']}
self.assertEqual(result, expected)
def test_fieldstorage_properties(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index a72fcef..5e5ce59 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -317,6 +317,10 @@ Extension Modules
Library
-------
+- bpo-29979: rewrite cgi.parse_multipart, reusing the FieldStorage class and
+ making its results consistent with those of FieldStorage for
+ multipart/form-data requests. Patch by Pierre Quentel.
+
- bpo-30243: Removed the __init__ methods of _json's scanner and encoder.
Misusing them could cause memory leaks or crashes. Now scanner and encoder
objects are completely initialized in the __new__ methods.