From c585df94766702ac6d6f3f0d27ad5d07e5546164 Mon Sep 17 00:00:00 2001 From: Facundo Batista Date: Wed, 3 Sep 2008 22:35:50 +0000 Subject: Issue 600362: Relocated parse_qs() and parse_qsl(), from the cgi module to the urlparse one. Added a PendingDeprecationWarning in the old module, it will be deprecated in the future. Docs and tests updated. --- Doc/library/cgi.rst | 43 ++++------------------- Doc/library/urllib.rst | 2 +- Doc/library/urlparse.rst | 39 +++++++++++++++++++++ Lib/cgi.py | 85 ++++++++++----------------------------------- Lib/test/test_cgi.py | 22 ------------ Lib/test/test_urlparse.py | 22 ++++++++++++ Lib/urlparse.py | 88 ++++++++++++++++++++++++++++++++++++++++++++++- Misc/NEWS | 4 +++ 8 files changed, 177 insertions(+), 128 deletions(-) diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst index 17cd750..0248284 100644 --- a/Doc/library/cgi.rst +++ b/Doc/library/cgi.rst @@ -282,49 +282,18 @@ algorithms implemented in this module in other circumstances. Parse a query in the environment or from a file (the file defaults to ``sys.stdin``). The *keep_blank_values* and *strict_parsing* parameters are - passed to :func:`parse_qs` unchanged. + passed to :func:`urlparse.parse_qs` unchanged. .. function:: parse_qs(qs[, keep_blank_values[, strict_parsing]]) - Parse a query string given as a string argument (data of type - :mimetype:`application/x-www-form-urlencoded`). Data are returned as a - dictionary. The dictionary keys are the unique query variable names and the - values are lists of values for each name. - - The optional argument *keep_blank_values* is a flag indicating whether blank - values in URL encoded queries should be treated as blank strings. A true value - indicates that blanks should be retained as blank strings. The default false - value indicates that blank values are to be ignored and treated as if they were - not included. - - The optional argument *strict_parsing* is a flag indicating what to do with - parsing errors. If false (the default), errors are silently ignored. If true, - errors raise a :exc:`ValueError` exception. - - Use the :func:`urllib.urlencode` function to convert such dictionaries into - query strings. - + This function is deprecated in this module. Use :func:`urlparse.parse_qs` + instead. It is maintained here only for backward compatiblity. .. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing]]) - Parse a query string given as a string argument (data of type - :mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of - name, value pairs. - - The optional argument *keep_blank_values* is a flag indicating whether blank - values in URL encoded queries should be treated as blank strings. A true value - indicates that blanks should be retained as blank strings. The default false - value indicates that blank values are to be ignored and treated as if they were - not included. - - The optional argument *strict_parsing* is a flag indicating what to do with - parsing errors. If false (the default), errors are silently ignored. If true, - errors raise a :exc:`ValueError` exception. - - Use the :func:`urllib.urlencode` function to convert such lists of pairs into - query strings. - + This function is deprecated in this module. Use :func:`urlparse.parse_qsl` + instead. It is maintained here only for backward compatiblity. .. function:: parse_multipart(fp, pdict) @@ -332,7 +301,7 @@ algorithms implemented in this module in other circumstances. Arguments are *fp* for the input file and *pdict* for a dictionary containing other parameters in the :mailheader:`Content-Type` header. - Returns a dictionary just like :func:`parse_qs` keys are the field names, each + Returns a dictionary just like :func:`urlparse.parse_qs` keys are the field names, each value is a list of values for that field. This is easy to use but not much good if you are expecting megabytes to be uploaded --- in that case, use the :class:`FieldStorage` class instead which is much more flexible. diff --git a/Doc/library/urllib.rst b/Doc/library/urllib.rst index 15e0c7a..b2f96a1 100644 --- a/Doc/library/urllib.rst +++ b/Doc/library/urllib.rst @@ -242,7 +242,7 @@ Utility functions of the sequence. When a sequence of two-element tuples is used as the *query* argument, the first element of each tuple is a key and the second is a value. The order of parameters in the encoded string will match the order of parameter - tuples in the sequence. The :mod:`cgi` module provides the functions + tuples in the sequence. The :mod:`urlparse` module provides the functions :func:`parse_qs` and :func:`parse_qsl` which are used to parse query strings into Python data structures. diff --git a/Doc/library/urlparse.rst b/Doc/library/urlparse.rst index 02984f6..e7ed0f1 100644 --- a/Doc/library/urlparse.rst +++ b/Doc/library/urlparse.rst @@ -101,6 +101,45 @@ The :mod:`urlparse` module defines the following functions: .. versionchanged:: 2.5 Added attributes to return value. +.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing]]) + + Parse a query string given as a string argument (data of type + :mimetype:`application/x-www-form-urlencoded`). Data are returned as a + dictionary. The dictionary keys are the unique query variable names and the + values are lists of values for each name. + + The optional argument *keep_blank_values* is a flag indicating whether blank + values in URL encoded queries should be treated as blank strings. A true value + indicates that blanks should be retained as blank strings. The default false + value indicates that blank values are to be ignored and treated as if they were + not included. + + The optional argument *strict_parsing* is a flag indicating what to do with + parsing errors. If false (the default), errors are silently ignored. If true, + errors raise a :exc:`ValueError` exception. + + Use the :func:`urllib.urlencode` function to convert such dictionaries into + query strings. + + +.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing]]) + + Parse a query string given as a string argument (data of type + :mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of + name, value pairs. + + The optional argument *keep_blank_values* is a flag indicating whether blank + values in URL encoded queries should be treated as blank strings. A true value + indicates that blanks should be retained as blank strings. The default false + value indicates that blank values are to be ignored and treated as if they were + not included. + + The optional argument *strict_parsing* is a flag indicating what to do with + parsing errors. If false (the default), errors are silently ignored. If true, + errors raise a :exc:`ValueError` exception. + + Use the :func:`urllib.urlencode` function to convert such lists of pairs into + query strings. .. function:: urlunparse(parts) diff --git a/Lib/cgi.py b/Lib/cgi.py index dd11389..373ba51 100755 --- a/Lib/cgi.py +++ b/Lib/cgi.py @@ -39,7 +39,9 @@ import sys import os import urllib import UserDict -from warnings import filterwarnings, catch_warnings +import urlparse + +from warnings import filterwarnings, catch_warnings, warn with catch_warnings(): if sys.py3kwarning: filterwarnings("ignore", ".*mimetools has been removed", @@ -173,72 +175,21 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): return parse_qs(qs, keep_blank_values, strict_parsing) -def parse_qs(qs, keep_blank_values=0, strict_parsing=0): - """Parse a query given as a string argument. - - Arguments: - - qs: URL-encoded query string to be parsed +# parse query string function called from urlparse, +# this is done in order to maintain backward compatiblity. - keep_blank_values: flag indicating whether blank values in - URL encoded queries should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. +def parse_qs(qs, keep_blank_values=0, strict_parsing=0): + """Parse a query given as a string argument.""" + warn("cgi.parse_qs is deprecated, use urlparse.parse_qs \ + instead",PendingDeprecationWarning) + return urlparse.parse_qs(qs, keep_blank_values, strict_parsing) - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - """ - dict = {} - for name, value in parse_qsl(qs, keep_blank_values, strict_parsing): - if name in dict: - dict[name].append(value) - else: - dict[name] = [value] - return dict def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): - """Parse a query given as a string argument. - - Arguments: - - qs: URL-encoded query string to be parsed - - keep_blank_values: flag indicating whether blank values in - URL encoded queries should be treated as blank strings. A - true value indicates that blanks should be retained as blank - strings. The default false value indicates that blank values - are to be ignored and treated as if they were not included. - - strict_parsing: flag indicating what to do with parsing errors. If - false (the default), errors are silently ignored. If true, - errors raise a ValueError exception. - - Returns a list, as G-d intended. - """ - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] - r = [] - for name_value in pairs: - if not name_value and not strict_parsing: - continue - nv = name_value.split('=', 1) - if len(nv) != 2: - if strict_parsing: - raise ValueError, "bad query field: %r" % (name_value,) - # Handle case of a control-name with no equal sign - if keep_blank_values: - nv.append('') - else: - continue - if len(nv[1]) or keep_blank_values: - name = urllib.unquote(nv[0].replace('+', ' ')) - value = urllib.unquote(nv[1].replace('+', ' ')) - r.append((name, value)) - - return r - + """Parse a query given as a string argument.""" + warn("cgi.parse_qsl is deprecated, use urlparse.parse_qsl instead", + PendingDeprecationWarning) + return urlparse.parse_qs(qs, keep_blank_values, strict_parsing) def parse_multipart(fp, pdict): """Parse multipart input. @@ -645,8 +596,8 @@ class FieldStorage: if self.qs_on_post: qs += '&' + self.qs_on_post self.list = list = [] - for key, value in parse_qsl(qs, self.keep_blank_values, - self.strict_parsing): + for key, value in urlparse.parse_qsl(qs, self.keep_blank_values, + self.strict_parsing): list.append(MiniFieldStorage(key, value)) self.skip_lines() @@ -659,8 +610,8 @@ class FieldStorage: raise ValueError, 'Invalid boundary in multipart form: %r' % (ib,) self.list = [] if self.qs_on_post: - for key, value in parse_qsl(self.qs_on_post, self.keep_blank_values, - self.strict_parsing): + for key, value in urlparse.parse_qsl(self.qs_on_post, + self.keep_blank_values, self.strict_parsing): self.list.append(MiniFieldStorage(key, value)) FieldStorageClass = None diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py index 042e507..79bca4e 100644 --- a/Lib/test/test_cgi.py +++ b/Lib/test/test_cgi.py @@ -55,23 +55,6 @@ def do_test(buf, method): except StandardError, err: return ComparableException(err) -# A list of test cases. Each test case is a a two-tuple that contains -# a string with the query and a dictionary with the expected result. - -parse_qsl_test_cases = [ - ("", []), - ("&", []), - ("&&", []), - ("=", [('', '')]), - ("=a", [('', 'a')]), - ("a", [('a', '')]), - ("a=", [('a', '')]), - ("a=", [('a', '')]), - ("&a=b", [('a', 'b')]), - ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]), - ("a=1&a=2", [('a', '1'), ('a', '2')]), -] - parse_strict_test_cases = [ ("", ValueError("bad query field: ''")), ("&", ValueError("bad query field: ''")), @@ -143,11 +126,6 @@ def gen_result(data, environ): class CgiTests(unittest.TestCase): - def test_qsl(self): - for orig, expect in parse_qsl_test_cases: - result = cgi.parse_qsl(orig, keep_blank_values=True) - self.assertEqual(result, expect, "Error parsing %s" % repr(orig)) - def test_strict(self): for orig, expect in parse_strict_test_cases: # Test basic parsing diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index e7d9e5a..fcd1989 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -8,6 +8,23 @@ RFC1808_BASE = "http://a/b/c/d;p?q#f" RFC2396_BASE = "http://a/b/c/d;p?q" RFC3986_BASE = "http://a/b/c/d;p?q" +# A list of test cases. Each test case is a a two-tuple that contains +# a string with the query and a dictionary with the expected result. + +parse_qsl_test_cases = [ + ("", []), + ("&", []), + ("&&", []), + ("=", [('', '')]), + ("=a", [('', 'a')]), + ("a", [('a', '')]), + ("a=", [('a', '')]), + ("a=", [('a', '')]), + ("&a=b", [('a', 'b')]), + ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]), + ("a=1&a=2", [('a', '1'), ('a', '2')]), +] + class UrlParseTestCase(unittest.TestCase): def checkRoundtrips(self, url, parsed, split): @@ -61,6 +78,11 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(result3.hostname, result.hostname) self.assertEqual(result3.port, result.port) + def test_qsl(self): + for orig, expect in parse_qsl_test_cases: + result = urlparse.parse_qsl(orig, keep_blank_values=True) + self.assertEqual(result, expect, "Error parsing %s" % repr(orig)) + def test_roundtrips(self): testcases = [ ('file:///tmp/junk.txt', diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 1914304..c56d883 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -5,7 +5,7 @@ UC Irvine, June 1995. """ __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", - "urlsplit", "urlunsplit"] + "urlsplit", "urlunsplit", "parse_qs", "parse_qsl"] # A classification of schemes ('' means apply by default) uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', @@ -267,6 +267,92 @@ def urldefrag(url): else: return url, '' +# unquote method for parse_qs and parse_qsl +# Cannot use directly from urllib as it would create circular reference. +# urllib uses urlparse methods ( urljoin) + +_hextochr = dict(('%02x' % i, chr(i)) for i in range(256)) +_hextochr.update(('%02X' % i, chr(i)) for i in range(256)) + +def unquote(s): + """unquote('abc%20def') -> 'abc def'.""" + res = s.split('%') + for i in xrange(1, len(res)): + item = res[i] + try: + res[i] = _hextochr[item[:2]] + item[2:] + except KeyError: + res[i] = '%' + item + except UnicodeDecodeError: + res[i] = unichr(int(item[:2], 16)) + item[2:] + return "".join(res) + +def parse_qs(qs, keep_blank_values=0, strict_parsing=0): + """Parse a query given as a string argument. + + Arguments: + + qs: URL-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + URL encoded queries should be treated as blank strings. + A true value indicates that blanks should be retained as + blank strings. The default false value indicates that + blank values are to be ignored and treated as if they were + not included. + + strict_parsing: flag indicating what to do with parsing errors. + If false (the default), errors are silently ignored. + If true, errors raise a ValueError exception. + """ + dict = {} + for name, value in parse_qsl(qs, keep_blank_values, strict_parsing): + if name in dict: + dict[name].append(value) + else: + dict[name] = [value] + return dict + +def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): + """Parse a query given as a string argument. + + Arguments: + + qs: URL-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + URL encoded queries should be treated as blank strings. A + true value indicates that blanks should be retained as blank + strings. The default false value indicates that blank values + are to be ignored and treated as if they were not included. + + strict_parsing: flag indicating what to do with parsing errors. If + false (the default), errors are silently ignored. If true, + errors raise a ValueError exception. + + Returns a list, as G-d intended. + """ + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + r = [] + for name_value in pairs: + if not name_value and not strict_parsing: + continue + nv = name_value.split('=', 1) + if len(nv) != 2: + if strict_parsing: + raise ValueError, "bad query field: %r" % (name_value,) + # Handle case of a control-name with no equal sign + if keep_blank_values: + nv.append('') + else: + continue + if len(nv[1]) or keep_blank_values: + name = unquote(nv[0].replace('+', ' ')) + value = unquote(nv[1].replace('+', ' ')) + r.append((name, value)) + + return r + test_input = """ http://a/b/c/d diff --git a/Misc/NEWS b/Misc/NEWS index b4f02b2..f0da457 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -56,6 +56,10 @@ C-API Library ------- +- Issue 600362: Relocated parse_qs() and parse_qsl(), from the cgi module + to the urlparse one. Added a PendingDeprecationWarning in the old + module, it will be deprecated in the future. + - Issue #2562: Fix distutils PKG-INFO writing logic to allow having non-ascii characters and Unicode in setup.py meta-data. -- cgit v0.12