From e1b13d20199f79ffd3407bbb14cc09b1b8fd70d2 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Wed, 24 Aug 2005 22:20:32 +0000 Subject: Bug #735248: Fix urllib2.parse_http_list. --- Lib/test/test_urllib2.py | 8 ++++++ Lib/urllib2.py | 75 +++++++++++++++++++++++------------------------- 2 files changed, 44 insertions(+), 39 deletions(-) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index b07fd36..8248967 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -45,6 +45,14 @@ class TrivialTests(unittest.TestCase): # test the new-in-2.5 httpresponses dictionary self.assertEquals(urllib2.httpresponses[404], "Not Found") + def test_parse_http_list(self): + tests = [('a,b,c', ['a', 'b', 'c']), + ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']), + ('a, b, "c", "d", "e,f", g, h', ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']), + ('a="b\\"c", d="e\\,f", g="h\\\\i"', ['a="b"c"', 'd="e,f"', 'g="h\\i"'])] + for string, list in tests: + self.assertEquals(urllib2.parse_http_list(string), list) + class MockOpener: addheaders = [] diff --git a/Lib/urllib2.py b/Lib/urllib2.py index e72f6a6..84b4bb3 100644 --- a/Lib/urllib2.py +++ b/Lib/urllib2.py @@ -1069,49 +1069,46 @@ def parse_keqv_list(l): def parse_http_list(s): """Parse lists as described by RFC 2068 Section 2. - + In particular, parse comma-separated lists where the elements of the list may include quoted-strings. A quoted-string could - contain a comma. + contain a comma. A non-quoted string could have quotes in the + middle. Neither commas nor quotes count if they are escaped. + Only double-quotes count, not single-quotes. """ - # XXX this function could probably use more testing - - list = [] - end = len(s) - i = 0 - inquote = 0 - start = 0 - while i < end: - cur = s[i:] - c = cur.find(',') - q = cur.find('"') - if c == -1: - list.append(s[start:]) - break - if q == -1: - if inquote: - raise ValueError, "unbalanced quotes" - else: - list.append(s[start:i+c]) - i = i + c + 1 + res = [] + part = '' + + escape = quote = False + for cur in s: + if escape: + part += cur + escape = False + continue + if quote: + if cur == '\\': + escape = True continue - if inquote: - if q < c: - list.append(s[start:i+c]) - i = i + c + 1 - start = i - inquote = 0 - else: - i = i + q - else: - if c < q: - list.append(s[start:i+c]) - i = i + c + 1 - start = i - else: - inquote = 1 - i = i + q + 1 - return map(lambda x: x.strip(), list) + elif cur == '"': + quote = False + part += cur + continue + + if cur == ',': + res.append(part) + part = '' + continue + + if cur == '"': + quote = True + + part += cur + + # append last part + if part: + res.append(part) + + return [part.strip() for part in res] class FileHandler(BaseHandler): # Use local file or FTP depending on form of URL -- cgit v0.12