diff options
author | Georg Brandl <georg@python.org> | 2005-08-24 22:20:46 (GMT) |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2005-08-24 22:20:46 (GMT) |
commit | 6eee2fe972301f12f8ba315240fb1b99570f9198 (patch) | |
tree | f3475eed1afb1fb2da95834cbccea3617e29ecba | |
parent | 1694a5971bce0213b4e3f104aca830d2eb7b1fc7 (diff) | |
download | cpython-6eee2fe972301f12f8ba315240fb1b99570f9198.zip cpython-6eee2fe972301f12f8ba315240fb1b99570f9198.tar.gz cpython-6eee2fe972301f12f8ba315240fb1b99570f9198.tar.bz2 |
Backport bug #735248: fix urllib2.parse_http_list().
-rw-r--r-- | Lib/test/test_urllib2.py | 8 | ||||
-rw-r--r-- | Lib/urllib2.py | 75 |
2 files changed, 44 insertions, 39 deletions
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index c68d244..0b2a650 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -41,6 +41,14 @@ class TrivialTests(unittest.TestCase): buf = f.read() f.close() + def test_parse_http_list(self): + tests = [('a,b,c', ['a', 'b', 'c']), + ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']), + ('a, b, "c", "d", "e,f", g, h', ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']), + ('a="b\\"c", d="e\\,f", g="h\\\\i"', ['a="b"c"', 'd="e,f"', 'g="h\\i"'])] + for string, list in tests: + self.assertEquals(urllib2.parse_http_list(string), list) + class MockOpener: addheaders = [] diff --git a/Lib/urllib2.py b/Lib/urllib2.py index f4c15ba..fe9c7b2 100644 --- a/Lib/urllib2.py +++ b/Lib/urllib2.py @@ -1064,49 +1064,46 @@ def parse_keqv_list(l): def parse_http_list(s): """Parse lists as described by RFC 2068 Section 2. - + In particular, parse comma-separated lists where the elements of the list may include quoted-strings. A quoted-string could - contain a comma. + contain a comma. A non-quoted string could have quotes in the + middle. Neither commas nor quotes count if they are escaped. + Only double-quotes count, not single-quotes. """ - # XXX this function could probably use more testing - - list = [] - end = len(s) - i = 0 - inquote = 0 - start = 0 - while i < end: - cur = s[i:] - c = cur.find(',') - q = cur.find('"') - if c == -1: - list.append(s[start:]) - break - if q == -1: - if inquote: - raise ValueError, "unbalanced quotes" - else: - list.append(s[start:i+c]) - i = i + c + 1 + res = [] + part = '' + + escape = quote = False + for cur in s: + if escape: + part += cur + escape = False + continue + if quote: + if cur == '\\': + escape = True continue - if inquote: - if q < c: - list.append(s[start:i+c]) - i = i + c + 1 - start = i - inquote = 0 - else: - i = i + q - else: - if c < q: - list.append(s[start:i+c]) - i = i + c + 1 - start = i - else: - inquote = 1 - i = i + q + 1 - return map(lambda x: x.strip(), list) + elif cur == '"': + quote = False + part += cur + continue + + if cur == ',': + res.append(part) + part = '' + continue + + if cur == '"': + quote = True + + part += cur + + # append last part + if part: + res.append(part) + + return [part.strip() for part in res] class FileHandler(BaseHandler): # Use local file or FTP depending on form of URL |