summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Georg Brandl <georg@python.org>, 2005-08-24 22:20:32 (GMT)
committer: Georg Brandl <georg@python.org>, 2005-08-24 22:20:32 (GMT)
commit: e1b13d20199f79ffd3407bbb14cc09b1b8fd70d2 (patch)
tree: 8df7a1a1a316d551d0cb7f5cc36312f957ce9c94
parent: 256372c88cbd203e92950129c228c1df82d65f3e (diff)
download: cpython-e1b13d20199f79ffd3407bbb14cc09b1b8fd70d2.zip
cpython-e1b13d20199f79ffd3407bbb14cc09b1b8fd70d2.tar.gz
cpython-e1b13d20199f79ffd3407bbb14cc09b1b8fd70d2.tar.bz2
Bug #735248: Fix urllib2.parse_http_list.
-rw-r--r-- Lib/test/test_urllib2.py 8
-rw-r--r-- Lib/urllib2.py 75
2 files changed, 44 insertions, 39 deletions
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index b07fd36..8248967 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -45,6 +45,14 @@ class TrivialTests(unittest.TestCase):
# test the new-in-2.5 httpresponses dictionary
self.assertEquals(urllib2.httpresponses[404], "Not Found")
+ def test_parse_http_list(self):
+ tests = [('a,b,c', ['a', 'b', 'c']),
+ ('path"o,l"og"i"cal, example', ['path"o,l"og"i"cal', 'example']),
+ ('a, b, "c", "d", "e,f", g, h', ['a', 'b', '"c"', '"d"', '"e,f"', 'g', 'h']),
+ ('a="b\\"c", d="e\\,f", g="h\\\\i"', ['a="b"c"', 'd="e,f"', 'g="h\\i"'])]
+ for string, list in tests:
+ self.assertEquals(urllib2.parse_http_list(string), list)
+
class MockOpener:
addheaders = []
diff --git a/Lib/urllib2.py b/Lib/urllib2.py
index e72f6a6..84b4bb3 100644
--- a/Lib/urllib2.py
+++ b/Lib/urllib2.py
@@ -1069,49 +1069,46 @@ def parse_keqv_list(l):
def parse_http_list(s):
"""Parse lists as described by RFC 2068 Section 2.
-
+
In particular, parse comma-separated lists where the elements of
the list may include quoted-strings. A quoted-string could
- contain a comma.
+ contain a comma. A non-quoted string could have quotes in the
+ middle. Neither commas nor quotes count if they are escaped.
+ Only double-quotes count, not single-quotes.
"""
- # XXX this function could probably use more testing
-
- list = []
- end = len(s)
- i = 0
- inquote = 0
- start = 0
- while i < end:
- cur = s[i:]
- c = cur.find(',')
- q = cur.find('"')
- if c == -1:
- list.append(s[start:])
- break
- if q == -1:
- if inquote:
- raise ValueError, "unbalanced quotes"
- else:
- list.append(s[start:i+c])
- i = i + c + 1
+ res = []
+ part = ''
+
+ escape = quote = False
+ for cur in s:
+ if escape:
+ part += cur
+ escape = False
+ continue
+ if quote:
+ if cur == '\\':
+ escape = True
continue
- if inquote:
- if q < c:
- list.append(s[start:i+c])
- i = i + c + 1
- start = i
- inquote = 0
- else:
- i = i + q
- else:
- if c < q:
- list.append(s[start:i+c])
- i = i + c + 1
- start = i
- else:
- inquote = 1
- i = i + q + 1
- return map(lambda x: x.strip(), list)
+ elif cur == '"':
+ quote = False
+ part += cur
+ continue
+
+ if cur == ',':
+ res.append(part)
+ part = ''
+ continue
+
+ if cur == '"':
+ quote = True
+
+ part += cur
+
+ # append last part
+ if part:
+ res.append(part)
+
+ return [part.strip() for part in res]
class FileHandler(BaseHandler):
# Use local file or FTP depending on form of URL