diff options
-rw-r--r-- | Lib/http/server.py | 64 | ||||
-rw-r--r-- | Lib/test/test_httpservers.py | 46 |
2 files changed, 96 insertions, 14 deletions
diff --git a/Lib/http/server.py b/Lib/http/server.py
index 5b5ef0a..09fa44c 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -773,6 +773,46 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
 
 # Utilities for CGIHTTPRequestHandler
 
+# TODO(gregory.p.smith): Move this into an appropriate library.
+def _url_collapse_path_split(path):
+    """
+    Given a URL path, remove extra '/'s and '.' path elements and collapse
+    any '..' references.
+
+    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
+
+    Returns: A tuple of (head, tail) where tail is everything after the final /
+    and head is everything before it. Head will always start with a '/' and,
+    if it contains anything else, never have a trailing '/'.
+
+    Raises: IndexError if too many '..' occur within the path.
+    """
+    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
+    # path semantics rather than local operating system semantics.
+    path_parts = []
+    for part in path.split('/'):
+        if part == '.':
+            path_parts.append('')
+        else:
+            path_parts.append(part)
+    # Filter out blank non trailing parts before consuming the '..'.
+    path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
+    if path_parts:
+        tail_part = path_parts.pop()
+    else:
+        tail_part = ''
+    head_parts = []
+    for part in path_parts:
+        if part == '..':
+            head_parts.pop()
+        else:
+            head_parts.append(part)
+    if tail_part and tail_part == '..':
+        head_parts.pop()
+        tail_part = ''
+    return ('/' + '/'.join(head_parts), tail_part)
+
+
 nobody = None
 
 def nobody_uid():
@@ -839,24 +879,20 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
     def is_cgi(self):
         """Test whether self.path corresponds to a CGI script.
 
-        Return a tuple (dir, rest) if self.path requires running a
-        CGI script, None if not. Note that rest begins with a
-        slash if it is not empty.
+        Returns True and updates the cgi_info attribute to the tuple
+        (dir, rest) if self.path requires running a CGI script.
+        Returns False otherwise.
 
-        The default implementation tests whether the path
-        begins with one of the strings in the list
-        self.cgi_directories (and the next character is a '/'
-        or the end of the string).
+        The default implementation tests whether the normalized url
+        path begins with one of the strings in self.cgi_directories
+        (and the next character is a '/' or the end of the string).
 
         """
 
-        path = self.path
-
-        for x in self.cgi_directories:
-            i = len(x)
-            if path[:i] == x and (not path[i:] or path[i] == '/'):
-                self.cgi_info = path[:i], path[i+1:]
-                return True
+        splitpath = _url_collapse_path_split(self.path)
+        if splitpath[0] in self.cgi_directories:
+            self.cgi_info = splitpath
+            return True
         return False
 
     cgi_directories = ['/cgi-bin', '/htbin']
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
index 0305a90..837d4a6 100644
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -6,6 +6,7 @@ Josip Dzolonga, and Michael Otteneder for the 2007/08 GHOP contest.
 
 from http.server import BaseHTTPRequestHandler, HTTPServer, \
      SimpleHTTPRequestHandler, CGIHTTPRequestHandler
+from http import server
 
 import os
 import sys
@@ -316,6 +317,45 @@ class CGIHTTPServerTestCase(BaseTestCase):
         finally:
             BaseTestCase.tearDown(self)
 
+    def test_url_collapse_path_split(self):
+        test_vectors = {
+            '': ('/', ''),
+            '..': IndexError,
+            '/.//..': IndexError,
+            '/': ('/', ''),
+            '//': ('/', ''),
+            '/\\': ('/', '\\'),
+            '/.//': ('/', ''),
+            'cgi-bin/file1.py': ('/cgi-bin', 'file1.py'),
+            '/cgi-bin/file1.py': ('/cgi-bin', 'file1.py'),
+            'a': ('/', 'a'),
+            '/a': ('/', 'a'),
+            '//a': ('/', 'a'),
+            './a': ('/', 'a'),
+            './C:/': ('/C:', ''),
+            '/a/b': ('/a', 'b'),
+            '/a/b/': ('/a/b', ''),
+            '/a/b/c/..': ('/a/b', ''),
+            '/a/b/c/../d': ('/a/b', 'd'),
+            '/a/b/c/../d/e/../f': ('/a/b/d', 'f'),
+            '/a/b/c/../d/e/../../f': ('/a/b', 'f'),
+            '/a/b/c/../d/e/.././././..//f': ('/a/b', 'f'),
+            '../a/b/c/../d/e/.././././..//f': IndexError,
+            '/a/b/c/../d/e/../../../f': ('/a', 'f'),
+            '/a/b/c/../d/e/../../../../f': ('/', 'f'),
+            '/a/b/c/../d/e/../../../../../f': IndexError,
+            '/a/b/c/../d/e/../../../../f/..': ('/', ''),
+        }
+        for path, expected in test_vectors.items():
+            if isinstance(expected, type) and issubclass(expected, Exception):
+                self.assertRaises(expected,
+                                  server._url_collapse_path_split, path)
+            else:
+                actual = server._url_collapse_path_split(path)
+                self.assertEquals(expected, actual,
+                                  msg='path = %r\nGot: %r\nWanted: %r' % (
+                                  path, actual, expected))
+
     def test_headers_and_content(self):
         res = self.request('/cgi-bin/file1.py')
         self.assertEquals((b'Hello World\n', 'text/html', 200), \
@@ -341,6 +381,12 @@ class CGIHTTPServerTestCase(BaseTestCase):
         self.assertEquals((b'Hello World\n', 'text/html', 200), \
              (res.read(), res.getheader('Content-type'), res.status))
 
+    def test_no_leading_slash(self):
+        # http://bugs.python.org/issue2254
+        res = self.request('cgi-bin/file1.py')
+        self.assertEquals((b'Hello World\n', 'text/html', 200),
+             (res.read(), res.getheader('Content-type'), res.status))
+
 
 def test_main(verbose=None):
     try: