From ad71f0f0160e82c9a8be347cc0d952a69244b434 Mon Sep 17 00:00:00 2001
From: Benjamin Peterson <benjamin@python.org>
Date: Sat, 11 Apr 2009 20:12:10 +0000
Subject: Merged revisions 71303 via svnmerge from
 svn+ssh://pythondev@svn.python.org/python/trunk

........
  r71303 | gregory.p.smith | 2009-04-06 01:33:26 -0500 (Mon, 06 Apr 2009) | 3 lines

  - Issue #2254: Fix CGIHTTPServer information disclosure.  Relative paths are
    now collapsed within the url properly before looking in cgi_directories.
........
---
 Lib/http/server.py           | 64 ++++++++++++++++++++++++++++++++++----------
 Lib/test/test_httpservers.py | 46 +++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+), 14 deletions(-)

diff --git a/Lib/http/server.py b/Lib/http/server.py
index 5b5ef0a..09fa44c 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -773,6 +773,46 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
 
 # Utilities for CGIHTTPRequestHandler
 
+# TODO(gregory.p.smith): Move this into an appropriate library.
+def _url_collapse_path_split(path):
+    """
+    Given a URL path, remove extra '/'s and '.' path elements and collapse
+    any '..' references.
+
+    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
+
+    Returns: A tuple of (head, tail) where tail is everything after the final
+    '/' ('' when that is '.' or '..') and head is the collapsed part before
+    it.  Head always starts with '/' and otherwise has no trailing '/'.
+
+    Raises: IndexError if too many '..' occur within the path.
+    """
+    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
+    # path semantics rather than local operating system semantics.
+    path_parts = []
+    for part in path.split('/'):
+        if part == '.':
+            path_parts.append('')  # '.' is a no-op segment; blank it out
+        else:
+            path_parts.append(part)
+    # Drop empty non-trailing parts ('//' and '.') before resolving any '..'.
+    path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
+    if path_parts:  # always true: str.split('/') yields at least one part
+        tail_part = path_parts.pop()
+    else:
+        tail_part = ''
+    head_parts = []
+    for part in path_parts:
+        if part == '..':
+            head_parts.pop()  # IndexError if '..' climbs above the root
+        else:
+            head_parts.append(part)
+    if tail_part and tail_part == '..':  # a trailing '..' also pops a level
+        head_parts.pop()
+        tail_part = ''
+    return ('/' + '/'.join(head_parts), tail_part)
+
+
 nobody = None
 
 def nobody_uid():
@@ -839,24 +879,20 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
     def is_cgi(self):
         """Test whether self.path corresponds to a CGI script.
 
-        Return a tuple (dir, rest) if self.path requires running a
-        CGI script, None if not.  Note that rest begins with a
-        slash if it is not empty.
+        Returns True and updates the cgi_info attribute to the tuple
+        (dir, rest) if self.path requires running a CGI script.
+        Returns False otherwise.
 
-        The default implementation tests whether the path
-        begins with one of the strings in the list
-        self.cgi_directories (and the next character is a '/'
-        or the end of the string).
+        The default implementation collapses self.path and tests whether
+        the part before the final '/' equals a string in
+        self.cgi_directories; too many '..' in the path raise IndexError.
 
         """
 
-        path = self.path
-
-        for x in self.cgi_directories:
-            i = len(x)
-            if path[:i] == x and (not path[i:] or path[i] == '/'):
-                self.cgi_info = path[:i], path[i+1:]
-                return True
+        splitpath = _url_collapse_path_split(self.path)
+        if splitpath[0] in self.cgi_directories:  # exact match, no subdirs
+            self.cgi_info = splitpath
+            return True
         return False
 
     cgi_directories = ['/cgi-bin', '/htbin']
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
index 0305a90..837d4a6 100644
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -6,6 +6,7 @@ Josip Dzolonga, and Michael Otteneder for the 2007/08 GHOP contest.
 
 from http.server import BaseHTTPRequestHandler, HTTPServer, \
      SimpleHTTPRequestHandler, CGIHTTPRequestHandler
+from http import server
 
 import os
 import sys
@@ -316,6 +317,45 @@ class CGIHTTPServerTestCase(BaseTestCase):
         finally:
             BaseTestCase.tearDown(self)
 
+    def test_url_collapse_path_split(self):
+        test_vectors = {  # path -> expected (head, tail) or exception class
+            '': ('/', ''),
+            '..': IndexError,  # '..' with nothing above it to pop
+            '/.//..': IndexError,
+            '/': ('/', ''),
+            '//': ('/', ''),
+            '/\\': ('/', '\\'),  # a backslash is not treated as a separator
+            '/.//': ('/', ''),
+            'cgi-bin/file1.py': ('/cgi-bin', 'file1.py'),
+            '/cgi-bin/file1.py': ('/cgi-bin', 'file1.py'),
+            'a': ('/', 'a'),
+            '/a': ('/', 'a'),
+            '//a': ('/', 'a'),
+            './a': ('/', 'a'),
+            './C:/': ('/C:', ''),  # 'C:' is kept as an ordinary segment
+            '/a/b': ('/a', 'b'),
+            '/a/b/': ('/a/b', ''),
+            '/a/b/c/..': ('/a/b', ''),  # trailing '..' leaves an empty tail
+            '/a/b/c/../d': ('/a/b', 'd'),
+            '/a/b/c/../d/e/../f': ('/a/b/d', 'f'),
+            '/a/b/c/../d/e/../../f': ('/a/b', 'f'),
+            '/a/b/c/../d/e/.././././..//f': ('/a/b', 'f'),
+            '../a/b/c/../d/e/.././././..//f': IndexError,
+            '/a/b/c/../d/e/../../../f': ('/a', 'f'),
+            '/a/b/c/../d/e/../../../../f': ('/', 'f'),
+            '/a/b/c/../d/e/../../../../../f': IndexError,
+            '/a/b/c/../d/e/../../../../f/..': ('/', ''),
+        }
+        for path, expected in test_vectors.items():
+            if isinstance(expected, type) and issubclass(expected, Exception):
+                self.assertRaises(expected,
+                                  server._url_collapse_path_split, path)
+            else:
+                actual = server._url_collapse_path_split(path)
+                self.assertEquals(expected, actual,
+                                  msg='path = %r\nGot:    %r\nWanted: %r' % (
+                                  path, actual, expected))
+
     def test_headers_and_content(self):
         res = self.request('/cgi-bin/file1.py')
         self.assertEquals((b'Hello World\n', 'text/html', 200), \
@@ -341,6 +381,12 @@ class CGIHTTPServerTestCase(BaseTestCase):
         self.assertEquals((b'Hello World\n', 'text/html', 200), \
              (res.read(), res.getheader('Content-type'), res.status))
 
+    def test_no_leading_slash(self):
+        # http://bugs.python.org/issue2254
+        res = self.request('cgi-bin/file1.py')  # path has no leading '/'
+        self.assertEquals((b'Hello World\n', 'text/html', 200),
+             (res.read(), res.getheader('Content-type'), res.status))
+
 
 def test_main(verbose=None):
     try:
-- 
cgit v0.12