1 file changed, 74 insertions, 46 deletions
diff --git a/Lib/http/server.py b/Lib/http/server.py
index 7d3b506..4688096 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -82,11 +82,12 @@ XXX To do:
 
 __version__ = "0.6"
 
-__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
+__all__ = [
+    "HTTPServer", "BaseHTTPRequestHandler",
+    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
+]
 
 import html
-import email.message
-import email.parser
 import http.client
 import io
 import mimetypes
@@ -272,7 +273,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
         """
         self.command = None  # set in case of error on the first line
         self.request_version = version = self.default_request_version
-        self.close_connection = 1
+        self.close_connection = True
         requestline = str(self.raw_requestline, 'iso-8859-1')
         requestline = requestline.rstrip('\r\n')
         self.requestline = requestline
@@ -298,14 +299,14 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
                 self.send_error(400, "Bad request version (%r)" % version)
                 return False
             if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
-                self.close_connection = 0
+                self.close_connection = False
             if version_number >= (2, 0):
                 self.send_error(505,
                           "Invalid HTTP Version (%s)" % base_version_number)
                 return False
         elif len(words) == 2:
             command, path = words
-            self.close_connection = 1
+            self.close_connection = True
             if command != 'GET':
                 self.send_error(400,
                                 "Bad HTTP/0.9 request type (%r)" % command)
@@ -327,10 +328,10 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
 
         conntype = self.headers.get('Connection', "")
         if conntype.lower() == 'close':
-            self.close_connection = 1
+            self.close_connection = True
         elif (conntype.lower() == 'keep-alive' and
               self.protocol_version >= "HTTP/1.1"):
-            self.close_connection = 0
+            self.close_connection = False
         # Examine the headers and look for an Expect directive
         expect = self.headers.get('Expect', "")
         if (expect.lower() == "100-continue" and
@@ -375,7 +376,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
                 self.send_error(414)
                 return
             if not self.raw_requestline:
-                self.close_connection = 1
+                self.close_connection = True
                 return
             if not self.parse_request():
                 # An error code has been sent, just exit
@@ -390,23 +391,28 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
         except socket.timeout as e:
             #a read or a write timed out.  Discard this connection
             self.log_error("Request timed out: %r", e)
-            self.close_connection = 1
+            self.close_connection = True
             return
 
     def handle(self):
         """Handle multiple requests if necessary."""
-        self.close_connection = 1
+        self.close_connection = True
 
         self.handle_one_request()
         while not self.close_connection:
             self.handle_one_request()
 
-    def send_error(self, code, message=None):
+    def send_error(self, code, message=None, explain=None):
         """Send and log an error reply.
 
-        Arguments are the error code, and a detailed message.
-        The detailed message defaults to the short entry matching the
-        response code.
+        Arguments are:
+        * code:    an HTTP error code
+                   (3 digits)
+        * message: an optional one-line reason phrase,
+                   *( HTAB / SP / VCHAR / %x80-FF );
+                   defaults to the short entry matching the response code
+        * explain: an optional detailed message; defaults to the long
+                   entry matching the response code.
 
         This sends an error response (so it must be called before any
         output has been generated), logs the error, and finally sends
@@ -420,17 +426,20 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
             shortmsg, longmsg = '???', '???'
         if message is None:
             message = shortmsg
-        explain = longmsg
+        if explain is None:
+            explain = longmsg
         self.log_error("code %d, message %s", code, message)
         # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
         content = (self.error_message_format %
-                   {'code': code, 'message': _quote_html(message), 'explain': explain})
+                   {'code': code, 'message': _quote_html(message), 'explain': _quote_html(explain)})
+        body = content.encode('UTF-8', 'replace')
         self.send_response(code, message)
         self.send_header("Content-Type", self.error_content_type)
         self.send_header('Connection', 'close')
+        self.send_header('Content-Length', int(len(body)))
         self.end_headers()
         if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
-            self.wfile.write(content.encode('UTF-8', 'replace'))
+            self.wfile.write(body)
 
     def send_response(self, code, message=None):
         """Add the response header to the headers buffer and log the
@@ -469,9 +478,9 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
 
         if keyword.lower() == 'connection':
             if value.lower() == 'close':
-                self.close_connection = 1
+                self.close_connection = True
             elif value.lower() == 'keep-alive':
-                self.close_connection = 0
+                self.close_connection = False
 
     def end_headers(self):
         """Send the blank line ending the MIME headers."""
@@ -695,10 +704,14 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
         path = self.translate_path(self.path)
         f = None
         if os.path.isdir(path):
-            if not self.path.endswith('/'):
+            parts = urllib.parse.urlsplit(self.path)
+            if not parts.path.endswith('/'):
                 # redirect browser - doing basically what apache does
                 self.send_response(301)
-                self.send_header("Location", self.path + "/")
+                new_parts = (parts[0], parts[1], parts[2] + '/',
+                             parts[3], parts[4])
+                new_url = urllib.parse.urlunsplit(new_parts)
+                self.send_header("Location", new_url)
                 self.end_headers()
                 return None
             for index in "index.html", "index.htm":
@@ -711,7 +724,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
         ctype = self.guess_type(path)
         try:
             f = open(path, 'rb')
-        except IOError:
+        except OSError:
             self.send_error(404, "File not found")
             return None
         try:
@@ -736,12 +749,17 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
         """
         try:
             list = os.listdir(path)
-        except os.error:
+        except OSError:
             self.send_error(404, "No permission to list directory")
             return None
         list.sort(key=lambda a: a.lower())
         r = []
-        displaypath = html.escape(urllib.parse.unquote(self.path))
+        try:
+            displaypath = urllib.parse.unquote(self.path,
+                                               errors='surrogatepass')
+        except UnicodeDecodeError:
+            displaypath = urllib.parse.unquote(path)
+        displaypath = html.escape(displaypath)
         enc = sys.getfilesystemencoding()
         title = 'Directory listing for %s' % displaypath
         r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
@@ -763,9 +781,11 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
                 displayname = name + "@"
                 # Note: a link to a directory displays with @ and links with /
             r.append('<li><a href="%s">%s</a></li>'
-                    % (urllib.parse.quote(linkname), html.escape(displayname)))
+                    % (urllib.parse.quote(linkname,
+                                          errors='surrogatepass'),
+                       html.escape(displayname)))
         r.append('</ul>\n<hr>\n</body>\n</html>\n')
-        encoded = '\n'.join(r).encode(enc)
+        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
         f = io.BytesIO()
         f.write(encoded)
         f.seek(0)
@@ -788,7 +808,11 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
         path = path.split('#',1)[0]
         # Don't forget explicit trailing slash when normalizing. Issue17324
         trailing_slash = path.rstrip().endswith('/')
-        path = posixpath.normpath(urllib.parse.unquote(path))
+        try:
+            path = urllib.parse.unquote(path, errors='surrogatepass')
+        except UnicodeDecodeError:
+            path = urllib.parse.unquote(path)
+        path = posixpath.normpath(path)
         words = path.split('/')
         words = filter(None, words)
         path = os.getcwd()
@@ -857,19 +881,21 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
 def _url_collapse_path(path):
     """
     Given a URL path, remove extra '/'s and '.' path elements and collapse
-    any '..' references and returns a colllapsed path.
+    any '..' references and returns a collapsed path.
 
     Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
     The utility of this function is limited to is_cgi method and helps
     preventing some security attacks.
 
-    Returns: A tuple of (head, tail) where tail is everything after the final /
-    and head is everything before it.  Head will always start with a '/' and,
-    if it contains anything else, never have a trailing '/'.
+    Returns: The reconstituted URL, which will always start with a '/'.
 
     Raises: IndexError if too many '..' occur within the path.
 
     """
+    # Split off the query string so it is neither unquoted nor collapsed.
+    path, _, query = path.partition('?')
+    path = urllib.parse.unquote(path)
+
     # Similar to os.path.split(os.path.normpath(path)) but specific to URL
     # path semantics rather than local operating system semantics.
     path_parts = path.split('/')
@@ -890,6 +916,9 @@ def _url_collapse_path(path):
     else:
         tail_part = ''
 
+    if query:
+        tail_part = '?'.join((tail_part, query))
+
     splitpath = ('/' + '/'.join(head_parts), tail_part)
     collapsed_path = "/".join(splitpath)
 
@@ -971,7 +1000,7 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
         (and the next character is a '/' or the end of the string).
 
         """
-        collapsed_path = _url_collapse_path(urllib.parse.unquote(self.path))
+        collapsed_path = _url_collapse_path(self.path)
         dir_sep = collapsed_path.find('/', 1)
         head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
         if head in self.cgi_directories:
@@ -1008,11 +1037,7 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
                 break
 
         # find an explicit query string, if present.
-        i = rest.rfind('?')
-        if i >= 0:
-            rest, query = rest[:i], rest[i+1:]
-        else:
-            query = ''
+        rest, _, query = rest.partition('?')
 
         # dissect the part after the directory name into a script name &
         # a possible additional path, to be stored in PATH_INFO.
@@ -1130,7 +1155,7 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
             try:
                 try:
                     os.setuid(nobody)
-                except os.error:
+                except OSError:
                     pass
                 os.dup2(self.rfile.fileno(), 0)
                 os.dup2(self.wfile.fileno(), 1)
@@ -1183,15 +1208,14 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
                 self.log_message("CGI script exited OK")
 
 
-def test(HandlerClass = BaseHTTPRequestHandler,
-         ServerClass = HTTPServer, protocol="HTTP/1.0", port=8000):
+def test(HandlerClass=BaseHTTPRequestHandler,
+         ServerClass=HTTPServer, protocol="HTTP/1.0", port=8000, bind=""):
     """Test the HTTP request handler class.
 
-    This runs an HTTP server on port 8000 (or the first command line
-    argument).
+    This runs an HTTP server on port 8000 (or the port argument).
 
     """
-    server_address = ('', port)
+    server_address = (bind, port)
 
     HandlerClass.protocol_version = protocol
     httpd = ServerClass(server_address, HandlerClass)
@@ -1209,12 +1233,16 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
+    parser.add_argument('--bind', '-b', default='', metavar='ADDRESS',
+                        help='Specify alternate bind address '
+                             '[default: all interfaces]')
     parser.add_argument('port', action='store',
                         default=8000, type=int,
                         nargs='?',
                         help='Specify alternate port [default: 8000]')
     args = parser.parse_args()
     if args.cgi:
-        test(HandlerClass=CGIHTTPRequestHandler, port=args.port)
+        handler_class = CGIHTTPRequestHandler
     else:
-        test(HandlerClass=SimpleHTTPRequestHandler, port=args.port)
+        handler_class = SimpleHTTPRequestHandler
+    test(HandlerClass=handler_class, port=args.port, bind=args.bind)