Patch for issue 2848, mostly by Humberto Diogenes, with a couple of

small fixes by Barry. This removes mimetools from the stdlib.
author: Barry Warsaw <barry@python.org> 2008-06-12 04:06:45 (GMT)
committer: Barry Warsaw <barry@python.org> 2008-06-12 04:06:45 (GMT)
commit: 820c1200597606f95bb996586be88a3283c6448c (patch)
tree: 1b914ab96ccc9cd81465a6c3e765c97f128fd464 /Lib/http
parent: 75f25f2c9a4646746efbc056b4d2a07b40f93964 (diff)
download: cpython-820c1200597606f95bb996586be88a3283c6448c.zip
cpython-820c1200597606f95bb996586be88a3283c6448c.tar.gz
cpython-820c1200597606f95bb996586be88a3283c6448c.tar.bz2
3 files changed, 77 insertions, 131 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py
index c6e40e1..04e75f6 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -67,8 +67,9 @@ Req-sent-unread-response       _CS_REQ_SENT       <response_class>
 """
 
 import io
-import mimetools
 import socket
+import email.parser
+import email.message
 from urlparse import urlsplit
 import warnings
 
@@ -201,110 +202,52 @@ responses = {
 # maximal amount of data to read at one time in _safe_read
 MAXAMOUNT = 1048576
 
-class HTTPMessage(mimetools.Message):
+class HTTPMessage(email.message.Message):
+    def getallmatchingheaders(self, name):
+        """Find all header lines matching a given header name.
+
+        Look through the list of headers and find all lines matching a given
+        header name (and their continuation lines).  A list of the lines is
+        returned, without interpretation.  If the header does not occur, an
+        empty list is returned.  If the header occurs multiple times, all
+        occurrences are returned.  Case is not important in the header name.
 
-    def addheader(self, key, value):
-        """Add header for field key handling repeats."""
-        prev = self.dict.get(key)
-        if prev is None:
-            self.dict[key] = value
-        else:
-            combined = ", ".join((prev, value))
-            self.dict[key] = combined
-
-    def addcontinue(self, key, more):
-        """Add more field data from a continuation line."""
-        prev = self.dict[key]
-        self.dict[key] = prev + "\n " + more
-
-    def readheaders(self):
-        """Read header lines.
-
-        Read header lines up to the entirely blank line that terminates them.
-        The (normally blank) line that ends the headers is skipped, but not
-        included in the returned list.  If a non-header line ends the headers,
-        (which is an error), an attempt is made to backspace over it; it is
-        never included in the returned list.
-
-        The variable self.status is set to the empty string if all went well,
-        otherwise it is an error message.  The variable self.headers is a
-        completely uninterpreted list of lines contained in the header (so
-        printing them will reproduce the header exactly as it appears in the
-        file).
-
-        If multiple header fields with the same name occur, they are combined
-        according to the rules in RFC 2616 sec 4.2:
-
-        Appending each subsequent field-value to the first, each separated
-        by a comma. The order in which header fields with the same field-name
-        are received is significant to the interpretation of the combined
-        field value.
         """
-        # XXX The implementation overrides the readheaders() method of
-        # rfc822.Message.  The base class design isn't amenable to
-        # customized behavior here so the method here is a copy of the
-        # base class code with a few small changes.
-
-        self.dict = {}
-        self.unixfrom = ''
-        self.headers = hlist = []
-        self.status = ''
-        headerseen = ""
-        firstline = 1
-        startofline = unread = tell = None
-        if hasattr(self.fp, 'unread'):
-            unread = self.fp.unread
-        elif self.seekable:
-            tell = self.fp.tell
-        while True:
-            if tell:
-                try:
-                    startofline = tell()
-                except IOError:
-                    startofline = tell = None
-                    self.seekable = 0
-            line = str(self.fp.readline(), "iso-8859-1")
-            if not line:
-                self.status = 'EOF in headers'
-                break
-            # Skip unix From name time lines
-            if firstline and line.startswith('From '):
-                self.unixfrom = self.unixfrom + line
-                continue
-            firstline = 0
-            if headerseen and line[0] in ' \t':
-                # XXX Not sure if continuation lines are handled properly
-                # for http and/or for repeating headers
-                # It's a continuation line.
-                hlist.append(line)
-                self.addcontinue(headerseen, line.strip())
-                continue
-            elif self.iscomment(line):
-                # It's a comment.  Ignore it.
-                continue
-            elif self.islast(line):
-                # Note! No pushback here!  The delimiter line gets eaten.
-                break
-            headerseen = self.isheader(line)
-            if headerseen:
-                # It's a legal header line, save it.
-                hlist.append(line)
-                self.addheader(headerseen, line[len(headerseen)+1:].strip())
-                continue
-            else:
-                # It's not a header line; throw it back and stop here.
-                if not self.dict:
-                    self.status = 'No headers'
-                else:
-                    self.status = 'Non-header line where header expected'
-                # Try to undo the read.
-                if unread:
-                    unread(line)
-                elif tell:
-                    self.fp.seek(startofline)
-                else:
-                    self.status = self.status + '; bad seek'
-                break
+        # XXX: copied from rfc822.Message for compatibility
+        name = name.lower() + ':'
+        n = len(name)
+        lst = []
+        hit = 0
+        for line in self.keys():
+            if line[:n].lower() == name:
+                hit = 1
+            elif not line[:1].isspace():
+                hit = 0
+            if hit:
+                lst.append(line)
+        return lst
+
+def parse_headers(fp):
+    """Parses only RFC2822 headers from a file pointer.
+
+    email Parser wants to see strings rather than bytes.
+    But a TextIOWrapper around self.rfile would buffer too many bytes
+    from the stream, bytes which we later need to read as bytes.
+    So we read the correct bytes here, as bytes, for email Parser
+    to parse.
+
+    """
+    # XXX: Copied from http.server.BaseHTTPRequestHandler.parse_request,
+    # maybe we can just call this function from there.
+    headers = []
+    while True:
+        line = fp.readline()
+        headers.append(line)
+        if line in (b'\r\n', b'\n', b''):
+            break
+    hstring = b''.join(headers).decode('iso-8859-1')
+
+    return email.parser.Parser(_class=HTTPMessage).parsestr(hstring)
 
 class HTTPResponse:
 
@@ -418,19 +361,17 @@ class HTTPResponse:
             self.length = None
             self.chunked = 0
             self.will_close = 1
-            self.msg = HTTPMessage(io.BytesIO())
+            self.msg = email.message_from_string('')
             return
 
-        self.msg = HTTPMessage(self.fp, 0)
+        self.msg = parse_headers(self.fp)
+
         if self.debuglevel > 0:
-            for hdr in self.msg.headers:
+            for hdr in self.msg:
                 print("header:", hdr, end=" ")
 
-        # don't let the msg keep an fp
-        self.msg.fp = None
-
         # are we using the chunked-style of transfer encoding?
-        tr_enc = self.msg.getheader("transfer-encoding")
+        tr_enc = self.msg.get("transfer-encoding")
         if tr_enc and tr_enc.lower() == "chunked":
             self.chunked = 1
             self.chunk_left = None
@@ -443,7 +384,10 @@ class HTTPResponse:
         # do we have a Content-Length?
         # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
         self.length = None
-        length = self.msg.getheader("content-length")
+        length = self.msg.get("content-length")
+
+         # are we using the chunked-style of transfer encoding?
+        tr_enc = self.msg.get("transfer-encoding")
         if length and not self.chunked:
             try:
                 self.length = int(length)
@@ -470,11 +414,11 @@ class HTTPResponse:
             self.will_close = 1
 
     def _check_close(self):
-        conn = self.msg.getheader("connection")
+        conn = self.msg.get("connection")
         if self.version == 11:
             # An HTTP/1.1 proxy is assumed to stay open unless
             # explicitly closed.
-            conn = self.msg.getheader("connection")
+            conn = self.msg.get("connection")
             if conn and "close" in conn.lower():
                 return True
             return False
@@ -483,7 +427,7 @@ class HTTPResponse:
         # connections, using rules different than HTTP/1.1.
 
         # For older HTTP, Keep-Alive indicates persistent connection.
-        if self.msg.getheader("keep-alive"):
+        if self.msg.get("keep-alive"):
             return False
 
         # At least Akamai returns a "Connection: Keep-Alive" header,
@@ -492,7 +436,7 @@ class HTTPResponse:
             return False
 
         # Proxy-Connection is a netscape hack.
-        pconn = self.msg.getheader("proxy-connection")
+        pconn = self.msg.get("proxy-connection")
         if pconn and "keep-alive" in pconn.lower():
             return False
 
@@ -644,7 +588,7 @@ class HTTPResponse:
     def getheader(self, name, default=None):
         if self.msg is None:
             raise ResponseNotReady()
-        return self.msg.getheader(name, default)
+        return ', '.join(self.msg.get_all(name, default))
 
     def getheaders(self):
         """Return list of (header, value) tuples."""
diff --git a/Lib/http/cookiejar.py b/Lib/http/cookiejar.py
index 4d4a105..99be888 100644
--- a/Lib/http/cookiejar.py
+++ b/Lib/http/cookiejar.py
@@ -1547,8 +1547,8 @@ class CookieJar:
         """Return sequence of Cookie objects extracted from response object."""
         # get cookie-attributes for RFC 2965 and Netscape protocols
         headers = response.info()
-        rfc2965_hdrs = headers.getheaders("Set-Cookie2")
-        ns_hdrs = headers.getheaders("Set-Cookie")
+        rfc2965_hdrs = headers.get_all("Set-Cookie2", [])
+        ns_hdrs = headers.get_all("Set-Cookie", [])
 
         rfc2965 = self._policy.rfc2965
         netscape = self._policy.netscape
diff --git a/Lib/http/server.py b/Lib/http/server.py
index 4f41a19..2b6f135 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -95,10 +95,11 @@ import socket # For gethostbyaddr()
 import shutil
 import urllib
 import select
-import mimetools
 import mimetypes
 import posixpath
 import socketserver
+import email.message
+import email.parser
 
 # Default error message template
 DEFAULT_ERROR_MESSAGE = """\
@@ -211,7 +212,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
 
     - command, path and version are the broken-down request line;
 
-    - headers is an instance of mimetools.Message (or a derived
+    - headers is an instance of email.message.Message (or a derived
     class) containing the header information;
 
     - rfile is a file object open for reading positioned at the
@@ -326,7 +327,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
             if line in (b'\r\n', b'\n', b''):
                 break
         hfile = io.StringIO(b''.join(headers).decode('iso-8859-1'))
-        self.headers = self.MessageClass(hfile)
+        self.headers = email.parser.Parser(_class=self.MessageClass).parse(hfile)
 
         conntype = self.headers.get('Connection', "")
         if conntype.lower() == 'close':
@@ -524,8 +525,9 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
     # Set this to HTTP/1.1 to enable automatic keepalive
     protocol_version = "HTTP/1.0"
 
-    # The Message-like class used to parse headers
-    MessageClass = mimetools.Message
+    # MessageClass used to parse headers
+    import http.client
+    MessageClass = http.client.HTTPMessage
 
     # Table mapping response codes to messages; entries have the
     # form {code: (shortmessage, longmessage)}.
@@ -955,7 +957,7 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
         if host != self.client_address[0]:
             env['REMOTE_HOST'] = host
         env['REMOTE_ADDR'] = self.client_address[0]
-        authorization = self.headers.getheader("authorization")
+        authorization = self.headers.get("authorization")
         if authorization:
             authorization = authorization.split()
             if len(authorization) == 2:
@@ -973,14 +975,14 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
                         if len(authorization) == 2:
                             env['REMOTE_USER'] = authorization[0]
         # XXX REMOTE_IDENT
-        if self.headers.typeheader is None:
-            env['CONTENT_TYPE'] = self.headers.type
+        if self.headers.get('content-type') is None:
+            env['CONTENT_TYPE'] = self.headers.get_content_type()
         else:
-            env['CONTENT_TYPE'] = self.headers.typeheader
-        length = self.headers.getheader('content-length')
+            env['CONTENT_TYPE'] = self.headers['content-type']
+        length = self.headers.get('content-length')
         if length:
             env['CONTENT_LENGTH'] = length
-        referer = self.headers.getheader('referer')
+        referer = self.headers.get('referer')
         if referer:
             env['HTTP_REFERER'] = referer
         accept = []
@@ -990,10 +992,10 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
             else:
                 accept = accept + line[7:].split(',')
         env['HTTP_ACCEPT'] = ','.join(accept)
-        ua = self.headers.getheader('user-agent')
+        ua = self.headers.get('user-agent')
         if ua:
             env['HTTP_USER_AGENT'] = ua
-        co = filter(None, self.headers.getheaders('cookie'))
+        co = filter(None, self.headers.get_all('cookie', []))
         if co:
             env['HTTP_COOKIE'] = ', '.join(co)
         # XXX Other HTTP_* headers
author	Barry Warsaw <barry@python.org>	2008-06-12 04:06:45 (GMT)
committer	Barry Warsaw <barry@python.org>	2008-06-12 04:06:45 (GMT)
commit	820c1200597606f95bb996586be88a3283c6448c (patch)
tree	1b914ab96ccc9cd81465a6c3e765c97f128fd464 /Lib/http
parent	75f25f2c9a4646746efbc056b4d2a07b40f93964 (diff)
download	cpython-820c1200597606f95bb996586be88a3283c6448c.zip cpython-820c1200597606f95bb996586be88a3283c6448c.tar.gz cpython-820c1200597606f95bb996586be88a3283c6448c.tar.bz2