1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
# HTTP client class
#
# See the following document for a tentative protocol description:
# Hypertext Transfer Protocol (HTTP) Tim Berners-Lee, CERN
# Internet Draft 5 Nov 1993
# draft-ietf-iiir-http-00.txt Expires 5 May 1994
#
# Example:
#
# >>> from httplib import HTTP
# >>> h = HTTP('www.cwi.nl')
# >>> h.putrequest('GET', '/index.html')
# >>> h.putheader('Accept', 'text/html')
# >>> h.putheader('Accept', 'text/plain')
# >>> h.endheaders()
# >>> errcode, errmsg, headers = h.getreply()
# >>> if errcode == 200:
# ... f = h.getfile()
# ... print f.read() # Print the raw HTML
# ...
# <TITLE>Home Page of CWI, Amsterdam</TITLE>
# [...many more lines...]
# >>>
#
# Note that an HTTP object is used for a single request -- to issue a
# second request to the same server, you create a new HTTP object.
# (This is in accordance with the protocol, which uses a new TCP
# connection for each request.)
import os
import socket
import string
import regex
import regsub
import rfc822
# Protocol version placed on every request line, and the TCP port used
# when the caller does not supply one.
HTTP_VERSION = 'HTTP/1.0'
HTTP_PORT = 80

# Pattern for the first line of a reply: the protocol version (its dots
# escaped so they match literally), whitespace, a three-digit status
# code (group 1) and the rest of the line (group 2).
replypat = (regsub.gsub('\\.', '\\\\.', HTTP_VERSION) +
            '[ \t]+\([0-9][0-9][0-9]\)\(.*\)')
replyprog = regex.compile(replypat)
class HTTP:
def __init__(self, *args):
self.debuglevel = 0
if args: apply(self.connect, args)
def set_debuglevel(self, debuglevel):
self.debuglevel = debuglevel
def connect(self, host, *args):
if args:
if args[1:]: raise TypeError, 'too many args'
port = args[0]
else:
i = string.find(host, ':')
port = None
if i >= 0:
host, port = host[:i], host[i+1:]
try: port = string.atoi(port)
except string.atoi_error: pass
if not port: port = HTTP_PORT
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
if self.debuglevel > 0: print 'connect:', (host, port)
self.sock.connect(host, port)
def send(self, str):
if self.debuglevel > 0: print 'send:', `str`
self.sock.send(str)
def putrequest(self, request, selector):
str = '%s %s %s\r\n' % (request, selector, HTTP_VERSION)
self.send(str)
def putheader(self, header, *args):
str = '%s: %s\r\n' % (header, string.joinfields(args,'\r\n\t'))
self.send(str)
def endheaders(self):
self.send('\r\n')
def endrequest(self):
if self.debuglevel > 0: print 'shutdown: 1'
self.sock.shutdown(1)
def getreply(self):
self.endrequest()
self.file = self.sock.makefile('r')
line = self.file.readline()
if self.debuglevel > 0: print 'reply:', `line`
if replyprog.match(line) < 0:
self.headers = None
return -1, line, self.headers
errcode, errmsg = replyprog.group(1, 2)
errcode = string.atoi(errcode)
errmsg = string.strip(errmsg)
self.headers = rfc822.Message(self.file)
return errcode, errmsg, self.headers
def getfile(self):
return self.file
def test():
import sys
import getopt
opts, args = getopt.getopt(sys.argv[1:], 'd')
dl = 0
for o, a in opts:
if o == '-d': dl = dl + 1
host = 'www.cwi.nl:80'
selector = '/index.html'
if args[0:]: host = args[0]
if args[1:]: selector = args[1]
h = HTTP()
h.set_debuglevel(dl)
h.connect(host)
h.putrequest('GET', selector)
errcode, errmsg, headers = h.getreply()
print 'errcode =', errcode
print 'headers =', headers
print 'errmsg =', errmsg
if headers:
for header in headers.headers: print string.strip(header)
print h.getfile().read()
# Run the self-test when this file is executed as a script.
if __name__ == '__main__': test()
|