summaryrefslogtreecommitdiffstats
path: root/Lib/httplib.py
blob: d494e21ef04c3bc8e04ae6cad550f1ba5a66b099 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# HTTP client class
#
# See the following document for a tentative protocol description:
#     Hypertext Transfer Protocol (HTTP)        Tim Berners-Lee, CERN
#     Internet Draft                                       5 Nov 1993
#     draft-ietf-iiir-http-00.txt                  Expires 5 May 1994
#
# Example:
#
# >>> from httplib import HTTP
# >>> h = HTTP('www.cwi.nl')
# >>> h.putrequest('GET', '/index.html')
# >>> h.putheader('Accept', 'text/html')
# >>> h.putheader('Accept', 'text/plain')
# >>> h.endheaders()
# >>> errcode, errmsg, headers = h.getreply()
# >>> if errcode == 200:
# ...     f = h.getfile()
# ...     print f.read() # Print the raw HTML
# ...
# <TITLE>Home Page of CWI, Amsterdam</TITLE>
# [...many more lines...]
# >>>
#
# Note that an HTTP object is used for a single request -- to issue a
# second request to the same server, you create a new HTTP object.
# (This is in accordance with the protocol, which uses a new TCP
# connection for each request.)


import os
import socket
import string
import regex
import regsub
import rfc822

# Protocol version string: sent at the end of every request line by
# HTTP.putrequest() and expected at the start of the server's status line.
HTTP_VERSION = 'HTTP/1.0'
# Port that HTTP.connect() falls back to when the caller supplies none.
HTTP_PORT = 80

# Pattern for the status line of a reply.  It is built from:
#   - HTTP_VERSION, with each '.' backslash-escaped by regsub.gsub so the
#     dots match literally instead of acting as wildcards;
#   - one or more spaces or tabs;
#   - group 1: exactly three digits (the status code);
#   - group 2: the rest of the line (the reason text).
# The two \( ... \) groups are what HTTP.getreply() reads back with
# replyprog.group(1, 2).
replypat = regsub.gsub('\\.', '\\\\.', HTTP_VERSION) + \
	  '[ \t]+\([0-9][0-9][0-9]\)\(.*\)'
# Compiled once at import time and shared by every HTTP instance.
replyprog = regex.compile(replypat)

class HTTP:

	def __init__(self, *args):
		self.debuglevel = 0
		if args: apply(self.connect, args)

	def set_debuglevel(self, debuglevel):
		self.debuglevel = debuglevel

	def connect(self, host, *args):
		if args:
			if args[1:]: raise TypeError, 'too many args'
			port = args[0]
		else:
			i = string.find(host, ':')
			port = None
			if i >= 0:
				host, port = host[:i], host[i+1:]
				try: port = string.atoi(port)
				except string.atoi_error: pass
		if not port: port = HTTP_PORT
		self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		if self.debuglevel > 0: print 'connect:', (host, port)
		self.sock.connect(host, port)

	def send(self, str):
		if self.debuglevel > 0: print 'send:', `str`
		self.sock.send(str)

	def putrequest(self, request, selector):
		str = '%s %s %s\r\n' % (request, selector, HTTP_VERSION)
		self.send(str)

	def putheader(self, header, *args):
		str = '%s: %s\r\n' % (header, string.joinfields(args,'\r\n\t'))
		self.send(str)

	def endheaders(self):
		self.send('\r\n')

	def endrequest(self):
		if self.debuglevel > 0: print 'shutdown: 1'
		self.sock.shutdown(1)

	def getreply(self):
		self.endrequest()
		self.file = self.sock.makefile('r')
		line = self.file.readline()
		if self.debuglevel > 0: print 'reply:', `line`
		if replyprog.match(line) < 0:
			self.headers = None
			return -1, line, self.headers
		errcode, errmsg = replyprog.group(1, 2)
		errcode = string.atoi(errcode)
		errmsg = string.strip(errmsg)
		self.headers = rfc822.Message(self.file)
		return errcode, errmsg, self.headers

	def getfile(self):
		return self.file


def test():
	import sys
	import getopt
	opts, args = getopt.getopt(sys.argv[1:], 'd')
	dl = 0
	for o, a in opts:
		if o == '-d': dl = dl + 1
	host = 'www.cwi.nl:80'
	selector = '/index.html'
	if args[0:]: host = args[0]
	if args[1:]: selector = args[1]
	h = HTTP()
	h.set_debuglevel(dl)
	h.connect(host)
	h.putrequest('GET', selector)
	errcode, errmsg, headers = h.getreply()
	print 'errcode =', errcode
	print 'headers =', headers
	print 'errmsg  =', errmsg
	if headers:
		for header in headers.headers: print string.strip(header)
	print h.getfile().read()

# Run the self-test when the file is executed as a script rather than
# imported as a module.
if __name__ == '__main__':
	test()