summaryrefslogtreecommitdiffstats
path: root/Lib/httplib.py
blob: 1bb6bf2100878fcf35a0936cb7179a24b0b4b694 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# HTTP client class
#
# See the following document for a tentative protocol description:
#     Hypertext Transfer Protocol (HTTP)        Tim Berners-Lee, CERN
#     Internet Draft                                       5 Nov 1993
#     draft-ietf-iiir-http-00.txt                  Expires 5 May 1994
#
# Example:
#
# >>> from httplib import HTTP
# >>> h = HTTP('www.cwi.nl')
# >>> h.putrequest('GET', '/index.html')
# >>> h.putheader('Accept', 'text/html')
# >>> h.putheader('Accept', 'text/plain')
# >>> h.endheaders()
# >>> errcode, errmsg, headers = h.getreply()
# >>> if errcode == 200:
# ...     f = h.getfile()
# ...     print f.read() # Print the raw HTML
# ...
# <TITLE>Home Page of CWI, Amsterdam</TITLE>
# [...many more lines...]
# >>>
#
# Note that an HTTP object is used for a single request -- to issue a
# second request to the same server, you create a new HTTP object.
# (This is in accordance with the protocol, which uses a new TCP
# connection for each request.)


import os
import socket
import string
import regex
import regsub
import rfc822

# Protocol version string; putrequest() appends it to every request line
# and it is also the literal prefix expected at the start of a reply line.
HTTP_VERSION = 'HTTP/1.0'
# Port used by connect() when neither the port argument nor a ':port'
# suffix on the host name supplies one.
HTTP_PORT = 80

# Pattern for the first line of a server reply: the version string (with
# each '.' backslash-escaped so it matches literally), one or more spaces
# or tabs, a three-digit status code (group 1), then the rest of the
# line (group 2).  getreply() reads the two groups back from replyprog.
replypat = regsub.gsub('\\.', '\\\\.', HTTP_VERSION) + \
	  '[ \t]+\([0-9][0-9][0-9]\)\(.*\)'
replyprog = regex.compile(replypat)

class HTTP:

	def __init__(self, host = '', port = 0):
		self.debuglevel = 0
		if host: self.connect(host, port)

	def set_debuglevel(self, debuglevel):
		self.debuglevel = debuglevel

	def connect(self, host, port = 0):
		if not port:
			i = string.find(host, ':')
			if i >= 0:
				host, port = host[:i], host[i+1:]
				try: port = string.atoi(port)
				except string.atoi_error: pass
		if not port: port = HTTP_PORT
		self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		if self.debuglevel > 0: print 'connect:', (host, port)
		self.sock.connect(host, port)

	def send(self, str):
		if self.debuglevel > 0: print 'send:', `str`
		self.sock.send(str)

	def putrequest(self, request, selector):
		if not selector: selector = '/'
		str = '%s %s %s\r\n' % (request, selector, HTTP_VERSION)
		self.send(str)

	def putheader(self, header, *args):
		str = '%s: %s\r\n' % (header, string.joinfields(args,'\r\n\t'))
		self.send(str)

	def endheaders(self):
		self.send('\r\n')

	def getreply(self):
		self.file = self.sock.makefile('r')
		self.sock = None
		line = self.file.readline()
		if self.debuglevel > 0: print 'reply:', `line`
		if replyprog.match(line) < 0:
			self.headers = None
			return -1, line, self.headers
		errcode, errmsg = replyprog.group(1, 2)
		errcode = string.atoi(errcode)
		errmsg = string.strip(errmsg)
		self.headers = rfc822.Message(self.file, 0)
		return errcode, errmsg, self.headers

	def getfile(self):
		return self.file


def test():
	import sys
	import getopt
	opts, args = getopt.getopt(sys.argv[1:], 'd')
	dl = 0
	for o, a in opts:
		if o == '-d': dl = dl + 1
	host = 'www.cwi.nl:80'
	selector = '/index.html'
	if args[0:]: host = args[0]
	if args[1:]: selector = args[1]
	h = HTTP()
	h.set_debuglevel(dl)
	h.connect(host)
	h.putrequest('GET', selector)
	errcode, errmsg, headers = h.getreply()
	print 'errcode =', errcode
	print 'headers =', headers
	print 'errmsg  =', errmsg
	if headers:
		for header in headers.headers: print string.strip(header)
	print h.getfile().read()

# Run the self-test when this file is executed as a script rather than
# imported as a module.
if __name__ == '__main__':
	test()