1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
|
"""Abstract base classes related to import."""
from . import _bootstrap
from . import machinery
from . import util
import abc
import imp
import io
import marshal
import os.path
import sys
import tokenize
import types
import warnings
class Loader(metaclass=abc.ABCMeta):
"""Abstract base class for import loaders."""
@abc.abstractmethod
def load_module(self, fullname):
"""Abstract method which when implemented should load a module.
The fullname is a str."""
raise NotImplementedError
class Finder(metaclass=abc.ABCMeta):
"""Abstract base class for import finders."""
@abc.abstractmethod
def find_module(self, fullname, path=None):
"""Abstract method which when implemented should find a module.
The fullname is a str and the optional path is a str or None.
Returns a Loader object.
"""
raise NotImplementedError
Finder.register(machinery.BuiltinImporter)
Finder.register(machinery.FrozenImporter)
Finder.register(machinery.PathFinder)
class ResourceLoader(Loader):
"""Abstract base class for loaders which can return data from their
back-end storage.
This ABC represents one of the optional protocols specified by PEP 302.
"""
@abc.abstractmethod
def get_data(self, path):
"""Abstract method which when implemented should return the bytes for
the specified path. The path must be a str."""
raise NotImplementedError
class InspectLoader(Loader):
"""Abstract base class for loaders which support inspection about the
modules they can load.
This ABC represents one of the optional protocols specified by PEP 302.
"""
@abc.abstractmethod
def is_package(self, fullname):
"""Abstract method which when implemented should return whether the
module is a package. The fullname is a str. Returns a bool."""
raise NotImplementedError
@abc.abstractmethod
def get_code(self, fullname):
"""Abstract method which when implemented should return the code object
for the module. The fullname is a str. Returns a types.CodeType."""
raise NotImplementedError
@abc.abstractmethod
def get_source(self, fullname):
"""Abstract method which should return the source code for the
module. The fullname is a str. Returns a str."""
raise NotImplementedError
InspectLoader.register(machinery.BuiltinImporter)
InspectLoader.register(machinery.FrozenImporter)
class ExecutionLoader(InspectLoader):
"""Abstract base class for loaders that wish to support the execution of
modules as scripts.
This ABC represents one of the optional protocols specified in PEP 302.
"""
@abc.abstractmethod
def get_filename(self, fullname):
"""Abstract method which should return the value that __file__ is to be
set to."""
raise NotImplementedError
class SourceLoader(_bootstrap.SourceLoader, ResourceLoader, ExecutionLoader):
"""Abstract base class for loading source code (and optionally any
corresponding bytecode).
To support loading from source code, the abstractmethods inherited from
ResourceLoader and ExecutionLoader need to be implemented. To also support
loading from bytecode, the optional methods specified directly by this ABC
is required.
Inherited abstractmethods not implemented in this ABC:
* ResourceLoader.get_data
* ExecutionLoader.get_filename
"""
def path_mtime(self, path):
"""Return the (int) modification time for the path (str)."""
raise NotImplementedError
def set_data(self, path, data):
"""Write the bytes to the path (if possible).
Accepts a str path and data as bytes.
Any needed intermediary directories are to be created. If for some
reason the file cannot be written because of permissions, fail
silently.
"""
raise NotImplementedError
class PyLoader(SourceLoader):
"""Implement the deprecated PyLoader ABC in terms of SourceLoader.
This class has been deprecated! It is slated for removal in Python 3.4.
If compatibility with Python 3.1 is not needed then implement the
SourceLoader ABC instead of this class. If Python 3.1 compatibility is
needed, then use the following idiom to have a single class that is
compatible with Python 3.1 onwards::
try:
from importlib.abc import SourceLoader
except ImportError:
from importlib.abc import PyLoader as SourceLoader
class CustomLoader(SourceLoader):
def get_filename(self, fullname):
# Implement ...
def source_path(self, fullname):
'''Implement source_path in terms of get_filename.'''
try:
return self.get_filename(fullname)
except ImportError:
return None
def is_package(self, fullname):
filename = os.path.basename(self.get_filename(fullname))
return os.path.splitext(filename)[0] == '__init__'
"""
@abc.abstractmethod
def is_package(self, fullname):
raise NotImplementedError
@abc.abstractmethod
def source_path(self, fullname):
"""Abstract method. Accepts a str module name and returns the path to
the source code for the module."""
raise NotImplementedError
def get_filename(self, fullname):
"""Implement get_filename in terms of source_path.
As get_filename should only return a source file path there is no
chance of the path not existing but loading still being possible, so
ImportError should propagate instead of being turned into returning
None.
"""
warnings.warn("importlib.abc.PyLoader is deprecated and is "
"slated for removal in Python 3.4; "
"use SourceLoader instead. "
"See the importlib documentation on how to be "
"compatible with Python 3.1 onwards.",
PendingDeprecationWarning)
path = self.source_path(fullname)
if path is None:
raise ImportError
else:
return path
class PyPycLoader(PyLoader):
"""Abstract base class to assist in loading source and bytecode by
requiring only back-end storage methods to be implemented.
This class has been deprecated! Removal is slated for Python 3.4. Implement
the SourceLoader ABC instead. If Python 3.1 compatibility is needed, see
PyLoader.
The methods get_code, get_source, and load_module are implemented for the
user.
"""
def get_filename(self, fullname):
"""Return the source or bytecode file path."""
path = self.source_path(fullname)
if path is not None:
return path
path = self.bytecode_path(fullname)
if path is not None:
return path
raise ImportError("no source or bytecode path available for "
"{0!r}".format(fullname))
def get_code(self, fullname):
"""Get a code object from source or bytecode."""
warnings.warn("importlib.abc.PyPycLoader is deprecated and slated for "
"removal in Python 3.4; use SourceLoader instead. "
"If Python 3.1 compatibility is required, see the "
"latest documentation for PyLoader.",
PendingDeprecationWarning)
source_timestamp = self.source_mtime(fullname)
# Try to use bytecode if it is available.
bytecode_path = self.bytecode_path(fullname)
if bytecode_path:
data = self.get_data(bytecode_path)
try:
magic = data[:4]
if len(magic) < 4:
raise ImportError("bad magic number in {}".format(fullname))
raw_timestamp = data[4:8]
if len(raw_timestamp) < 4:
raise EOFError("bad timestamp in {}".format(fullname))
pyc_timestamp = marshal._r_long(raw_timestamp)
bytecode = data[8:]
# Verify that the magic number is valid.
if imp.get_magic() != magic:
raise ImportError("bad magic number in {}".format(fullname))
# Verify that the bytecode is not stale (only matters when
# there is source to fall back on.
if source_timestamp:
if pyc_timestamp < source_timestamp:
raise ImportError("bytecode is stale")
except (ImportError, EOFError):
# If source is available give it a shot.
if source_timestamp is not None:
pass
else:
raise
else:
# Bytecode seems fine, so try to use it.
return marshal.loads(bytecode)
elif source_timestamp is None:
raise ImportError("no source or bytecode available to create code "
"object for {0!r}".format(fullname))
# Use the source.
source_path = self.source_path(fullname)
if source_path is None:
message = "a source path must exist to load {0}".format(fullname)
raise ImportError(message)
source = self.get_data(source_path)
code_object = compile(source, source_path, 'exec', dont_inherit=True)
# Generate bytecode and write it out.
if not sys.dont_write_bytecode:
data = bytearray(imp.get_magic())
data.extend(marshal._w_long(source_timestamp))
data.extend(marshal.dumps(code_object))
self.write_bytecode(fullname, data)
return code_object
@abc.abstractmethod
def source_mtime(self, fullname):
"""Abstract method. Accepts a str filename and returns an int92' href='#n1092'>1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
|
"""Open an arbitrary URL.
See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html
See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/
Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)
The object returned by URLopener().open(file) will differ per
protocol. All you know is that is has methods read(), readline(),
readlines(), fileno(), close() and info(). The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""
import string
import socket
import os
import time
import sys
from urlparse import urljoin as basejoin
import warnings
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
"urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
"urlencode", "url2pathname", "pathname2url", "splittag",
"localhost", "thishost", "ftperrors", "basejoin", "unwrap",
"splittype", "splithost", "splituser", "splitpasswd", "splitport",
"splitnport", "splitquery", "splitattr", "splitvalue",
"getproxies"]
__version__ = '1.17' # XXX This version is not always updated :-(
MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
# Helper for non-unix systems
if os.name == 'mac':
from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
from rourl2path import url2pathname, pathname2url
else:
def url2pathname(pathname):
"""OS-specific conversion from a relative URL of the 'file' scheme
to a file system path; not recommended for general use."""
return unquote(pathname)
def pathname2url(pathname):
"""OS-specific conversion from a file system path to a relative URL
of the 'file' scheme; not recommended for general use."""
return quote(pathname)
# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
# (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?
# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
"""Create a file-like object for the specified URL to read from."""
from warnings import warnpy3k
warnings.warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
"favor of urllib2.urlopen()", stacklevel=2)
global _urlopener
if proxies is not None:
opener = FancyURLopener(proxies=proxies)
elif not _urlopener:
opener = FancyURLopener()
_urlopener = opener
else:
opener = _urlopener
if data is None:
return opener.open(url)
else:
return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
global _urlopener
if not _urlopener:
_urlopener = FancyURLopener()
return _urlopener.retrieve(url, filename, reporthook, data)
def urlcleanup():
if _urlopener:
_urlopener.cleanup()
# check for SSL
try:
import ssl
except:
_have_ssl = False
else:
_have_ssl = True
# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
def __init__(self, message, content):
IOError.__init__(self, message)
self.content = content
ftpcache = {}
class URLopener:
"""Class to open URLs.
This is a class rather than just a subroutine because we may need
more than one set of global protocol-specific options.
Note -- this is a base class for those who don't want the
automatic handling of errors type 302 (relocated) and 401
(authorization needed)."""
__tempfiles = None
version = "Python-urllib/%s" % __version__
# Constructor
def __init__(self, proxies=None, **x509):
if proxies is None:
proxies = getproxies()
assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
self.proxies = proxies
self.key_file = x509.get('key_file')
self.cert_file = x509.get('cert_file')
self.addheaders = [('User-Agent', self.version)]
self.__tempfiles = []
self.__unlink = os.unlink # See cleanup()
self.tempcache = None
# Undocumented feature: if you assign {} to tempcache,
# it is used to cache files retrieved with
# self.retrieve(). This is not enabled by default
# since it does not work for changing documents (and I
# haven't got the logic to check expiration headers
# yet).
self.ftpcache = ftpcache
# Undocumented feature: you can use a different
# ftp cache by assigning to the .ftpcache member;
# in case you want logically independent URL openers
# XXX This is not threadsafe. Bah.
def __del__(self):
self.close()
def close(self):
self.cleanup()
def cleanup(self):
# This code sometimes runs when the rest of this module
# has already been deleted, so it can't use any globals
# or import anything.
if self.__tempfiles:
for file in self.__tempfiles:
try:
self.__unlink(file)
except OSError:
pass
del self.__tempfiles[:]
if self.tempcache:
self.tempcache.clear()
def addheader(self, *args):
"""Add a header to be used by the HTTP interface only
e.g. u.addheader('Accept', 'sound/basic')"""
self.addheaders.append(args)
# External interface
def open(self, fullurl, data=None):
"""Use URLopener().open(file) instead of open(file, 'r')."""
fullurl = unwrap(toBytes(fullurl))
# percent encode url. fixing lame server errors like space within url
# parts
fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]")
if self.tempcache and fullurl in self.tempcache:
filename, headers = self.tempcache[fullurl]
fp = open(filename, 'rb')
return addinfourl(fp, headers, fullurl)
urltype, url = splittype(fullurl)
if not urltype:
urltype = 'file'
if urltype in self.proxies:
proxy = self.proxies[urltype]
urltype, proxyhost = splittype(proxy)
host, selector = splithost(proxyhost)
url = (host, fullurl) # Signal special case to open_*()
else:
proxy = None
name = 'open_' + urltype
self.type = urltype
name = name.replace('-', '_')
if not hasattr(self, name):
if proxy:
return self.open_unknown_proxy(proxy, fullurl, data)
else:
return self.open_unknown(fullurl, data)
try:
if data is None:
return getattr(self, name)(url)
else:
return getattr(self, name)(url, data)
except socket.error, msg:
raise IOError, ('socket error', msg), sys.exc_info()[2]
def open_unknown(self, fullurl, data=None):
"""Overridable interface to open unknown URL type."""
type, url = splittype(fullurl)
raise IOError, ('url error', 'unknown url type', type)
def open_unknown_proxy(self, proxy, fullurl, data=None):
"""Overridable interface to open unknown URL type."""
type, url = splittype(fullurl)
raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
# External interface
def retrieve(self, url, filename=None, reporthook=None, data=None):
"""retrieve(url) returns (filename, headers) for a local object
or (tempfilename, headers) for a remote object."""
url = unwrap(toBytes(url))
if self.tempcache and url in self.tempcache:
return self.tempcache[url]
type, url1 = splittype(url)
if filename is None and (not type or type == 'file'):
try:
fp = self.open_local_file(url1)
hdrs = fp.info()
del fp
return url2pathname(splithost(url1)[1]), hdrs
except IOError, msg:
pass
fp = self.open(url, data)
try:
headers = fp.info()
if filename:
tfp = open(filename, 'wb')
else:
import tempfile
garbage, path = splittype(url)
garbage, path = splithost(path or "")
path, garbage = splitquery(path or "")
path, garbage = splitattr(path or "")
suffix = os.path.splitext(path)[1]
(fd, filename) = tempfile.mkstemp(suffix)
self.__tempfiles.append(filename)
tfp = os.fdopen(fd, 'wb')
try:
result = filename, headers
if self.tempcache is not None:
self.tempcache[url] = result
bs = 1024*8
size = -1
read = 0
blocknum = 0
if reporthook:
if "content-length" in headers:
size = int(headers["Content-Length"])
reporthook(blocknum, bs, size)
while 1:
block = fp.read(bs)
if block == "":
break
read += len(block)
tfp.write(block)
blocknum += 1
if reporthook:
reporthook(blocknum, bs, size)
finally:
tfp.close()
finally:
fp.close()
del fp
del tfp
# raise exception if actual size does not match content-length header
if size >= 0 and read < size:
raise ContentTooShortError("retrieval incomplete: got only %i out "
"of %i bytes" % (read, size), result)
return result
# Each method named open_<type> knows how to open that type of URL
def open_http(self, url, data=None):
"""Use HTTP protocol."""
import httplib
user_passwd = None
proxy_passwd= None
if isinstance(url, str):
host, selector = splithost(url)
if host:
user_passwd, host = splituser(host)
host = unquote(host)
realhost = host
else:
host, selector = url
# check whether the proxy contains authorization information
proxy_passwd, host = splituser(host)
# now we proceed with the url we want to obtain
urltype, rest = splittype(selector)
url = rest
user_passwd = None
if urltype.lower() != 'http':
realhost = None
else:
realhost, rest = splithost(rest)
if realhost:
user_passwd, realhost = splituser(realhost)
if user_passwd:
selector = "%s://%s%s" % (urltype, realhost, rest)
if proxy_bypass(realhost):
host = realhost
#print "proxy via http:", host, selector
if not host: raise IOError, ('http error', 'no host given')
if proxy_passwd:
import base64
proxy_auth = base64.b64encode(proxy_passwd).strip()
else:
proxy_auth = None
if user_passwd:
import base64
auth = base64.b64encode(user_passwd).strip()
else:
auth = None
h = httplib.HTTP(host)
if data is not None:
h.putrequest('POST', selector)
h.putheader('Content-Type', 'application/x-www-form-urlencoded')
h.putheader('Content-Length', '%d' % len(data))
else:
h.putrequest('GET', selector)
if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
if auth: h.putheader('Authorization', 'Basic %s' % auth)
if realhost: h.putheader('Host', realhost)
for args in self.addheaders: h.putheader(*args)
h.endheaders()
if data is not None:
h.send(data)
errcode, errmsg, headers = h.getreply()
fp = h.getfile()
if errcode == -1:
if fp: fp.close()
# something went wrong with the HTTP status line
raise IOError, ('http protocol error', 0,
'got a bad status line', None)
# According to RFC 2616, "2xx" code indicates that the client's
# request was successfully received, understood, and accepted.
if (200 <= errcode < 300):
return addinfourl(fp, headers, "http:" + url, errcode)
else:
if data is None:
return self.http_error(url, fp, errcode, errmsg, headers)
else:
return self.http_error(url, fp, errcode, errmsg, headers, data)
def http_error(self, url, fp, errcode, errmsg, headers, data=None):
"""Handle http errors.
Derived class can override this, or provide specific handlers
named http_error_DDD where DDD is the 3-digit error code."""
# First check if there's a specific handler for this error
name = 'http_error_%d' % errcode
if hasattr(self, name):
method = getattr(self, name)
if data is None:
result = method(url, fp, errcode, errmsg, headers)
else:
result = method(url, fp, errcode, errmsg, headers, data)
if result: return result
return self.http_error_default(url, fp, errcode, errmsg, headers)
def http_error_default(self, url, fp, errcode, errmsg, headers):
"""Default error handler: close the connection and raise IOError."""
void = fp.read()
fp.close()
raise IOError, ('http error', errcode, errmsg, headers)
if _have_ssl:
def open_https(self, url, data=None):
"""Use HTTPS protocol."""
import httplib
user_passwd = None
proxy_passwd = None
if isinstance(url, str):
host, selector = splithost(url)
if host:
user_passwd, host = splituser(host)
host = unquote(host)
realhost = host
else:
host, selector = url
# here, we determine, whether the proxy contains authorization information
proxy_passwd, host = splituser(host)
urltype, rest = splittype(selector)
url = rest
user_passwd = None
if urltype.lower() != 'https':
realhost = None
else:
realhost, rest = splithost(rest)
if realhost:
user_passwd, realhost = splituser(realhost)
if user_passwd:
selector = "%s://%s%s" % (urltype, realhost, rest)
#print "proxy via https:", host, selector
if not host: raise IOError, ('https error', 'no host given')
if proxy_passwd:
import base64
proxy_auth = base64.b64encode(proxy_passwd).strip()
else:
proxy_auth = None
if user_passwd:
import base64
auth = base64.b64encode(user_passwd).strip()
else:
auth = None
h = httplib.HTTPS(host, 0,
key_file=self.key_file,
cert_file=self.cert_file)
if data is not None:
h.putrequest('POST', selector)
h.putheader('Content-Type',
'application/x-www-form-urlencoded')
h.putheader('Content-Length', '%d' % len(data))
else:
h.putrequest('GET', selector)
if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
if auth: h.putheader('Authorization', 'Basic %s' % auth)
if realhost: h.putheader('Host', realhost)
for args in self.addheaders: h.putheader(*args)
h.endheaders()
if data is not None:
h.send(data)
errcode, errmsg, headers = h.getreply()
fp = h.getfile()
if errcode == -1:
if fp: fp.close()
# something went wrong with the HTTP status line
raise IOError, ('http protocol error', 0,
'got a bad status line', None)
# According to RFC 2616, "2xx" code indicates that the client's
# request was successfully received, understood, and accepted.
if (200 <= errcode < 300):
return addinfourl(fp, headers, "https:" + url, errcode)
else:
if data is None:
return self.http_error(url, fp, errcode, errmsg, headers)
else:
return self.http_error(url, fp, errcode, errmsg, headers,
data)
def open_file(self, url):
"""Use local file or FTP depending on form of URL."""
if not isinstance(url, str):
raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
return self.open_ftp(url)
else:
return self.open_local_file(url)
def open_local_file(self, url):
"""Use local file."""
import mimetypes, mimetools, email.utils
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
host, file = splithost(url)
localname = url2pathname(file)
try:
stats = os.stat(localname)
except OSError, e:
raise IOError(e.errno, e.strerror, e.filename)
size = stats.st_size
modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
mtype = mimetypes.guess_type(url)[0]
headers = mimetools.Message(StringIO(
'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
(mtype or 'text/plain', size, modified)))
if not host:
urlfile = file
if file[:1] == '/':
urlfile = 'file://' + file
return addinfourl(open(localname, 'rb'),
headers, urlfile)
host, port = splitport(host)
if not port \
and socket.gethostbyname(host) in (localhost(), thishost()):
urlfile = file
if file[:1] == '/':
urlfile = 'file://' + file
return addinfourl(open(localname, 'rb'),
headers, urlfile)
raise IOError, ('local file error', 'not on local host')
def open_ftp(self, url):
"""Use FTP protocol."""
if not isinstance(url, str):
raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
import mimetypes, mimetools
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
host, path = splithost(url)
if not host: raise IOError, ('ftp error', 'no host given')
host, port = splitport(host)
user, host = splituser(host)
if user: user, passwd = splitpasswd(user)
else: passwd = None
host = unquote(host)
user = unquote(user or '')
passwd = unquote(passwd or '')
host = socket.gethostbyname(host)
if not port:
import ftplib
port = ftplib.FTP_PORT
else:
port = int(port)
path, attrs = splitattr(path)
path = unquote(path)
dirs = path.split('/')
dirs, file = dirs[:-1], dirs[-1]
if dirs and not dirs[0]: dirs = dirs[1:]
if dirs and not dirs[0]: dirs[0] = '/'
key = user, host, port, '/'.join(dirs)
# XXX thread unsafe!
if len(self.ftpcache) > MAXFTPCACHE:
# Prune the cache, rather arbitrarily
for k in self.ftpcache.keys():
if k != key:
v = self.ftpcache[k]
del self.ftpcache[k]
v.close()
try:
if not key in self.ftpcache:
self.ftpcache[key] = \
ftpwrapper(user, passwd, host, port, dirs)
if not file: type = 'D'
else: type = 'I'
for attr in attrs:
attr, value = splitvalue(attr)
if attr.lower() == 'type' and \
value in ('a', 'A', 'i', 'I', 'd', 'D'):
type = value.upper()
(fp, retrlen) = self.ftpcache[key].retrfile(file, type)
mtype = mimetypes.guess_type("ftp:" + url)[0]
headers = ""
if mtype:
headers += "Content-Type: %s\n" % mtype
if retrlen is not None and retrlen >= 0:
headers += "Content-Length: %d\n" % retrlen
headers = mimetools.Message(StringIO(headers))
return addinfourl(fp, headers, "ftp:" + url)
except ftperrors(), msg:
raise IOError, ('ftp error', msg), sys.exc_info()[2]
def open_data(self, url, data=None):
"""Use "data" URL."""
if not isinstance(url, str):
raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
# ignore POSTed data
#
# syntax of data URLs:
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
# mediatype := [ type "/" subtype ] *( ";" parameter )
# data := *urlchar
# parameter := attribute "=" value
import mimetools
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
try:
[type, data] = url.split(',', 1)
except ValueError:
raise IOError, ('data error', 'bad data URL')
if not type:
type = 'text/plain;charset=US-ASCII'
semi = type.rfind(';')
if semi >= 0 and '=' not in type[semi:]:
encoding = type[semi+1:]
type = type[:semi]
else:
encoding = ''
msg = []
msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
time.gmtime(time.time())))
msg.append('Content-type: %s' % type)
if encoding == 'base64':
import base64
data = base64.decodestring(data)
else:
data = unquote(data)
msg.append('Content-Length: %d' % len(data))
msg.append('')
msg.append(data)
msg = '\n'.join(msg)
f = StringIO(msg)
headers = mimetools.Message(f, 0)
#f.fileno = None # needed for addinfourl
return addinfourl(f, headers, url)
class FancyURLopener(URLopener):
"""Derived class with handlers for errors we can handle (perhaps)."""
def __init__(self, *args, **kwargs):
URLopener.__init__(self, *args, **kwargs)
self.auth_cache = {}
self.tries = 0
self.maxtries = 10
def http_error_default(self, url, fp, errcode, errmsg, headers):
"""Default error handling -- don't raise an exception."""
return addinfourl(fp, headers, "http:" + url, errcode)
def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 302 -- relocated (temporarily)."""
self.tries += 1
if self.maxtries and self.tries >= self.maxtries:
if hasattr(self, "http_error_500"):
meth = self.http_error_500
else:
meth = self.http_error_default
self.tries = 0
return meth(url, fp, 500,
"Internal Server Error: Redirect Recursion", headers)
result = self.redirect_internal(url, fp, errcode, errmsg, headers,
data)
self.tries = 0
return result
def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
if 'location' in headers:
newurl = headers['location']
elif 'uri' in headers:
newurl = headers['uri']
else:
return
void = fp.read()
fp.close()
# In case the server sent a relative URL, join with original:
newurl = basejoin(self.type + ":" + url, newurl)
return self.open(newurl)
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 301 -- also relocated (permanently)."""
return self.http_error_302(url, fp, errcode, errmsg, headers, data)
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 303 -- also relocated (essentially identical to 302)."""
return self.http_error_302(url, fp, errcode, errmsg, headers, data)
def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 307 -- relocated, but turn POST into error."""
if data is None:
return self.http_error_302(url, fp, errcode, errmsg, headers, data)
else:
return self.http_error_default(url, fp, errcode, errmsg, headers)
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 401 -- authentication required.
This function supports Basic authentication only."""
if not 'www-authenticate' in headers:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
stuff = headers['www-authenticate']
import re
match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
if not match:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
scheme, realm = match.groups()
if scheme.lower() != 'basic':
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
name = 'retry_' + self.type + '_basic_auth'
if data is None:
return getattr(self,name)(url, realm)
else:
return getattr(self,name)(url, realm, data)
def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 407 -- proxy authentication required.
This function supports Basic authentication only."""
if not 'proxy-authenticate' in headers:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
stuff = headers['proxy-authenticate']
import re
match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
if not match:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
scheme, realm = match.groups()
if scheme.lower() != 'basic':
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
name = 'retry_proxy_' + self.type + '_basic_auth'
if data is None:
return getattr(self,name)(url, realm)
else:
return getattr(self,name)(url, realm, data)
def retry_proxy_http_basic_auth(self, url, realm, data=None):
host, selector = splithost(url)
newurl = 'http://' + host + selector
proxy = self.proxies['http']
urltype, proxyhost = splittype(proxy)
proxyhost, proxyselector = splithost(proxyhost)
i = proxyhost.find('@') + 1
proxyhost = proxyhost[i:]
user, passwd = self.get_user_passwd(proxyhost, realm, i)
if not (user or passwd): return None
proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
self.proxies['http'] = 'http://' + proxyhost + proxyselector
if data is None:
return self.open(newurl)
else:
return self.open(newurl, data)
def retry_proxy_https_basic_auth(self, url, realm, data=None):
host, selector = splithost(url)
newurl = 'https://' + host + selector
proxy = self.proxies['https']
urltype, proxyhost = splittype(proxy)
proxyhost, proxyselector = splithost(proxyhost)
i = proxyhost.find('@') + 1
proxyhost = proxyhost[i:]
user, passwd = self.get_user_passwd(proxyhost, realm, i)
if not (user or passwd): return None
proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
self.proxies['https'] = 'https://' + proxyhost + proxyselector
if data is None:
return self.open(newurl)
else:
return self.open(newurl, data)
def retry_http_basic_auth(self, url, realm, data=None):
host, selector = splithost(url)
i = host.find('@') + 1
host = host[i:]
user, passwd = self.get_user_passwd(host, realm, i)
if not (user or passwd): return None
host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
newurl = 'http://' + host + selector
if data is None:
return self.open(newurl)
else:
return self.open(newurl, data)
def retry_https_basic_auth(self, url, realm, data=None):
host, selector = splithost(url)
i = host.find('@') + 1
host = host[i:]
user, passwd = self.get_user_passwd(host, realm, i)
if not (user or passwd): return None
host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
newurl = 'https://' + host + selector
if data is None:
return self.open(newurl)
else:
return self.open(newurl, data)
def get_user_passwd(self, host, realm, clear_cache = 0):
key = realm + '@' + host.lower()
if key in self.auth_cache:
if clear_cache:
del self.auth_cache[key]
else:
return self.auth_cache[key]
user, passwd = self.prompt_user_passwd(host, realm)
if user or passwd: self.auth_cache[key] = (user, passwd)
return user, passwd
def prompt_user_passwd(self, host, realm):
"""Override this in a GUI environment!"""
import getpass
try:
user = raw_input("Enter username for %s at %s: " % (realm,
host))
passwd = getpass.getpass("Enter password for %s in %s at %s: " %
(user, realm, host))
return user, passwd
except KeyboardInterrupt:
print
return None, None
# Utility functions
_localhost = None
def localhost():
"""Return the IP address of the magic hostname 'localhost'."""
global _localhost
if _localhost is None:
_localhost = socket.gethostbyname('localhost')
return _localhost
_thishost = None
def thishost():
"""Return the IP address of the current host."""
global _thishost
if _thishost is None:
_thishost = socket.gethostbyname(socket.gethostname())
return _thishost
_ftperrors = None
def ftperrors():
"""Return the set of errors raised by the FTP class."""
global _ftperrors
if _ftperrors is None:
import ftplib
_ftperrors = ftplib.all_errors
return _ftperrors
_noheaders = None
def noheaders():
"""Return an empty mimetools.Message object."""
global _noheaders
if _noheaders is None:
import mimetools
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
_noheaders = mimetools.Message(StringIO(), 0)
_noheaders.fp.close() # Recycle file descriptor
return _noheaders
# Utility classes
class ftpwrapper:
"""Class used by open_ftp() for cache of open FTP connections."""
def __init__(self, user, passwd, host, port, dirs,
timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
self.user = user
self.passwd = passwd
self.host = host
self.port = port
self.dirs = dirs
self.timeout = timeout
self.init()
def init(self):
import ftplib
self.busy = 0
self.ftp = ftplib.FTP()
self.ftp.connect(self.host, self.port, self.timeout)
self.ftp.login(self.user, self.passwd)
for dir in self.dirs:
self.ftp.cwd(dir)
def retrfile(self, file, type):
import ftplib
self.endtransfer()
if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
else: cmd = 'TYPE ' + type; isdir = 0
try:
self.ftp.voidcmd(cmd)
except ftplib.all_errors:
self.init()
self.ftp.voidcmd(cmd)
conn = None
if file and not isdir:
# Try to retrieve as a file
try:
cmd = 'RETR ' + file
conn = self.ftp.ntransfercmd(cmd)
except ftplib.error_perm, reason:
if str(reason)[:3] != '550':
raise IOError, ('ftp error', reason), sys.exc_info()[2]
if not conn:
# Set transfer mode to ASCII!
self.ftp.voidcmd('TYPE A')
# Try a directory listing. Verify that directory exists.
if file:
pwd = self.ftp.pwd()
try:
try:
self.ftp.cwd(file)
except ftplib.error_perm, reason:
raise IOError, ('ftp error', reason), sys.exc_info()[2]
finally:
self.ftp.cwd(pwd)
cmd = 'LIST ' + file
else:
cmd = 'LIST'
conn = self.ftp.ntransfercmd(cmd)
self.busy = 1
# Pass back both a suitably decorated object and a retrieval length
return (addclosehook(conn[0].makefile('rb'),
self.endtransfer), conn[1])
def endtransfer(self):
if not self.busy:
return
self.busy = 0
try:
self.ftp.voidresp()
except ftperrors():
pass
def close(self):
self.endtransfer()
try:
self.ftp.close()
except ftperrors():
pass
class addbase:
"""Base class for addinfo and addclosehook."""
def __init__(self, fp):
self.fp = fp
self.read = self.fp.read
self.readline = self.fp.readline
if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
if hasattr(self.fp, "fileno"):
self.fileno = self.fp.fileno
else:
self.fileno = lambda: None
if hasattr(self.fp, "__iter__"):
self.__iter__ = self.fp.__iter__
if hasattr(self.fp, "next"):
self.next = self.fp.next
def __repr__(self):
return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
id(self), self.fp)
def close(self):
self.read = None
self.readline = None
self.readlines = None
self.fileno = None
if self.fp: self.fp.close()
self.fp = None
class addclosehook(addbase):
"""Class to add a close hook to an open file."""
def __init__(self, fp, closehook, *hookargs):
addbase.__init__(self, fp)
self.closehook = closehook
self.hookargs = hookargs
def close(self):
addbase.close(self)
if self.closehook:
self.closehook(*self.hookargs)
self.closehook = None
self.hookargs = None
class addinfo(addbase):
"""class to add an info() method to an open file."""
def __init__(self, fp, headers):
addbase.__init__(self, fp)
self.headers = headers
def info(self):
return self.headers
class addinfourl(addbase):
"""class to add info() and geturl() methods to an open file."""
def __init__(self, fp, headers, url, code=None):
addbase.__init__(self, fp)
self.headers = headers
self.url = url
self.code = code
def info(self):
return self.headers
def getcode(self):
return self.code
def geturl(self):
return self.url
# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') -> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') ->
# '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def')
try:
unicode
except NameError:
def _is_unicode(x):
return 0
else:
def _is_unicode(x):
return isinstance(x, unicode)
def toBytes(url):
"""toBytes(u"URL") --> 'URL'."""
# Most URL schemes require ASCII. If that changes, the conversion
# can be relaxed
if _is_unicode(url):
try:
url = url.encode("ASCII")
except UnicodeError:
raise UnicodeError("URL " + repr(url) +
" contains non-ASCII characters")
return url
def unwrap(url):
"""unwrap('<URL:type://host/path>') --> 'type://host/path'."""
url = url.strip()
if url[:1] == '<' and url[-1:] == '>':
url = url[1:-1].strip()
if url[:4] == 'URL:': url = url[4:].strip()
return url
_typeprog = None
def splittype(url):
"""splittype('type:opaquestring') --> 'type', 'opaquestring'."""
global _typeprog
if _typeprog is None:
import re
_typeprog = re.compile('^([^/:]+):')
match = _typeprog.match(url)
if match:
scheme = match.group(1)
return scheme.lower(), url[len(scheme) + 1:]
return None, url
_hostprog = None
def splithost(url):
"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
global _hostprog
if _hostprog is None:
import re
_hostprog = re.compile('^//([^/?]*)(.*)$')
match = _hostprog.match(url)
if match: return match.group(1, 2)
return None, url
_userprog = None
def splituser(host):
"""splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
global _userprog
if _userprog is None:
import re
_userprog = re.compile('^(.*)@(.*)$')
match = _userprog.match(host)
if match: return map(unquote, match.group(1, 2))
return None, host
_passwdprog = None
def splitpasswd(user):
"""splitpasswd('user:passwd') -> 'user', 'passwd'."""
global _passwdprog
if _passwdprog is None:
import re
_passwdprog = re.compile('^([^:]*):(.*)$')
match = _passwdprog.match(user)
if match: return match.group(1, 2)
return user, None
# splittag('/path#tag') --> '/path', 'tag'
_portprog = None
def splitport(host):
"""splitport('host:port') --> 'host', 'port'."""
global _portprog
if _portprog is None:
import re
_portprog = re.compile('^(.*):([0-9]+)$')
match = _portprog.match(host)
if match: return match.group(1, 2)
return host, None
_nportprog = None
def splitnport(host, defport=-1):
"""Split host and port, returning numeric port.
Return given default port if no ':' found; defaults to -1.
Return numerical port if a valid number are found after ':'.
Return None if ':' but not a valid number."""
global _nportprog
if _nportprog is None:
import re
_nportprog = re.compile('^(.*):(.*)$')
match = _nportprog.match(host)
if match:
host, port = match.group(1, 2)
try:
if not port: raise ValueError, "no digits"
nport = int(port)
except ValueError:
nport = None
return host, nport
return host, defport
_queryprog = None
def splitquery(url):
"""splitquery('/path?query') --> '/path', 'query'."""
global _queryprog
if _queryprog is None:
import re
_queryprog = re.compile('^(.*)\?([^?]*)$')
match = _queryprog.match(url)
if match: return match.group(1, 2)
return url, None
_tagprog = None
def splittag(url):
"""splittag('/path#tag') --> '/path', 'tag'."""
global _tagprog
if _tagprog is None:
import re
_tagprog = re.compile('^(.*)#([^#]*)$')
match = _tagprog.match(url)
if match: return match.group(1, 2)
return url, None
def splitattr(url):
"""splitattr('/path;attr1=value1;attr2=value2;...') ->
'/path', ['attr1=value1', 'attr2=value2', ...]."""
words = url.split(';')
return words[0], words[1:]
_valueprog = None
def splitvalue(attr):
"""splitvalue('attr=value') --> 'attr', 'value'."""
global _valueprog
if _valueprog is None:
import re
_valueprog = re.compile('^([^=]*)=(.*)$')
match = _valueprog.match(attr)
if match: return match.group(1, 2)
return attr, None
_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
def unquote(s):
"""unquote('abc%20def') -> 'abc def'."""
res = s.split('%')
for i in xrange(1, len(res)):
item = res[i]
try:
res[i] = _hextochr[item[:2]] + item[2:]
except KeyError:
res[i] = '%' + item
except UnicodeDecodeError:
res[i] = unichr(int(item[:2], 16)) + item[2:]
return "".join(res)
def unquote_plus(s):
"""unquote('%7e/abc+def') -> '~/abc def'"""
s = s.replace('+', ' ')
return unquote(s)
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'abcdefghijklmnopqrstuvwxyz'
'0123456789' '_.-')
_safemaps = {}
def quote(s, safe = '/'):
"""quote('abc def') -> 'abc%20def'
Each part of a URL, e.g. the path info, the query, etc., has a
different set of reserved characters that must be quoted.
RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
the following reserved characters.
reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
"$" | ","
Each of these characters is reserved in some component of a URL,
but not necessarily in all of them.
By default, the quote function is intended for quoting the path
section of a URL. Thus, it will not encode '/'. This character
is reserved, but in typical usage the quote function is being
called on a path where the existing slash characters are used as
reserved characters.
"""
cachekey = (safe, always_safe)
try:
safe_map = _safemaps[cachekey]
except KeyError:
safe += always_safe
safe_map = {}
for i in range(256):
c = chr(i)
safe_map[c] = (c in safe) and c or ('%%%02X' % i)
_safemaps[cachekey] = safe_map
res = map(safe_map.__getitem__, s)
return ''.join(res)
def quote_plus(s, safe = ''):
"""Quote the query fragment of a URL; replacing ' ' with '+'"""
if ' ' in s:
s = quote(s, safe + ' ')
return s.replace(' ', '+')
return quote(s, safe)
def urlencode(query,doseq=0):
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
If any values in the query arg are sequences and doseq is true, each
sequence element is converted to a separate parameter.
If the query arg is a sequence of two-element tuples, the order of the
parameters in the output will match the order of parameters in the
input.
"""
if hasattr(query,"items"):
# mapping objects
query = query.items()
else:
# it's a bother at times that strings and string-like objects are
# sequences...
try:
# non-sequence items should not work with len()
# non-empty strings will fail this
if len(query) and not isinstance(query[0], tuple):
raise TypeError
# zero-length sequences of all types will get here and succeed,
# but that's a minor nit - since the original implementation
# allowed empty dicts that type of behavior probably should be
# preserved for consistency
except TypeError:
ty,va,tb = sys.exc_info()
raise TypeError, "not a valid non-string sequence or mapping object", tb
l = []
if not doseq:
# preserve old behavior
for k, v in query:
k = quote_plus(str(k))
v = quote_plus(str(v))
l.append(k + '=' + v)
else:
for k, v in query:
k = quote_plus(str(k))
if isinstance(v, str):
v = quote_plus(v)
l.append(k + '=' + v)
elif _is_unicode(v):
# is there a reasonable way to convert to ASCII?
# encode generates a string, but "replace" or "ignore"
# lose information and "strict" can raise UnicodeError
v = quote_plus(v.encode("ASCII","replace"))
l.append(k + '=' + v)
else:
try:
# is this a sufficient test for sequence-ness?
x = len(v)
except TypeError:
# not a sequence
v = quote_plus(str(v))
l.append(k + '=' + v)
else:
# loop over the sequence
for elt in v:
l.append(k + '=' + quote_plus(str(elt)))
return '&'.join(l)
# Proxy handling
def getproxies_environment():
"""Return a dictionary of scheme -> proxy server URL mappings.
Scan the environment for variables named <scheme>_proxy;
this seems to be the standard convention. If you need a
different way, you can pass a proxies dictionary to the
[Fancy]URLopener constructor.
"""
proxies = {}
for name, value in os.environ.items():
name = name.lower()
if value and name[-6:] == '_proxy':
proxies[name[:-6]] = value
return proxies
def proxy_bypass_environment(host):
"""Test if proxies should not be used for a particular host.
Checks the environment for a variable named no_proxy, which should
be a list of DNS suffixes separated by commas, or '*' for all hosts.
"""
no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
# '*' is special case for always bypass
if no_proxy == '*':
return 1
# strip port off host
hostonly, port = splitport(host)
# check if the host ends with any of the DNS suffixes
for name in no_proxy.split(','):
if name and (hostonly.endswith(name) or host.endswith(name)):
return 1
# otherwise, don't bypass
return 0
if sys.platform == 'darwin':
from _scproxy import _get_proxy_settings, _get_proxies
def proxy_bypass_macosx_sysconf(host):
"""
Return True iff this host shouldn't be accessed using a proxy
This function uses the MacOSX framework SystemConfiguration
to fetch the proxy information.
"""
import re
import socket
from fnmatch import fnmatch
def ip2num(ipAddr):
parts = ipAddr.split('.')
parts = map(int, parts)
if len(parts) != 4:
parts = (parts + [0, 0, 0, 0])[:4]
return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
proxy_settings = _get_proxy_settings()
# Check for simple host names:
if '.' not in host:
if proxy_settings['exclude_simple']:
return True
for value in proxy_settings.get('exceptions', ()):
# Items in the list are strings like these: *.local, 169.254/16
if not value: continue
m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
if m is not None:
if hostIP is None:
hostIP = socket.gethostbyname(host)
hostIP = ip2num(hostIP)
base = ip2num(m.group(1))
mask = int(m.group(2)[1:])
mask = 32 - mask
if (hostIP >> mask) == (base >> mask):
return True
elif fnmatch(host, value):
return True
return False
def getproxies_macosx_sysconf():
"""Return a dictionary of scheme -> proxy server URL mappings.
This function uses the MacOSX framework SystemConfiguration
to fetch the proxy information.
"""
return _get_proxies()
def proxy_bypass(host):
if getproxies_environment():
return proxy_bypass_environment(host)
else:
return proxy_bypass_macosx_sysconf(host)
def getproxies():
return getproxies_environment() or getproxies_macosx_sysconf()
elif os.name == 'nt':
def getproxies_registry():
"""Return a dictionary of scheme -> proxy server URL mappings.
Win32 uses the registry to store proxies.
"""
proxies = {}
try:
import _winreg
except ImportError:
# Std module, so should be around - but you never know!
return proxies
try:
internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
proxyEnable = _winreg.QueryValueEx(internetSettings,
'ProxyEnable')[0]
if proxyEnable:
# Returned as Unicode but problems if not converted to ASCII
proxyServer = str(_winreg.QueryValueEx(internetSettings,
'ProxyServer')[0])
if '=' in proxyServer:
# Per-protocol settings
for p in proxyServer.split(';'):
protocol, address = p.split('=', 1)
# See if address has a type:// prefix
import re
if not re.match('^([^/:]+)://', address):
address = '%s://%s' % (protocol, address)
proxies[protocol] = address
else:
# Use one setting for all protocols
if proxyServer[:5] == 'http:':
proxies['http'] = proxyServer
else:
proxies['http'] = 'http://%s' % proxyServer
proxies['ftp'] = 'ftp://%s' % proxyServer
internetSettings.Close()
except (WindowsError, ValueError, TypeError):
# Either registry key not found etc, or the value in an
# unexpected format.
# proxies already set up to be empty so nothing to do
pass
return proxies
def getproxies():
"""Return a dictionary of scheme -> proxy server URL mappings.
Returns settings gathered from the environment, if specified,
or the registry.
"""
return getproxies_environment() or getproxies_registry()
def proxy_bypass_registry(host):
try:
import _winreg
import re
except ImportError:
# Std modules, so should be around - but you never know!
return 0
try:
internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
proxyEnable = _winreg.QueryValueEx(internetSettings,
'ProxyEnable')[0]
proxyOverride = str(_winreg.QueryValueEx(internetSettings,
'ProxyOverride')[0])
# ^^^^ Returned as Unicode but problems if not converted to ASCII
except WindowsError:
return 0
if not proxyEnable or not proxyOverride:
return 0
# try to make a host list from name and IP address.
rawHost, port = splitport(host)
host = [rawHost]
try:
addr = socket.gethostbyname(rawHost)
if addr != rawHost:
host.append(addr)
except socket.error:
pass
try:
fqdn = socket.getfqdn(rawHost)
if fqdn != rawHost:
host.append(fqdn)
except socket.error:
pass
# make a check value list from the registry entry: replace the
# '<local>' string by the localhost entry and the corresponding
# canonical entry.
proxyOverride = proxyOverride.split(';')
i = 0
while i < len(proxyOverride):
if proxyOverride[i] == '<local>':
proxyOverride[i:i+1] = ['localhost',
'127.0.0.1',
socket.gethostname(),
socket.gethostbyname(
socket.gethostname())]
i += 1
# print proxyOverride
# now check if we match one of the registry values.
for test in proxyOverride:
test = test.replace(".", r"\.") # mask dots
test = test.replace("*", r".*") # change glob sequence
test = test.replace("?", r".") # change glob char
for val in host:
# print "%s <--> %s" %( test, val )
if re.match(test, val, re.I):
return 1
return 0
def proxy_bypass(host):
"""Return a dictionary of scheme -> proxy server URL mappings.
Returns settings gathered from the environment, if specified,
or the registry.
"""
if getproxies_environment():
return proxy_bypass_environment(host)
else:
return proxy_bypass_registry(host)
else:
# By default use environment variables
getproxies = getproxies_environment
proxy_bypass = proxy_bypass_environment
# Test and time quote() and unquote()
def test1():
s = ''
for i in range(256): s = s + chr(i)
s = s*4
t0 = time.time()
qs = quote(s)
uqs = unquote(qs)
t1 = time.time()
if uqs != s:
print 'Wrong!'
print repr(s)
print repr(qs)
print repr(uqs)
print round(t1 - t0, 3), 'sec'
def reporthook(blocknum, blocksize, totalsize):
# Report during remote transfers
print "Block number: %d, Block size: %d, Total size: %d" % (
blocknum, blocksize, totalsize)
# Test program
def test(args=[]):
if not args:
args = [
'/etc/passwd',
'file:/etc/passwd',
'file://localhost/etc/passwd',
'ftp://ftp.gnu.org/pub/README',
'http://www.python.org/index.html',
]
if hasattr(URLopener, "open_https"):
args.append('https://synergy.as.cmu.edu/~geek/')
try:
for url in args:
print '-'*10, url, '-'*10
fn, h = urlretrieve(url, None, reporthook)
print fn
if h:
print '======'
for k in h.keys(): print k + ':', h[k]
print '======'
fp = open(fn, 'rb')
data = fp.read()
del fp
if '\r' in data:
table = string.maketrans("", "")
data = data.translate(table, "\r")
print data
fn, h = None, None
print '-'*40
finally:
urlcleanup()
def main():
import getopt, sys
try:
opts, args = getopt.getopt(sys.argv[1:], "th")
except getopt.error, msg:
print msg
print "Use -h for help"
return
t = 0
for o, a in opts:
if o == '-t':
t = t + 1
if o == '-h':
print "Usage: python urllib.py [-t] [url ...]"
print "-t runs self-test;",
print "otherwise, contents of urls are printed"
return
if t:
if t > 1:
test1()
test(args)
else:
if not args:
print "Use -h for help"
for url in args:
print urlopen(url).read(),
# Run test program when run as a script
if __name__ == '__main__':
main()
|