diff options
Diffstat (limited to 'Doc/howto/urllib2.rst')
-rw-r--r-- | Doc/howto/urllib2.rst | 50 |
1 files changed, 25 insertions, 25 deletions
diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst index d74729b..b71b300 100644 --- a/Doc/howto/urllib2.rst +++ b/Doc/howto/urllib2.rst @@ -10,7 +10,7 @@ HOWTO, available at `urllib2 - Le Manuel manquant <http://www.voidspace.org.uk/python/articles/urllib2_francais.shtml>`_. - + Introduction ============ @@ -19,9 +19,9 @@ Introduction You may also find useful the following article on fetching web resources with Python: - + * `Basic Authentication <http://www.voidspace.org.uk/python/articles/authentication.shtml>`_ - + A tutorial on *Basic Authentication*, with examples in Python. **urllib.request** is a `Python <http://www.python.org>`_ module for fetching URLs @@ -98,7 +98,7 @@ argument. The encoding is done using a function from the :mod:`urllib.parse` library. :: import urllib.parse - import urllib.request + import urllib.request url = 'http://www.someserver.com/cgi-bin/register.cgi' values = {'name' : 'Michael Foord', @@ -161,15 +161,15 @@ request as above, but identifies itself as a version of Internet Explorer [#]_. :: import urllib.parse - import urllib.request - + import urllib.request + url = 'http://www.someserver.com/cgi-bin/register.cgi' - user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' + user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' values = {'name' : 'Michael Foord', 'location' : 'Northampton', 'language' : 'Python' } headers = { 'User-Agent' : user_agent } - + data = urllib.parse.urlencode(values) req = urllib.request.Request(url, data, headers) response = urllib.request.urlopen(req) @@ -183,7 +183,7 @@ Handling Exceptions =================== *urlopen* raises :exc:`URLError` when it cannot handle a response (though as usual -with Python APIs, builtin exceptions such as +with Python APIs, builtin exceptions such as :exc:`ValueError`, :exc:`TypeError` etc. may also be raised). @@ -311,18 +311,18 @@ page returned. 
This means that as well as the code attribute, it also has read, geturl, and info methods as returned by the ``urllib.response`` module:: >>> req = urllib.request.Request('http://www.python.org/fish.html') - >>> try: + >>> try: >>> urllib.request.urlopen(req) >>> except urllib.error.URLError as e: >>> print(e.code) >>> print(e.read()) - >>> + >>> 404 - <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" + <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> - <?xml-stylesheet href="./css/ht2html.css" + <?xml-stylesheet href="./css/ht2html.css" type="text/css"?> - <html><head><title>Error 404: File Not Found</title> + <html><head><title>Error 404: File Not Found</title> ...... etc... Wrapping it Up @@ -376,7 +376,7 @@ Number 2 print('Error code: ', e.code) else: # everything is fine - + info and geturl =============== @@ -448,7 +448,7 @@ error code) requesting authentication. This specifies the authentication scheme and a 'realm'. The header looks like: ``Www-authenticate: SCHEME realm="REALM"``. -e.g. :: +e.g. :: Www-authenticate: Basic realm="cPanel Users" @@ -472,24 +472,24 @@ The top-level URL is the first URL that requires authentication. URLs "deeper" than the URL you pass to .add_password() will also match. :: # create a password manager - password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm() + password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm() # Add the username and password. # If we knew the realm, we could use it instead of ``None``. 
top_level_url = "http://example.com/foo/" password_mgr.add_password(None, top_level_url, username, password) - handler = urllib.request.HTTPBasicAuthHandler(password_mgr) + handler = urllib.request.HTTPBasicAuthHandler(password_mgr) # create "opener" (OpenerDirector instance) - opener = urllib.request.build_opener(handler) + opener = urllib.request.build_opener(handler) # use the opener to fetch a URL - opener.open(a_url) + opener.open(a_url) # Install the opener. # Now all calls to urllib.request.urlopen use our opener. - urllib.request.install_opener(opener) + urllib.request.install_opener(opener) .. note:: @@ -545,7 +545,7 @@ However, you can set the default timeout globally for all sockets using :: # timeout in seconds timeout = 10 - socket.setdefaulttimeout(timeout) + socket.setdefaulttimeout(timeout) # this call to urllib.request.urlopen now uses the default timeout # we have set in the socket module @@ -562,7 +562,7 @@ Footnotes This document was reviewed and revised by John Lee. .. [#] For an introduction to the CGI protocol see - `Writing Web Applications in Python <http://www.pyzine.com/Issue008/Section_Articles/article_CGIOne.html>`_. + `Writing Web Applications in Python <http://www.pyzine.com/Issue008/Section_Articles/article_CGIOne.html>`_. .. [#] Like Google for example. The *proper* way to use google from a program is to use `PyGoogle <http://pygoogle.sourceforge.net>`_ of course. See `Voidspace Google <http://www.voidspace.org.uk/python/recipebook.shtml#google>`_ @@ -579,6 +579,6 @@ This document was reviewed and revised by John Lee. is set to use the proxy, which urllib picks up on. In order to test scripts with a localhost server, I have to prevent urllib from using the proxy. -.. [#] urllib opener for SSL proxy (CONNECT method): `ASPN Cookbook Recipe +.. [#] urllib opener for SSL proxy (CONNECT method): `ASPN Cookbook Recipe <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/456195>`_. - + |