summaryrefslogtreecommitdiffstats
path: root/Lib/urllib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/urllib')
-rw-r--r--Lib/urllib/error.py7
-rw-r--r--Lib/urllib/parse.py204
-rw-r--r--Lib/urllib/request.py160
-rw-r--r--Lib/urllib/robotparser.py4
4 files changed, 226 insertions, 149 deletions
diff --git a/Lib/urllib/error.py b/Lib/urllib/error.py
index 45b7169..c5b675d 100644
--- a/Lib/urllib/error.py
+++ b/Lib/urllib/error.py
@@ -35,6 +35,7 @@ class URLError(OSError):
def __str__(self):
return '<urlopen error %s>' % self.reason
+
class HTTPError(URLError, urllib.response.addinfourl):
"""Raised when HTTP error occurs, but also acts like non-error return"""
__super_init = urllib.response.addinfourl.__init__
@@ -55,6 +56,9 @@ class HTTPError(URLError, urllib.response.addinfourl):
def __str__(self):
return 'HTTP Error %s: %s' % (self.code, self.msg)
+ def __repr__(self):
+ return '<HTTPError %s: %r>' % (self.code, self.msg)
+
# since URLError specifies a .reason attribute, HTTPError should also
# provide this attribute. See issue13211 for discussion.
@property
@@ -69,8 +73,9 @@ class HTTPError(URLError, urllib.response.addinfourl):
def headers(self, headers):
self.hdrs = headers
-# exception raised when downloaded size does not match content-length
+
class ContentTooShortError(URLError):
+ """Exception raised when downloaded size does not match content-length."""
def __init__(self, message, content):
URLError.__init__(self, message)
self.content = content
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index d368331..4d7fcec 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -34,7 +34,9 @@ import collections
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
"urlsplit", "urlunsplit", "urlencode", "parse_qs",
"parse_qsl", "quote", "quote_plus", "quote_from_bytes",
- "unquote", "unquote_plus", "unquote_to_bytes"]
+ "unquote", "unquote_plus", "unquote_to_bytes",
+ "DefragResult", "ParseResult", "SplitResult",
+ "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"]
# A classification of schemes ('' means apply by default)
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
@@ -409,11 +411,13 @@ def urljoin(base, url, allow_fragments=True):
return url
if not url:
return base
+
base, url, _coerce_result = _coerce_args(base, url)
bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
urlparse(base, '', allow_fragments)
scheme, netloc, path, params, query, fragment = \
urlparse(url, bscheme, allow_fragments)
+
if scheme != bscheme or scheme not in uses_relative:
return _coerce_result(url)
if scheme in uses_netloc:
@@ -421,9 +425,7 @@ def urljoin(base, url, allow_fragments=True):
return _coerce_result(urlunparse((scheme, netloc, path,
params, query, fragment)))
netloc = bnetloc
- if path[:1] == '/':
- return _coerce_result(urlunparse((scheme, netloc, path,
- params, query, fragment)))
+
if not path and not params:
path = bpath
params = bparams
@@ -431,29 +433,45 @@ def urljoin(base, url, allow_fragments=True):
query = bquery
return _coerce_result(urlunparse((scheme, netloc, path,
params, query, fragment)))
- segments = bpath.split('/')[:-1] + path.split('/')
- # XXX The stuff below is bogus in various ways...
- if segments[-1] == '.':
- segments[-1] = ''
- while '.' in segments:
- segments.remove('.')
- while 1:
- i = 1
- n = len(segments) - 1
- while i < n:
- if (segments[i] == '..'
- and segments[i-1] not in ('', '..')):
- del segments[i-1:i+1]
- break
- i = i+1
+
+ base_parts = bpath.split('/')
+ if base_parts[-1] != '':
+ # the last item is not a directory, so will not be taken into account
+ # in resolving the relative path
+ del base_parts[-1]
+
+ # for rfc3986, ignore all base path should the first character be root.
+ if path[:1] == '/':
+ segments = path.split('/')
+ else:
+ segments = base_parts + path.split('/')
+ # filter out elements that would cause redundant slashes on re-joining
+ # the resolved_path
+ segments[1:-1] = filter(None, segments[1:-1])
+
+ resolved_path = []
+
+ for seg in segments:
+ if seg == '..':
+ try:
+ resolved_path.pop()
+ except IndexError:
+ # ignore any .. segments that would otherwise cause an IndexError
+ # when popped from resolved_path if resolving for rfc3986
+ pass
+ elif seg == '.':
+ continue
else:
- break
- if segments == ['', '..']:
- segments[-1] = ''
- elif len(segments) >= 2 and segments[-1] == '..':
- segments[-2:] = ['']
- return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments),
- params, query, fragment)))
+ resolved_path.append(seg)
+
+ if segments[-1] in ('.', '..'):
+ # do some post-processing here. if the last segment was a relative dir,
+ # then we need to append the trailing '/'
+ resolved_path.append('')
+
+ return _coerce_result(urlunparse((scheme, netloc, '/'.join(
+ resolved_path) or '/', params, query, fragment)))
+
def urldefrag(url):
"""Removes any existing fragment from URL.
@@ -641,7 +659,7 @@ class Quoter(collections.defaultdict):
def __repr__(self):
# Without this, will just display as a defaultdict
- return "<Quoter %r>" % dict(self)
+ return "<%s %r>" % (self.__class__.__name__, dict(self))
def __missing__(self, b):
# Handle a cache miss. Store quoted string in cache and return.
@@ -732,7 +750,8 @@ def quote_from_bytes(bs, safe='/'):
_safe_quoters[safe] = quoter = Quoter(safe).__getitem__
return ''.join([quoter(char) for char in bs])
-def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
+def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
+ quote_via=quote_plus):
"""Encode a dict or sequence of two-element tuples into a URL query string.
If any values in the query arg are sequences and doseq is true, each
@@ -744,8 +763,8 @@ def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
The components of a query arg may each be either a string or a bytes type.
- The safe, encoding, and errors parameters are passed down to quote_plus()
- (encoding and errors only if a component is a str).
+ The safe, encoding, and errors parameters are passed down to the function
+ specified by quote_via (encoding and errors only if a component is a str).
"""
if hasattr(query, "items"):
@@ -771,27 +790,27 @@ def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
if not doseq:
for k, v in query:
if isinstance(k, bytes):
- k = quote_plus(k, safe)
+ k = quote_via(k, safe)
else:
- k = quote_plus(str(k), safe, encoding, errors)
+ k = quote_via(str(k), safe, encoding, errors)
if isinstance(v, bytes):
- v = quote_plus(v, safe)
+ v = quote_via(v, safe)
else:
- v = quote_plus(str(v), safe, encoding, errors)
+ v = quote_via(str(v), safe, encoding, errors)
l.append(k + '=' + v)
else:
for k, v in query:
if isinstance(k, bytes):
- k = quote_plus(k, safe)
+ k = quote_via(k, safe)
else:
- k = quote_plus(str(k), safe, encoding, errors)
+ k = quote_via(str(k), safe, encoding, errors)
if isinstance(v, bytes):
- v = quote_plus(v, safe)
+ v = quote_via(v, safe)
l.append(k + '=' + v)
elif isinstance(v, str):
- v = quote_plus(v, safe, encoding, errors)
+ v = quote_via(v, safe, encoding, errors)
l.append(k + '=' + v)
else:
try:
@@ -799,33 +818,18 @@ def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
x = len(v)
except TypeError:
# not a sequence
- v = quote_plus(str(v), safe, encoding, errors)
+ v = quote_via(str(v), safe, encoding, errors)
l.append(k + '=' + v)
else:
# loop over the sequence
for elt in v:
if isinstance(elt, bytes):
- elt = quote_plus(elt, safe)
+ elt = quote_via(elt, safe)
else:
- elt = quote_plus(str(elt), safe, encoding, errors)
+ elt = quote_via(str(elt), safe, encoding, errors)
l.append(k + '=' + elt)
return '&'.join(l)
-# Utilities to parse URLs (most of these return None for missing parts):
-# unwrap('<URL:type://host/path>') --> 'type://host/path'
-# splittype('type:opaquestring') --> 'type', 'opaquestring'
-# splithost('//host[:port]/path') --> 'host[:port]', '/path'
-# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
-# splitpasswd('user:passwd') -> 'user', 'passwd'
-# splitport('host:port') --> 'host', 'port'
-# splitquery('/path?query') --> '/path', 'query'
-# splittag('/path#tag') --> '/path', 'tag'
-# splitattr('/path;attr1=value1;attr2=value2;...') ->
-# '/path', ['attr1=value1', 'attr2=value2', ...]
-# splitvalue('attr=value') --> 'attr', 'value'
-# urllib.parse.unquote('abc%20def') -> 'abc def'
-# quote('abc def') -> 'abc%20def')
-
def to_bytes(url):
"""to_bytes(u"URL") --> 'URL'."""
# Most URL schemes require ASCII. If that changes, the conversion
@@ -852,12 +856,12 @@ def splittype(url):
"""splittype('type:opaquestring') --> 'type', 'opaquestring'."""
global _typeprog
if _typeprog is None:
- _typeprog = re.compile('^([^/:]+):')
+ _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)
match = _typeprog.match(url)
if match:
- scheme = match.group(1)
- return scheme.lower(), url[len(scheme) + 1:]
+ scheme, data = match.groups()
+ return scheme.lower(), data
return None, url
_hostprog = None
@@ -865,38 +869,25 @@ def splithost(url):
"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
global _hostprog
if _hostprog is None:
- _hostprog = re.compile('^//([^/?]*)(.*)$')
+ _hostprog = re.compile('//([^/?]*)(.*)', re.DOTALL)
match = _hostprog.match(url)
if match:
- host_port = match.group(1)
- path = match.group(2)
- if path and not path.startswith('/'):
+ host_port, path = match.groups()
+ if path and path[0] != '/':
path = '/' + path
return host_port, path
return None, url
-_userprog = None
def splituser(host):
"""splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
- global _userprog
- if _userprog is None:
- _userprog = re.compile('^(.*)@(.*)$')
-
- match = _userprog.match(host)
- if match: return match.group(1, 2)
- return None, host
+ user, delim, host = host.rpartition('@')
+ return (user if delim else None), host
-_passwdprog = None
def splitpasswd(user):
"""splitpasswd('user:passwd') -> 'user', 'passwd'."""
- global _passwdprog
- if _passwdprog is None:
- _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
-
- match = _passwdprog.match(user)
- if match: return match.group(1, 2)
- return user, None
+ user, delim, passwd = user.partition(':')
+ return user, (passwd if delim else None)
# splittag('/path#tag') --> '/path', 'tag'
_portprog = None
@@ -904,7 +895,7 @@ def splitport(host):
"""splitport('host:port') --> 'host', 'port'."""
global _portprog
if _portprog is None:
- _portprog = re.compile('^(.*):([0-9]*)$')
+ _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL)
match = _portprog.match(host)
if match:
@@ -913,47 +904,34 @@ def splitport(host):
return host, port
return host, None
-_nportprog = None
def splitnport(host, defport=-1):
"""Split host and port, returning numeric port.
Return given default port if no ':' found; defaults to -1.
Return numerical port if a valid number are found after ':'.
Return None if ':' but not a valid number."""
- global _nportprog
- if _nportprog is None:
- _nportprog = re.compile('^(.*):(.*)$')
-
- match = _nportprog.match(host)
- if match:
- host, port = match.group(1, 2)
- if port:
- try:
- nport = int(port)
- except ValueError:
- nport = None
- return host, nport
+ host, delim, port = host.rpartition(':')
+ if not delim:
+ host = port
+ elif port:
+ try:
+ nport = int(port)
+ except ValueError:
+ nport = None
+ return host, nport
return host, defport
-_queryprog = None
def splitquery(url):
"""splitquery('/path?query') --> '/path', 'query'."""
- global _queryprog
- if _queryprog is None:
- _queryprog = re.compile('^(.*)\?([^?]*)$')
-
- match = _queryprog.match(url)
- if match: return match.group(1, 2)
+ path, delim, query = url.rpartition('?')
+ if delim:
+ return path, query
return url, None
-_tagprog = None
def splittag(url):
"""splittag('/path#tag') --> '/path', 'tag'."""
- global _tagprog
- if _tagprog is None:
- _tagprog = re.compile('^(.*)#([^#]*)$')
-
- match = _tagprog.match(url)
- if match: return match.group(1, 2)
+ path, delim, tag = url.rpartition('#')
+ if delim:
+ return path, tag
return url, None
def splitattr(url):
@@ -962,13 +940,7 @@ def splitattr(url):
words = url.split(';')
return words[0], words[1:]
-_valueprog = None
def splitvalue(attr):
"""splitvalue('attr=value') --> 'attr', 'value'."""
- global _valueprog
- if _valueprog is None:
- _valueprog = re.compile('^([^=]*)=(.*)$')
-
- match = _valueprog.match(attr)
- if match: return match.group(1, 2)
- return attr, None
+ attr, delim, value = attr.partition('=')
+ return attr, (value if delim else None)
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index f769386..3be327d 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -91,6 +91,7 @@ import os
import posixpath
import re
import socket
+import string
import sys
import time
import collections
@@ -120,9 +121,10 @@ __all__ = [
'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
- 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
- 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
- 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
+ 'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
+ 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
+ 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
+ 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
'UnknownHandler', 'HTTPErrorProcessor',
# Functions
'urlopen', 'install_opener', 'build_opener',
@@ -615,8 +617,12 @@ class HTTPRedirectHandler(BaseHandler):
# from the user (of urllib.request, in this case). In practice,
# essentially all clients do redirect in this case, so we do
# the same.
- # be conciliant with URIs containing a space
+
+ # Be conciliant with URIs containing a space. This is mainly
+ # redundant with the more complete encoding done in http_error_302(),
+ # but it is kept for compatibility with other callers.
newurl = newurl.replace(' ', '%20')
+
CONTENT_HEADERS = ("content-length", "content-type")
newheaders = dict((k, v) for k, v in req.headers.items()
if k.lower() not in CONTENT_HEADERS)
@@ -651,11 +657,16 @@ class HTTPRedirectHandler(BaseHandler):
"%s - Redirection to url '%s' is not allowed" % (msg, newurl),
headers, fp)
- if not urlparts.path:
+ if not urlparts.path and urlparts.netloc:
urlparts = list(urlparts)
urlparts[2] = "/"
newurl = urlunparse(urlparts)
+ # http.client.parse_headers() decodes as ISO-8859-1. Recover the
+ # original bytes and percent-encode non-ASCII bytes, and any special
+ # characters such as the space.
+ newurl = quote(
+ newurl, encoding="iso-8859-1", safe=string.punctuation)
newurl = urljoin(req.full_url, newurl)
# XXX Probably want to forget about the state of the current
@@ -836,6 +847,37 @@ class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
return HTTPPasswordMgr.find_user_password(self, None, authuri)
+class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
+
+ def __init__(self, *args, **kwargs):
+ self.authenticated = {}
+ super().__init__(*args, **kwargs)
+
+ def add_password(self, realm, uri, user, passwd, is_authenticated=False):
+ self.update_authenticated(uri, is_authenticated)
+ # Add a default for prior auth requests
+ if realm is not None:
+ super().add_password(None, uri, user, passwd)
+ super().add_password(realm, uri, user, passwd)
+
+ def update_authenticated(self, uri, is_authenticated=False):
+ # uri could be a single URI or a sequence
+ if isinstance(uri, str):
+ uri = [uri]
+
+ for default_port in True, False:
+ for u in uri:
+ reduced_uri = self.reduce_uri(u, default_port)
+ self.authenticated[reduced_uri] = is_authenticated
+
+ def is_authenticated(self, authuri):
+ for default_port in True, False:
+ reduced_authuri = self.reduce_uri(authuri, default_port)
+ for uri in self.authenticated:
+ if self.is_suburi(uri, reduced_authuri):
+ return self.authenticated[uri]
+
+
class AbstractBasicAuthHandler:
# XXX this allows for multiple auth-schemes, but will stupidly pick
@@ -890,6 +932,31 @@ class AbstractBasicAuthHandler:
else:
return None
+ def http_request(self, req):
+ if (not hasattr(self.passwd, 'is_authenticated') or
+ not self.passwd.is_authenticated(req.full_url)):
+ return req
+
+ if not req.has_header('Authorization'):
+ user, passwd = self.passwd.find_user_password(None, req.full_url)
+ credentials = '{0}:{1}'.format(user, passwd).encode()
+ auth_str = base64.standard_b64encode(credentials).decode()
+ req.add_unredirected_header('Authorization',
+ 'Basic {}'.format(auth_str.strip()))
+ return req
+
+ def http_response(self, req, response):
+ if hasattr(self.passwd, 'is_authenticated'):
+ if 200 <= response.code < 300:
+ self.passwd.update_authenticated(req.full_url, True)
+ else:
+ self.passwd.update_authenticated(req.full_url, False)
+ return response
+
+ https_request = http_request
+ https_response = http_response
+
+
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
@@ -1054,6 +1121,9 @@ class AbstractDigestAuthHandler:
elif algorithm == 'SHA':
H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
# XXX MD5-sess
+ else:
+ raise ValueError("Unsupported digest authentication "
+ "algorithm %r" % algorithm)
KD = lambda s, d: H("%s:%s" % (s, d))
return H, KD
@@ -1151,6 +1221,7 @@ class AbstractHTTPHandler(BaseHandler):
# will parse host:port
h = http_class(host, timeout=req.timeout, **http_conn_args)
+ h.set_debuglevel(self._debuglevel)
headers = dict(req.unredirected_hdrs)
headers.update(dict((k, v) for k, v in req.headers.items()
@@ -1993,18 +2064,20 @@ class FancyURLopener(URLopener):
def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 302 -- relocated (temporarily)."""
self.tries += 1
- if self.maxtries and self.tries >= self.maxtries:
- if hasattr(self, "http_error_500"):
- meth = self.http_error_500
- else:
- meth = self.http_error_default
+ try:
+ if self.maxtries and self.tries >= self.maxtries:
+ if hasattr(self, "http_error_500"):
+ meth = self.http_error_500
+ else:
+ meth = self.http_error_default
+ return meth(url, fp, 500,
+ "Internal Server Error: Redirect Recursion",
+ headers)
+ result = self.redirect_internal(url, fp, errcode, errmsg,
+ headers, data)
+ return result
+ finally:
self.tries = 0
- return meth(url, fp, 500,
- "Internal Server Error: Redirect Recursion", headers)
- result = self.redirect_internal(url, fp, errcode, errmsg, headers,
- data)
- self.tries = 0
- return result
def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
if 'location' in headers:
@@ -2333,26 +2406,41 @@ def getproxies_environment():
"""
proxies = {}
+ # in order to prefer lowercase variables, process environment in
+ # two passes: first matches any, second pass matches lowercase only
for name, value in os.environ.items():
name = name.lower()
if value and name[-6:] == '_proxy':
proxies[name[:-6]] = value
-
# CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
# (non-all-lowercase) as it may be set from the web server by a "Proxy:"
# header from the client
+ # If "proxy" is lowercase, it will still be used thanks to the next block
if 'REQUEST_METHOD' in os.environ:
proxies.pop('http', None)
-
+ for name, value in os.environ.items():
+ if name[-6:] == '_proxy':
+ name = name.lower()
+ if value:
+ proxies[name[:-6]] = value
+ else:
+ proxies.pop(name[:-6], None)
return proxies
-def proxy_bypass_environment(host):
+def proxy_bypass_environment(host, proxies=None):
"""Test if proxies should not be used for a particular host.
- Checks the environment for a variable named no_proxy, which should
- be a list of DNS suffixes separated by commas, or '*' for all hosts.
+ Checks the proxy dict for the value of no_proxy, which should
+ be a list of comma separated DNS suffixes, or '*' for all hosts.
+
"""
- no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
+ if proxies is None:
+ proxies = getproxies_environment()
+ # don't bypass, if no_proxy isn't specified
+ try:
+ no_proxy = proxies['no']
+ except KeyError:
+ return 0
# '*' is special case for always bypass
if no_proxy == '*':
return 1
@@ -2361,8 +2449,12 @@ def proxy_bypass_environment(host):
# check if the host ends with any of the DNS suffixes
no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
for name in no_proxy_list:
- if name and (hostonly.endswith(name) or host.endswith(name)):
- return 1
+ if name:
+ name = re.escape(name)
+ pattern = r'(.+\.)?%s$' % name
+ if (re.match(pattern, hostonly, re.I)
+ or re.match(pattern, host, re.I)):
+ return 1
# otherwise, don't bypass
return 0
@@ -2447,8 +2539,15 @@ if sys.platform == 'darwin':
def proxy_bypass(host):
- if getproxies_environment():
- return proxy_bypass_environment(host)
+ """Return True, if host should be bypassed.
+
+ Checks proxy settings gathered from the environment, if specified,
+ or from the MacOSX framework SystemConfiguration.
+
+ """
+ proxies = getproxies_environment()
+ if proxies:
+ return proxy_bypass_environment(host, proxies)
else:
return proxy_bypass_macosx_sysconf(host)
@@ -2562,14 +2661,15 @@ elif os.name == 'nt':
return 0
def proxy_bypass(host):
- """Return a dictionary of scheme -> proxy server URL mappings.
+ """Return True, if host should be bypassed.
- Returns settings gathered from the environment, if specified,
+ Checks proxy settings gathered from the environment, if specified,
or the registry.
"""
- if getproxies_environment():
- return proxy_bypass_environment(host)
+ proxies = getproxies_environment()
+ if proxies:
+ return proxy_bypass_environment(host, proxies)
else:
return proxy_bypass_registry(host)
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index 1d7b751..8b69fd9 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -132,7 +132,7 @@ class RobotFileParser:
return True
# Until the robots.txt file has been read or found not
# to exist, we must assume that no url is allowable.
- # This prevents false positives when a user erronenously
+ # This prevents false positives when a user erroneously
# calls can_fetch() before calling read().
if not self.last_checked:
return False
@@ -172,7 +172,7 @@ class RuleLine:
return self.path == "*" or filename.startswith(self.path)
def __str__(self):
- return (self.allowance and "Allow" or "Disallow") + ": " + self.path
+ return ("Allow" if self.allowance else "Disallow") + ": " + self.path
class Entry: