summary refs log tree commit diff stats
path: root/Lib/urllib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/urllib')
-rw-r--r-- Lib/urllib/parse.py 161
-rw-r--r-- Lib/urllib/request.py 15
-rw-r--r-- Lib/urllib/robotparser.py 2
3 files changed, 90 insertions, 88 deletions
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index d368331..6012d35 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -34,7 +34,9 @@ import collections
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
"urlsplit", "urlunsplit", "urlencode", "parse_qs",
"parse_qsl", "quote", "quote_plus", "quote_from_bytes",
- "unquote", "unquote_plus", "unquote_to_bytes"]
+ "unquote", "unquote_plus", "unquote_to_bytes",
+ "DefragResult", "ParseResult", "SplitResult",
+ "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"]
# A classification of schemes ('' means apply by default)
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
@@ -409,11 +411,13 @@ def urljoin(base, url, allow_fragments=True):
return url
if not url:
return base
+
base, url, _coerce_result = _coerce_args(base, url)
bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
urlparse(base, '', allow_fragments)
scheme, netloc, path, params, query, fragment = \
urlparse(url, bscheme, allow_fragments)
+
if scheme != bscheme or scheme not in uses_relative:
return _coerce_result(url)
if scheme in uses_netloc:
@@ -421,9 +425,7 @@ def urljoin(base, url, allow_fragments=True):
return _coerce_result(urlunparse((scheme, netloc, path,
params, query, fragment)))
netloc = bnetloc
- if path[:1] == '/':
- return _coerce_result(urlunparse((scheme, netloc, path,
- params, query, fragment)))
+
if not path and not params:
path = bpath
params = bparams
@@ -431,29 +433,46 @@ def urljoin(base, url, allow_fragments=True):
query = bquery
return _coerce_result(urlunparse((scheme, netloc, path,
params, query, fragment)))
- segments = bpath.split('/')[:-1] + path.split('/')
- # XXX The stuff below is bogus in various ways...
- if segments[-1] == '.':
- segments[-1] = ''
- while '.' in segments:
- segments.remove('.')
- while 1:
- i = 1
- n = len(segments) - 1
- while i < n:
- if (segments[i] == '..'
- and segments[i-1] not in ('', '..')):
- del segments[i-1:i+1]
- break
- i = i+1
+
+ base_parts = bpath.split('/')
+ if base_parts[-1] != '':
+ # the last item is not a directory, so will not be taken into account
+ # in resolving the relative path
+ del base_parts[-1]
+
+ # for rfc3986, ignore all base path should the first character be root.
+ if path[:1] == '/':
+ segments = path.split('/')
+ else:
+ segments = base_parts + path.split('/')
+ # filter out elements that would cause redundant slashes on re-joining
+ # the resolved_path
+ segments = segments[0:1] + [
+ s for s in segments[1:-1] if len(s) > 0] + segments[-1:]
+
+ resolved_path = []
+
+ for seg in segments:
+ if seg == '..':
+ try:
+ resolved_path.pop()
+ except IndexError:
+ # ignore any .. segments that would otherwise cause an IndexError
+ # when popped from resolved_path if resolving for rfc3986
+ pass
+ elif seg == '.':
+ continue
else:
- break
- if segments == ['', '..']:
- segments[-1] = ''
- elif len(segments) >= 2 and segments[-1] == '..':
- segments[-2:] = ['']
- return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments),
- params, query, fragment)))
+ resolved_path.append(seg)
+
+ if segments[-1] in ('.', '..'):
+ # do some post-processing here. if the last segment was a relative dir,
+ # then we need to append the trailing '/'
+ resolved_path.append('')
+
+ return _coerce_result(urlunparse((scheme, netloc, '/'.join(
+ resolved_path) or '/', params, query, fragment)))
+
def urldefrag(url):
"""Removes any existing fragment from URL.
@@ -641,7 +660,7 @@ class Quoter(collections.defaultdict):
def __repr__(self):
# Without this, will just display as a defaultdict
- return "<Quoter %r>" % dict(self)
+ return "<%s %r>" % (self.__class__.__name__, dict(self))
def __missing__(self, b):
# Handle a cache miss. Store quoted string in cache and return.
@@ -852,12 +871,12 @@ def splittype(url):
"""splittype('type:opaquestring') --> 'type', 'opaquestring'."""
global _typeprog
if _typeprog is None:
- _typeprog = re.compile('^([^/:]+):')
+ _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)
match = _typeprog.match(url)
if match:
- scheme = match.group(1)
- return scheme.lower(), url[len(scheme) + 1:]
+ scheme, data = match.groups()
+ return scheme.lower(), data
return None, url
_hostprog = None
@@ -865,38 +884,25 @@ def splithost(url):
"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
global _hostprog
if _hostprog is None:
- _hostprog = re.compile('^//([^/?]*)(.*)$')
+ _hostprog = re.compile('//([^/?]*)(.*)', re.DOTALL)
match = _hostprog.match(url)
if match:
- host_port = match.group(1)
- path = match.group(2)
- if path and not path.startswith('/'):
+ host_port, path = match.groups()
+ if path and path[0] != '/':
path = '/' + path
return host_port, path
return None, url
-_userprog = None
def splituser(host):
"""splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
- global _userprog
- if _userprog is None:
- _userprog = re.compile('^(.*)@(.*)$')
+ user, delim, host = host.rpartition('@')
+ return (user if delim else None), host
- match = _userprog.match(host)
- if match: return match.group(1, 2)
- return None, host
-
-_passwdprog = None
def splitpasswd(user):
"""splitpasswd('user:passwd') -> 'user', 'passwd'."""
- global _passwdprog
- if _passwdprog is None:
- _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
-
- match = _passwdprog.match(user)
- if match: return match.group(1, 2)
- return user, None
+ user, delim, passwd = user.partition(':')
+ return user, (passwd if delim else None)
# splittag('/path#tag') --> '/path', 'tag'
_portprog = None
@@ -904,7 +910,7 @@ def splitport(host):
"""splitport('host:port') --> 'host', 'port'."""
global _portprog
if _portprog is None:
- _portprog = re.compile('^(.*):([0-9]*)$')
+ _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL)
match = _portprog.match(host)
if match:
@@ -913,47 +919,34 @@ def splitport(host):
return host, port
return host, None
-_nportprog = None
def splitnport(host, defport=-1):
"""Split host and port, returning numeric port.
Return given default port if no ':' found; defaults to -1.
Return numerical port if a valid number are found after ':'.
Return None if ':' but not a valid number."""
- global _nportprog
- if _nportprog is None:
- _nportprog = re.compile('^(.*):(.*)$')
-
- match = _nportprog.match(host)
- if match:
- host, port = match.group(1, 2)
- if port:
- try:
- nport = int(port)
- except ValueError:
- nport = None
- return host, nport
+ host, delim, port = host.rpartition(':')
+ if not delim:
+ host = port
+ elif port:
+ try:
+ nport = int(port)
+ except ValueError:
+ nport = None
+ return host, nport
return host, defport
-_queryprog = None
def splitquery(url):
"""splitquery('/path?query') --> '/path', 'query'."""
- global _queryprog
- if _queryprog is None:
- _queryprog = re.compile('^(.*)\?([^?]*)$')
-
- match = _queryprog.match(url)
- if match: return match.group(1, 2)
+ path, delim, query = url.rpartition('?')
+ if delim:
+ return path, query
return url, None
-_tagprog = None
def splittag(url):
"""splittag('/path#tag') --> '/path', 'tag'."""
- global _tagprog
- if _tagprog is None:
- _tagprog = re.compile('^(.*)#([^#]*)$')
-
- match = _tagprog.match(url)
- if match: return match.group(1, 2)
+ path, delim, tag = url.rpartition('#')
+ if delim:
+ return path, tag
return url, None
def splitattr(url):
@@ -962,13 +955,7 @@ def splitattr(url):
words = url.split(';')
return words[0], words[1:]
-_valueprog = None
def splitvalue(attr):
"""splitvalue('attr=value') --> 'attr', 'value'."""
- global _valueprog
- if _valueprog is None:
- _valueprog = re.compile('^([^=]*)=(.*)$')
-
- match = _valueprog.match(attr)
- if match: return match.group(1, 2)
- return attr, None
+ attr, delim, value = attr.partition('=')
+ return attr, (value if delim else None)
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 5cf0cf2..2e436ec 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -916,6 +916,21 @@ class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
return response
+class HTTPBasicPriorAuthHandler(HTTPBasicAuthHandler):
+ handler_order = 400
+
+ def http_request(self, req):
+ if not req.has_header('Authorization'):
+ user, passwd = self.passwd.find_user_password(None, req.host)
+ credentials = '{0}:{1}'.format(user, passwd).encode()
+ auth_str = base64.standard_b64encode(credentials).decode()
+ req.add_unredirected_header('Authorization',
+ 'Basic {}'.format(auth_str.strip()))
+ return req
+
+ https_request = http_request
+
+
# Return n random bytes.
_randombytes = os.urandom
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index 1d7b751..4fbb0cb 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -172,7 +172,7 @@ class RuleLine:
return self.path == "*" or filename.startswith(self.path)
def __str__(self):
- return (self.allowance and "Allow" or "Disallow") + ": " + self.path
+ return ("Allow" if self.allowance else "Disallow") + ": " + self.path
class Entry: