summaryrefslogtreecommitdiffstats
path: root/Lib/urllib
diff options
context:
space:
mode:
authorVictor Stinner <vstinner@python.org>2020-04-02 00:52:20 (GMT)
committerGitHub <noreply@github.com>2020-04-02 00:52:20 (GMT)
commit0b297d4ff1c0e4480ad33acae793fbaf4bf015b4 (patch)
tree7f39cf8cddaf63245f29e784ee570586d902afed /Lib/urllib
parentd57cf557366584539f400db523b555296487e8f5 (diff)
downloadcpython-0b297d4ff1c0e4480ad33acae793fbaf4bf015b4.zip
cpython-0b297d4ff1c0e4480ad33acae793fbaf4bf015b4.tar.gz
cpython-0b297d4ff1c0e4480ad33acae793fbaf4bf015b4.tar.bz2
bpo-39503: CVE-2020-8492: Fix AbstractBasicAuthHandler (GH-18284)
The AbstractBasicAuthHandler class of the urllib.request module uses an inefficient regular expression which can be exploited by an attacker to cause a denial of service. Fix the regex to prevent the catastrophic backtracking. Vulnerability reported by Ben Caller and Matt Schwager. AbstractBasicAuthHandler of urllib.request now parses all WWW-Authenticate HTTP headers and accepts multiple challenges per header: use the realm of the first Basic challenge. Co-Authored-By: Serhiy Storchaka <storchaka@gmail.com>
Diffstat (limited to 'Lib/urllib')
-rw-r--r--Lib/urllib/request.py69
1 files changed, 50 insertions, 19 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 7fe5053..2a3d715 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -937,8 +937,15 @@ class AbstractBasicAuthHandler:
# allow for double- and single-quoted realm values
# (single quotes are a violation of the RFC, but appear in the wild)
- rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
- 'realm=(["\']?)([^"\']*)\\2', re.I)
+ rx = re.compile('(?:^|,)' # start of the string or ','
+ '[ \t]*' # optional whitespaces
+ '([^ \t]+)' # scheme like "Basic"
+ '[ \t]+' # mandatory whitespaces
+ # realm=xxx
+ # realm='xxx'
+ # realm="xxx"
+ 'realm=(["\']?)([^"\']*)\\2',
+ re.I)
# XXX could pre-emptively send auth info already accepted (RFC 2617,
# end of section 2, and section 1.2 immediately after "credentials"
@@ -950,27 +957,51 @@ class AbstractBasicAuthHandler:
self.passwd = password_mgr
self.add_password = self.passwd.add_password
+ def _parse_realm(self, header):
+ # parse WWW-Authenticate header: accept multiple challenges per header
+ found_challenge = False
+ for mo in AbstractBasicAuthHandler.rx.finditer(header):
+ scheme, quote, realm = mo.groups()
+ if quote not in ['"', "'"]:
+ warnings.warn("Basic Auth Realm was unquoted",
+ UserWarning, 3)
+
+ yield (scheme, realm)
+
+ found_challenge = True
+
+ if not found_challenge:
+ if header:
+ scheme = header.split()[0]
+ else:
+ scheme = ''
+ yield (scheme, None)
+
def http_error_auth_reqed(self, authreq, host, req, headers):
# host may be an authority (without userinfo) or a URL with an
# authority
- # XXX could be multiple headers
- authreq = headers.get(authreq, None)
+ headers = headers.get_all(authreq)
+ if not headers:
+ # no header found
+ return
- if authreq:
- scheme = authreq.split()[0]
- if scheme.lower() != 'basic':
- raise ValueError("AbstractBasicAuthHandler does not"
- " support the following scheme: '%s'" %
- scheme)
- else:
- mo = AbstractBasicAuthHandler.rx.search(authreq)
- if mo:
- scheme, quote, realm = mo.groups()
- if quote not in ['"',"'"]:
- warnings.warn("Basic Auth Realm was unquoted",
- UserWarning, 2)
- if scheme.lower() == 'basic':
- return self.retry_http_basic_auth(host, req, realm)
+ unsupported = None
+ for header in headers:
+ for scheme, realm in self._parse_realm(header):
+ if scheme.lower() != 'basic':
+ unsupported = scheme
+ continue
+
+ if realm is not None:
+ # Use the first matching Basic challenge.
+ # Ignore following challenges even if they use the Basic
+ # scheme.
+ return self.retry_http_basic_auth(host, req, realm)
+
+ if unsupported is not None:
+ raise ValueError("AbstractBasicAuthHandler does not "
+ "support the following scheme: %r"
+ % (scheme,))
def retry_http_basic_auth(self, host, req, realm):
user, pw = self.passwd.find_user_password(realm, host)