diff options
Diffstat (limited to 'Lib/urlparse.py')
-rw-r--r-- | Lib/urlparse.py | 131 |
1 files changed, 118 insertions, 13 deletions
diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 8b75051..eade040 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -16,12 +16,12 @@ uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', 'svn', 'svn+ssh', 'sftp'] non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', - 'telnet', 'wais', 'imap', 'snews', 'sip'] + 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', - 'https', 'shttp', 'rtsp', 'rtspu', 'sip', + 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', 'mms', '', 'sftp'] uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', - 'gopher', 'rtsp', 'rtspu', 'sip', ''] + 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais', 'https', 'shttp', 'snews', 'file', 'prospero', ''] @@ -41,7 +41,111 @@ def clear_cache(): _parse_cache = {} -def urlparse(url, scheme='', allow_fragments=1): +class BaseResult(tuple): + """Base class for the parsed result objects. + + This provides the attributes shared by the two derived result + objects as read-only properties. The derived classes are + responsible for checking the right number of arguments were + supplied to the constructor. + + """ + + __slots__ = () + + # Attributes that access the basic components of the URL: + + @property + def scheme(self): + return self[0] + + @property + def netloc(self): + return self[1] + + @property + def path(self): + return self[2] + + @property + def query(self): + return self[-2] + + @property + def fragment(self): + return self[-1] + + # Additional attributes that provide access to parsed-out portions + # of the netloc: + + @property + def username(self): + netloc = self.netloc + if "@" in netloc: + userinfo = netloc.split("@", 1)[0] + if ":" in userinfo: + userinfo = userinfo.split(":", 1)[0] + return userinfo + return None + + @property + def password(self): + netloc = self.netloc + if "@" in netloc: + userinfo = netloc.split("@", 1)[0] + if ":" in userinfo: + return userinfo.split(":", 1)[1] + return None + + @property + def hostname(self): + netloc = self.netloc + if "@" in netloc: + netloc = netloc.split("@", 1)[1] + if ":" in netloc: + netloc = netloc.split(":", 1)[0] + return netloc.lower() or None + + @property + def port(self): + netloc = self.netloc + if "@" in netloc: + netloc = netloc.split("@", 1)[1] + if ":" in netloc: + port = netloc.split(":", 1)[1] + return int(port, 10) + return None + + +class SplitResult(BaseResult): + + __slots__ = () + + def __new__(cls, scheme, netloc, path, query, fragment): + return BaseResult.__new__( + cls, (scheme, netloc, path, query, fragment)) + + def geturl(self): + return urlunsplit(self) + + +class ParseResult(BaseResult): + + __slots__ = () + + def __new__(cls, scheme, netloc, path, params, query, fragment): + return BaseResult.__new__( + cls, (scheme, netloc, path, params, query, fragment)) + + @property + def params(self): + return self[3] + + def geturl(self): + return urlunparse(self) + + +def urlparse(url, scheme='', allow_fragments=True): """Parse a URL into 6 components: <scheme>://<netloc>/<path>;<params>?<query>#<fragment> Return a 6-tuple: (scheme, netloc, path, params, query, fragment). @@ -53,7 +157,7 @@ def urlparse(url, scheme='', allow_fragments=1): url, params = _splitparams(url) else: params = '' - return scheme, netloc, url, params, query, fragment + return ParseResult(scheme, netloc, url, params, query, fragment) def _splitparams(url): if '/' in url: @@ -73,12 +177,13 @@ def _splitnetloc(url, start=0): delim = len(url) return url[start:delim], url[delim:] -def urlsplit(url, scheme='', allow_fragments=1): +def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> Return a 5-tuple: (scheme, netloc, path, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" + allow_fragments = bool(allow_fragments) key = url, scheme, allow_fragments cached = _parse_cache.get(key, None) if cached: @@ -97,9 +202,9 @@ def urlsplit(url, scheme='', allow_fragments=1): url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) - tuple = scheme, netloc, url, query, fragment - _parse_cache[key] = tuple - return tuple + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return v for c in url[:i]: if c not in scheme_chars: break @@ -111,9 +216,9 @@ def urlsplit(url, scheme='', allow_fragments=1): url, fragment = url.split('#', 1) if scheme in uses_query and '?' in url: url, query = url.split('?', 1) - tuple = scheme, netloc, url, query, fragment - _parse_cache[key] = tuple - return tuple + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return v def urlunparse((scheme, netloc, url, params, query, fragment)): """Put a parsed URL back together again. This may result in a @@ -136,7 +241,7 @@ def urlunsplit((scheme, netloc, url, query, fragment)): url = url + '#' + fragment return url -def urljoin(base, url, allow_fragments = 1): +def urljoin(base, url, allow_fragments=True): """Join a base URL and a possibly relative URL to form an absolute interpretation of the latter.""" if not base: |