diff options
author | Fred Drake <fdrake@acm.org> | 2006-04-01 22:14:43 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 2006-04-01 22:14:43 (GMT) |
commit | ad5177cf8da387c203d500d0a65995f9641373ba (patch) | |
tree | 838febacb85a56c962e29fb48796689b7ab7404f /Lib/urlparse.py | |
parent | f878b8120c89d53722ba77c5f29c680695fd4257 (diff) | |
download | cpython-ad5177cf8da387c203d500d0a65995f9641373ba.zip cpython-ad5177cf8da387c203d500d0a65995f9641373ba.tar.gz cpython-ad5177cf8da387c203d500d0a65995f9641373ba.tar.bz2 |
Patch #624325: urlparse.urlparse() and urlparse.urlsplit() results
now sport attributes that provide access to the parts of the result.
Diffstat (limited to 'Lib/urlparse.py')
-rw-r--r-- | Lib/urlparse.py | 125 |
1 files changed, 115 insertions, 10 deletions
diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 8d44853..eade040 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -41,7 +41,111 @@ def clear_cache(): _parse_cache = {} -def urlparse(url, scheme='', allow_fragments=1): +class BaseResult(tuple): + """Base class for the parsed result objects. + + This provides the attributes shared by the two derived result + objects as read-only properties. The derived classes are + responsible for checking the right number of arguments were + supplied to the constructor. + + """ + + __slots__ = () + + # Attributes that access the basic components of the URL: + + @property + def scheme(self): + return self[0] + + @property + def netloc(self): + return self[1] + + @property + def path(self): + return self[2] + + @property + def query(self): + return self[-2] + + @property + def fragment(self): + return self[-1] + + # Additional attributes that provide access to parsed-out portions + # of the netloc: + + @property + def username(self): + netloc = self.netloc + if "@" in netloc: + userinfo = netloc.split("@", 1)[0] + if ":" in userinfo: + userinfo = userinfo.split(":", 1)[0] + return userinfo + return None + + @property + def password(self): + netloc = self.netloc + if "@" in netloc: + userinfo = netloc.split("@", 1)[0] + if ":" in userinfo: + return userinfo.split(":", 1)[1] + return None + + @property + def hostname(self): + netloc = self.netloc + if "@" in netloc: + netloc = netloc.split("@", 1)[1] + if ":" in netloc: + netloc = netloc.split(":", 1)[0] + return netloc.lower() or None + + @property + def port(self): + netloc = self.netloc + if "@" in netloc: + netloc = netloc.split("@", 1)[1] + if ":" in netloc: + port = netloc.split(":", 1)[1] + return int(port, 10) + return None + + +class SplitResult(BaseResult): + + __slots__ = () + + def __new__(cls, scheme, netloc, path, query, fragment): + return BaseResult.__new__( + cls, (scheme, netloc, path, query, fragment)) + + def geturl(self): + return urlunsplit(self) + + +class ParseResult(BaseResult): + + __slots__ = () + + def __new__(cls, scheme, netloc, path, params, query, fragment): + return BaseResult.__new__( + cls, (scheme, netloc, path, params, query, fragment)) + + @property + def params(self): + return self[3] + + def geturl(self): + return urlunparse(self) + + +def urlparse(url, scheme='', allow_fragments=True): """Parse a URL into 6 components: <scheme>://<netloc>/<path>;<params>?<query>#<fragment> Return a 6-tuple: (scheme, netloc, path, params, query, fragment). @@ -53,7 +157,7 @@ def urlparse(url, scheme='', allow_fragments=1): url, params = _splitparams(url) else: params = '' - return scheme, netloc, url, params, query, fragment + return ParseResult(scheme, netloc, url, params, query, fragment) def _splitparams(url): if '/' in url: @@ -73,12 +177,13 @@ def _splitnetloc(url, start=0): delim = len(url) return url[start:delim], url[delim:] -def urlsplit(url, scheme='', allow_fragments=1): +def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> Return a 5-tuple: (scheme, netloc, path, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" + allow_fragments = bool(allow_fragments) key = url, scheme, allow_fragments cached = _parse_cache.get(key, None) if cached: @@ -97,9 +202,9 @@ def urlsplit(url, scheme='', allow_fragments=1): url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) - tuple = scheme, netloc, url, query, fragment - _parse_cache[key] = tuple - return tuple + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return v for c in url[:i]: if c not in scheme_chars: break @@ -111,9 +216,9 @@ def urlsplit(url, scheme='', allow_fragments=1): url, fragment = url.split('#', 1) if scheme in uses_query and '?' in url: url, query = url.split('?', 1) - tuple = scheme, netloc, url, query, fragment - _parse_cache[key] = tuple - return tuple + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return v def urlunparse((scheme, netloc, url, params, query, fragment)): """Put a parsed URL back together again. This may result in a @@ -136,7 +241,7 @@ def urlunsplit((scheme, netloc, url, query, fragment)): url = url + '#' + fragment return url -def urljoin(base, url, allow_fragments = 1): +def urljoin(base, url, allow_fragments=True): """Join a base URL and a possibly relative URL to form an absolute interpretation of the latter.""" if not base: |