diff options
author | Guido van Rossum <guido@python.org> | 1999-11-17 15:04:26 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1999-11-17 15:04:26 (GMT) |
commit | 64acb5ce93eafcd063d37b5b4e5b8a753465267d (patch) | |
tree | 1e99ca8067dfa5c7a7f77877102c396271ad4307 /Tools | |
parent | a8946406dfd6f4ecaf090325eed64241b36b5d4a (diff) | |
download | cpython-64acb5ce93eafcd063d37b5b4e5b8a753465267d.zip cpython-64acb5ce93eafcd063d37b5b4e5b8a753465267d.tar.gz cpython-64acb5ce93eafcd063d37b5b4e5b8a753465267d.tar.bz2 |
Samuel L. Bayer:
- same trick with "import wcnew; webchecker = wcnew" as above
- updated readhtml() method to handle pair representation; used
new name suppression infrastructure from wcnew.py to suppress
processing name anchors
[And untabified --GvR]
Diffstat (limited to 'Tools')
-rwxr-xr-x | Tools/webchecker/websucker.py | 16 |
1 file changed, 12 insertions, 4 deletions
```diff
diff --git a/Tools/webchecker/websucker.py b/Tools/webchecker/websucker.py
index 67e493d..7453859 100755
--- a/Tools/webchecker/websucker.py
+++ b/Tools/webchecker/websucker.py
@@ -10,7 +10,9 @@ import string
 import urllib
 import getopt
 
-import webchecker
+import wcnew
+
+webchecker = wcnew
 
 # Extract real version number if necessary
 if __version__[0] == '$':
@@ -45,14 +47,20 @@ def main():
 class Sucker(webchecker.Checker):
 
     checkext = 0
+    nonames = 1
+
+    # SAM 11/13/99: in general, URLs are now URL pairs.
+    # Since we've suppressed name anchor checking,
+    # we can ignore the second dimension.
 
-    def readhtml(self, url):
+    def readhtml(self, url_pair):
+        url = url_pair[0]
         text = None
         path = self.savefilename(url)
         try:
             f = open(path, "rb")
         except IOError:
-            f = self.openpage(url)
+            f = self.openpage(url_pair)
             if f:
                 info = f.info()
                 nurl = f.geturl()
@@ -89,7 +97,7 @@ class Sucker(webchecker.Checker):
         host, port = urllib.splitnport(host)
         host = string.lower(host)
         if not path or path[-1] == "/":
-	    path = path + "index.html"
+            path = path + "index.html"
         if os.sep != "/":
             path = string.join(string.split(path, "/"), os.sep)
         path = os.path.join(host, path)
```