summary refs log tree commit diff stats
path: root/Tools/webchecker/websucker.py
diff options
context:
space:
mode:
author Guido van Rossum <guido@python.org> 1999-11-17 15:04:26 (GMT)
committer Guido van Rossum <guido@python.org> 1999-11-17 15:04:26 (GMT)
commit 64acb5ce93eafcd063d37b5b4e5b8a753465267d (patch)
tree 1e99ca8067dfa5c7a7f77877102c396271ad4307 /Tools/webchecker/websucker.py
parent a8946406dfd6f4ecaf090325eed64241b36b5d4a (diff)
download cpython-64acb5ce93eafcd063d37b5b4e5b8a753465267d.zip
cpython-64acb5ce93eafcd063d37b5b4e5b8a753465267d.tar.gz
cpython-64acb5ce93eafcd063d37b5b4e5b8a753465267d.tar.bz2
Samuel L. Bayer:
- same trick with "import wcnew; webchecker = wcnew" as above
- updated readhtml() method to handle pair representation; used new name suppression infrastructure from wcnew.py to suppress processing name anchors
[And untabified --GvR]
Diffstat (limited to 'Tools/webchecker/websucker.py')
-rwxr-xr-x Tools/webchecker/websucker.py 16
1 files changed, 12 insertions, 4 deletions
diff --git a/Tools/webchecker/websucker.py b/Tools/webchecker/websucker.py
index 67e493d..7453859 100755
--- a/Tools/webchecker/websucker.py
+++ b/Tools/webchecker/websucker.py
@@ -10,7 +10,9 @@ import string
import urllib
import getopt
-import webchecker
+import wcnew
+
+webchecker = wcnew
# Extract real version number if necessary
if __version__[0] == '$':
@@ -45,14 +47,20 @@ def main():
class Sucker(webchecker.Checker):
checkext = 0
+ nonames = 1
+
+ # SAM 11/13/99: in general, URLs are now URL pairs.
+ # Since we've suppressed name anchor checking,
+ # we can ignore the second dimension.
- def readhtml(self, url):
+ def readhtml(self, url_pair):
+ url = url_pair[0]
text = None
path = self.savefilename(url)
try:
f = open(path, "rb")
except IOError:
- f = self.openpage(url)
+ f = self.openpage(url_pair)
if f:
info = f.info()
nurl = f.geturl()
@@ -89,7 +97,7 @@ class Sucker(webchecker.Checker):
host, port = urllib.splitnport(host)
host = string.lower(host)
if not path or path[-1] == "/":
- path = path + "index.html"
+ path = path + "index.html"
if os.sep != "/":
path = string.join(string.split(path, "/"), os.sep)
path = os.path.join(host, path)