summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 1997-02-01 05:16:08 (GMT)
committer: Guido van Rossum <guido@python.org> 1997-02-01 05:16:08 (GMT)
commit: 6133ec656e39496a95b92f9677f17bdc8f703dd0 (patch)
tree: 94e46e193cb6ca13e335013f05e4fd78e6689dc5
parent: de99d310ccbb1664a3880a140c036fb1f193f1e4 (diff)
download: cpython-6133ec656e39496a95b92f9677f17bdc8f703dd0.zip
cpython-6133ec656e39496a95b92f9677f17bdc8f703dd0.tar.gz
cpython-6133ec656e39496a95b92f9677f17bdc8f703dd0.tar.bz2
Process <img> and <frame> tags. Don't bother skipping second href.
-rwxr-xr-x Tools/webchecker/webchecker.py 15
1 files changed, 12 insertions, 3 deletions
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index 9e676ca..69e462a 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -564,11 +564,21 @@ class MyHTMLParser(sgmllib.SGMLParser):
sgmllib.SGMLParser.__init__ (self)
def start_a(self, attributes):
+ self.link_attr(attributes, 'href')
+
+ def end_a(self): pass
+
+ def do_img(self, attributes):
+ self.link_attr(attributes, 'src', 'lowsrc')
+
+ def do_frame(self, attributes):
+ self.link_attr(attributes, 'src')
+
+ def link_attr(self, attributes, *args):
for name, value in attributes:
- if name == 'href':
+ if name in args:
if value: value = string.strip(value)
if value: self.links[value] = None
- return # match only first href
def do_base(self, attributes):
for name, value in attributes:
@@ -578,7 +588,6 @@ class MyHTMLParser(sgmllib.SGMLParser):
if verbose > 1:
print " Base", value
self.base = value
- return # match only first href
def getlinks(self):
return self.links.keys()