diff options
author | Guido van Rossum <guido@python.org> | 1997-02-01 05:16:08 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1997-02-01 05:16:08 (GMT) |
commit | 6133ec656e39496a95b92f9677f17bdc8f703dd0 (patch) | |
tree | 94e46e193cb6ca13e335013f05e4fd78e6689dc5 | |
parent | de99d310ccbb1664a3880a140c036fb1f193f1e4 (diff) | |
download | cpython-6133ec656e39496a95b92f9677f17bdc8f703dd0.zip cpython-6133ec656e39496a95b92f9677f17bdc8f703dd0.tar.gz cpython-6133ec656e39496a95b92f9677f17bdc8f703dd0.tar.bz2 |
Process <img> and <frame> tags. Don't bother skipping second href.
-rwxr-xr-x | Tools/webchecker/webchecker.py | 15 |
1 files changed, 12 insertions, 3 deletions
```diff
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index 9e676ca..69e462a 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -564,11 +564,21 @@ class MyHTMLParser(sgmllib.SGMLParser):
         sgmllib.SGMLParser.__init__ (self)
 
     def start_a(self, attributes):
+        self.link_attr(attributes, 'href')
+
+    def end_a(self): pass
+
+    def do_img(self, attributes):
+        self.link_attr(attributes, 'src', 'lowsrc')
+
+    def do_frame(self, attributes):
+        self.link_attr(attributes, 'src')
+
+    def link_attr(self, attributes, *args):
         for name, value in attributes:
-            if name == 'href':
+            if name in args:
                 if value: value = string.strip(value)
                 if value: self.links[value] = None
-                return # match only first href
 
     def do_base(self, attributes):
         for name, value in attributes:
@@ -578,7 +588,6 @@ class MyHTMLParser(sgmllib.SGMLParser):
                     if verbose > 1:
                         print " Base", value
                     self.base = value
-                return # match only first href
 
     def getlinks(self):
         return self.links.keys()
```