diff options
author | Fred Drake <fdrake@acm.org> | 2001-04-04 17:47:25 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 2001-04-04 17:47:25 (GMT) |
commit | f3186e82427cc8d7afc2aaaed596a40ca70c19a9 (patch) | |
tree | 65c1414f2b1c68d2d2d70da026cca96a36002648 /Tools/webchecker/webchecker.py | |
parent | 33d2b84b2c420ef6e182aa6c6c91cb7844d9994c (diff) | |
download | cpython-f3186e82427cc8d7afc2aaaed596a40ca70c19a9.zip cpython-f3186e82427cc8d7afc2aaaed596a40ca70c19a9.tar.gz cpython-f3186e82427cc8d7afc2aaaed596a40ca70c19a9.tar.bz2 |
A number of improvements based on a discussion with Chris McCafferty
<christopher.mccafferty@csg.ch>:
Add javascript: and telnet: to the types of URLs we ignore.
Add support for several additional URL-valued attributes on the BODY,
FRAME, IFRAME, LINK, OBJECT, and SCRIPT elements.
Diffstat (limited to 'Tools/webchecker/webchecker.py')
-rwxr-xr-x | Tools/webchecker/webchecker.py | 26 |
1 file changed, 24 insertions, 2 deletions
```diff
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index e79e7f1..b369ab8 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -481,8 +481,9 @@ class Checker:
         if self.name_table.has_key(url):
             return self.name_table[url]
 
-        if url[:7] == 'mailto:' or url[:5] == 'news:':
-            self.note(1, " Not checking mailto/news URL")
+        scheme = urllib.splittype(url)
+        if scheme in ('mailto', 'news', 'javascript', 'telnet'):
+            self.note(1, " Not checking %s URL" % scheme)
             return None
         isint = self.inroots(url)
 
@@ -792,10 +793,31 @@ class MyHTMLParser(sgmllib.SGMLParser):
     def do_area(self, attributes):
         self.link_attr(attributes, 'href')
 
+    def do_body(self, attributes):
+        self.link_attr(attributes, 'background')
+
     def do_img(self, attributes):
         self.link_attr(attributes, 'src', 'lowsrc')
 
     def do_frame(self, attributes):
+        self.link_attr(attributes, 'src', 'longdesc')
+
+    def do_iframe(self, attributes):
+        self.link_attr(attributes, 'src', 'longdesc')
+
+    def do_link(self, attributes):
+        for name, value in attributes:
+            if name == "rel":
+                parts = string.split(string.lower(value))
+                if ( parts == ["stylesheet"]
+                     or parts == ["alternate", "stylesheet"]):
+                    self.link_attr(attributes, "href")
+                    break
+
+    def do_object(self, attributes):
+        self.link_attr(attributes, 'data', 'usemap')
+
+    def do_script(self, attributes):
         self.link_attr(attributes, 'src')
 
     def link_attr(self, attributes, *args):
```