summary | refs | log | tree | commit | diff | stats
path: root/Tools/webchecker
diff options
context:
space:
mode:
author: Fred Drake <fdrake@acm.org> 2001-04-04 17:47:25 (GMT)
committer: Fred Drake <fdrake@acm.org> 2001-04-04 17:47:25 (GMT)
commit: f3186e82427cc8d7afc2aaaed596a40ca70c19a9 (patch)
tree: 65c1414f2b1c68d2d2d70da026cca96a36002648 /Tools/webchecker
parent: 33d2b84b2c420ef6e182aa6c6c91cb7844d9994c (diff)
download: cpython-f3186e82427cc8d7afc2aaaed596a40ca70c19a9.zip
cpython-f3186e82427cc8d7afc2aaaed596a40ca70c19a9.tar.gz
cpython-f3186e82427cc8d7afc2aaaed596a40ca70c19a9.tar.bz2
A number of improvements based on a discussion with Chris McCafferty
<christopher.mccafferty@csg.ch>: Add javascript: and telnet: to the types of URLs we ignore. Add support for several additional URL-valued attributes on the BODY, FRAME, IFRAME, LINK, OBJECT, and SCRIPT elements.
Diffstat (limited to 'Tools/webchecker')
-rwxr-xr-x Tools/webchecker/webchecker.py 26
1 file changed, 24 insertions, 2 deletions
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index e79e7f1..b369ab8 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -481,8 +481,9 @@ class Checker:
if self.name_table.has_key(url):
return self.name_table[url]
- if url[:7] == 'mailto:' or url[:5] == 'news:':
- self.note(1, " Not checking mailto/news URL")
+ scheme = urllib.splittype(url)
+ if scheme in ('mailto', 'news', 'javascript', 'telnet'):
+ self.note(1, " Not checking %s URL" % scheme)
return None
isint = self.inroots(url)
@@ -792,10 +793,31 @@ class MyHTMLParser(sgmllib.SGMLParser):
def do_area(self, attributes):
self.link_attr(attributes, 'href')
+ def do_body(self, attributes):
+ self.link_attr(attributes, 'background')
+
def do_img(self, attributes):
self.link_attr(attributes, 'src', 'lowsrc')
def do_frame(self, attributes):
+ self.link_attr(attributes, 'src', 'longdesc')
+
+ def do_iframe(self, attributes):
+ self.link_attr(attributes, 'src', 'longdesc')
+
+ def do_link(self, attributes):
+ for name, value in attributes:
+ if name == "rel":
+ parts = string.split(string.lower(value))
+ if ( parts == ["stylesheet"]
+ or parts == ["alternate", "stylesheet"]):
+ self.link_attr(attributes, "href")
+ break
+
+ def do_object(self, attributes):
+ self.link_attr(attributes, 'data', 'usemap')
+
+ def do_script(self, attributes):
self.link_attr(attributes, 'src')
def link_attr(self, attributes, *args):