summary refs log tree commit diff stats
path: root/Tools/webchecker
diff options
context:
space:
mode:
author Andrew M. Kuchling <amk@amk.ca> 2004-03-21 19:07:23 (GMT)
committer Andrew M. Kuchling <amk@amk.ca> 2004-03-21 19:07:23 (GMT)
commit a982c445430355b1e162a2083d7caa28a67c70ba (patch)
tree 652f299e9ceb0b1c61a91c3be64f4a8d1e3beba0 /Tools/webchecker
parent 4c4a45de8f992bb0c5cf35910d34ed6c63fa9d14 (diff)
download cpython-a982c445430355b1e162a2083d7caa28a67c70ba.zip
cpython-a982c445430355b1e162a2083d7caa28a67c70ba.tar.gz
cpython-a982c445430355b1e162a2083d7caa28a67c70ba.tar.bz2
[Patch #918212] Support XHTML's 'id' attribute, which can be on any element.
Diffstat (limited to 'Tools/webchecker')
-rw-r--r-- Tools/webchecker/README 3
-rwxr-xr-x Tools/webchecker/webchecker.py 34
2 files changed, 31 insertions, 6 deletions
diff --git a/Tools/webchecker/README b/Tools/webchecker/README
index 9ef5b47..a51bb3d 100644
--- a/Tools/webchecker/README
+++ b/Tools/webchecker/README
@@ -18,3 +18,6 @@ other options.
- Nov 1999. Sam Bayer contributed patches to reintegrate wcnew.py
into webchecker.py, and corresponding mods to wcgui.py and
websucker.py.
+
+- Mar 2004. Chris Herborth contributed a patch to let webchecker.py
+handle XHTML's 'id' attribute.
diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index e89529e..9113c4b 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -784,37 +784,51 @@ class MyHTMLParser(sgmllib.SGMLParser):
self.url = url
sgmllib.SGMLParser.__init__(self)
- def start_a(self, attributes):
- self.link_attr(attributes, 'href')
-
- # We must rescue the NAME
+ def check_name_id( self, attributes ):
+ """ Check the name or id attributes on an element.
+ """
+ # We must rescue the NAME or id (name is deprecated in XHTML)
# attributes from the anchor, in order to
# cache the internal anchors which are made
# available in the page.
for name, value in attributes:
- if name == "name":
+ if name == "name" or name == "id":
if value in self.names:
- self.checker.message("WARNING: duplicate name %s in %s",
+ self.checker.message("WARNING: duplicate ID name %s in %s",
value, self.url)
else: self.names.append(value)
break
+ def unknown_starttag( self, tag, attributes ):
+ """ In XHTML, you can have id attributes on any element.
+ """
+ self.check_name_id(attributes)
+
+ def start_a(self, attributes):
+ self.link_attr(attributes, 'href')
+ self.check_name_id(attributes)
+
def end_a(self): pass
def do_area(self, attributes):
self.link_attr(attributes, 'href')
+ self.check_name_id(attributes)
def do_body(self, attributes):
self.link_attr(attributes, 'background', 'bgsound')
+ self.check_name_id(attributes)
def do_img(self, attributes):
self.link_attr(attributes, 'src', 'lowsrc')
+ self.check_name_id(attributes)
def do_frame(self, attributes):
self.link_attr(attributes, 'src', 'longdesc')
+ self.check_name_id(attributes)
def do_iframe(self, attributes):
self.link_attr(attributes, 'src', 'longdesc')
+ self.check_name_id(attributes)
def do_link(self, attributes):
for name, value in attributes:
@@ -824,24 +838,31 @@ class MyHTMLParser(sgmllib.SGMLParser):
or parts == ["alternate", "stylesheet"]):
self.link_attr(attributes, "href")
break
+ self.check_name_id(attributes)
def do_object(self, attributes):
self.link_attr(attributes, 'data', 'usemap')
+ self.check_name_id(attributes)
def do_script(self, attributes):
self.link_attr(attributes, 'src')
+ self.check_name_id(attributes)
def do_table(self, attributes):
self.link_attr(attributes, 'background')
+ self.check_name_id(attributes)
def do_td(self, attributes):
self.link_attr(attributes, 'background')
+ self.check_name_id(attributes)
def do_th(self, attributes):
self.link_attr(attributes, 'background')
+ self.check_name_id(attributes)
def do_tr(self, attributes):
self.link_attr(attributes, 'background')
+ self.check_name_id(attributes)
def link_attr(self, attributes, *args):
for name, value in attributes:
@@ -857,6 +878,7 @@ class MyHTMLParser(sgmllib.SGMLParser):
if self.checker:
self.checker.note(1, " Base %s", value)
self.base = value
+ self.check_name_id(attributes)
def getlinks(self):
return self.links.keys()