summaryrefslogtreecommitdiffstats
path: root/Lib/htmllib.py
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2002-05-29 16:18:42 (GMT)
committerRaymond Hettinger <python@rcn.com>2002-05-29 16:18:42 (GMT)
commitaef22fb9cdf31fb7f0afc28ad049f08a89e23761 (patch)
treecf1771f344aef5d404a83b7e3b9a5086ac80ca43 /Lib/htmllib.py
parentd68f5171ebb2f3404548c846523e9e43308a4130 (diff)
downloadcpython-aef22fb9cdf31fb7f0afc28ad049f08a89e23761.zip
cpython-aef22fb9cdf31fb7f0afc28ad049f08a89e23761.tar.gz
cpython-aef22fb9cdf31fb7f0afc28ad049f08a89e23761.tar.bz2
Patch 560023 adding docstrings. 2.2 Candidate (after verifying modules were not updated after 2.2).
Diffstat (limited to 'Lib/htmllib.py')
-rw-r--r--Lib/htmllib.py49
1 files changed, 49 insertions, 0 deletions
diff --git a/Lib/htmllib.py b/Lib/htmllib.py
index 446192f..6219bf0 100644
--- a/Lib/htmllib.py
+++ b/Lib/htmllib.py
@@ -11,10 +11,23 @@ from formatter import AS_IS
__all__ = ["HTMLParser"]
class HTMLParser(SGMLParser):
+ """This is the basic HTML parser class.
+
+ It supports all entity names required by the HTML 2.0 specification
+ RFC 1866. It also defines handlers for all HTML 2.0 and many HTML 3.0
+ and 3.2 elements.
+
+ """
from htmlentitydefs import entitydefs
def __init__(self, formatter, verbose=0):
+ """Creates an instance of the HTMLParser class.
+
+ The formatter parameter is the formatter instance associated with
+ the parser.
+
+ """
SGMLParser.__init__(self, verbose)
self.formatter = formatter
self.savedata = None
@@ -43,9 +56,24 @@ class HTMLParser(SGMLParser):
# --- Hooks to save data; shouldn't need to be overridden
def save_bgn(self):
+ """Begins saving character data in a buffer instead of sending it
+ to the formatter object.
+
+ Retrieve the stored data via the save_end() method. Use of the
+ save_bgn() / save_end() pair may not be nested.
+
+ """
self.savedata = ''
def save_end(self):
+ """Ends buffering character data and returns all data saved since
+ the preceding call to the save_bgn() method.
+
+ If the nofill flag is false, whitespace is collapsed to single
+ spaces. A call to this method without a preceding call to the
+ save_bgn() method will raise a TypeError exception.
+
+ """
data = self.savedata
self.savedata = None
if not self.nofill:
@@ -55,11 +83,26 @@ class HTMLParser(SGMLParser):
# --- Hooks for anchors; should probably be overridden
def anchor_bgn(self, href, name, type):
+ """This method is called at the start of an anchor region.
+
+ The arguments correspond to the attributes of the <A> tag with
+ the same names. The default implementation maintains a list of
+ hyperlinks (defined by the HREF attribute for <A> tags) within
+ the document. The list of hyperlinks is available as the data
+ attribute anchorlist.
+
+ """
self.anchor = href
if self.anchor:
self.anchorlist.append(href)
def anchor_end(self):
+ """This method is called at the end of an anchor region.
+
+ The default implementation adds a textual footnote marker using an
+ index into the list of hyperlinks created by the anchor_bgn() method.
+
+ """
if self.anchor:
self.handle_data("[%d]" % len(self.anchorlist))
self.anchor = None
@@ -67,6 +110,12 @@ class HTMLParser(SGMLParser):
# --- Hook for images; should probably be overridden
def handle_image(self, src, alt, *args):
+ """This method is called to handle images.
+
+ The default implementation simply passes the alt value to the
+ handle_data() method.
+
+ """
self.handle_data(alt)
# --------- Top level elements