diff options
author | Raymond Hettinger <python@rcn.com> | 2002-05-29 16:18:42 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2002-05-29 16:18:42 (GMT) |
commit | aef22fb9cdf31fb7f0afc28ad049f08a89e23761 (patch) | |
tree | cf1771f344aef5d404a83b7e3b9a5086ac80ca43 /Lib/htmllib.py | |
parent | d68f5171ebb2f3404548c846523e9e43308a4130 (diff) | |
download | cpython-aef22fb9cdf31fb7f0afc28ad049f08a89e23761.zip cpython-aef22fb9cdf31fb7f0afc28ad049f08a89e23761.tar.gz cpython-aef22fb9cdf31fb7f0afc28ad049f08a89e23761.tar.bz2 |
Patch 560023 adding docstrings. 2.2 Candidate (after verifying modules were not updated after 2.2).
Diffstat (limited to 'Lib/htmllib.py')
-rw-r--r-- | Lib/htmllib.py | 49 |
1 files changed, 49 insertions, 0 deletions
diff --git a/Lib/htmllib.py b/Lib/htmllib.py index 446192f..6219bf0 100644 --- a/Lib/htmllib.py +++ b/Lib/htmllib.py @@ -11,10 +11,23 @@ from formatter import AS_IS __all__ = ["HTMLParser"] class HTMLParser(SGMLParser): + """This is the basic HTML parser class. + + It supports all entity names required by the HTML 2.0 specification + RFC 1866. It also defines handlers for all HTML 2.0 and many HTML 3.0 + and 3.2 elements. + + """ from htmlentitydefs import entitydefs def __init__(self, formatter, verbose=0): + """Creates an instance of the HTMLParser class. + + The formatter parameter is the formatter instance associated with + the parser. + + """ SGMLParser.__init__(self, verbose) self.formatter = formatter self.savedata = None @@ -43,9 +56,24 @@ class HTMLParser(SGMLParser): # --- Hooks to save data; shouldn't need to be overridden def save_bgn(self): + """Begins saving character data in a buffer instead of sending it + to the formatter object. + + Retrieve the stored data via the save_end() method. Use of the + save_bgn() / save_end() pair may not be nested. + + """ self.savedata = '' def save_end(self): + """Ends buffering character data and returns all data saved since + the preceding call to the save_bgn() method. + + If the nofill flag is false, whitespace is collapsed to single + spaces. A call to this method without a preceding call to the + save_bgn() method will raise a TypeError exception. + + """ data = self.savedata self.savedata = None if not self.nofill: @@ -55,11 +83,26 @@ class HTMLParser(SGMLParser): # --- Hooks for anchors; should probably be overridden def anchor_bgn(self, href, name, type): + """This method is called at the start of an anchor region. + + The arguments correspond to the attributes of the <A> tag with + the same names. The default implementation maintains a list of + hyperlinks (defined by the HREF attribute for <A> tags) within + the document. The list of hyperlinks is available as the data + attribute anchorlist. 
+ + """ self.anchor = href if self.anchor: self.anchorlist.append(href) def anchor_end(self): + """This method is called at the end of an anchor region. + + The default implementation adds a textual footnote marker using an + index into the list of hyperlinks created by the anchor_bgn() method. + + """ if self.anchor: self.handle_data("[%d]" % len(self.anchorlist)) self.anchor = None @@ -67,6 +110,12 @@ class HTMLParser(SGMLParser): # --- Hook for images; should probably be overridden def handle_image(self, src, alt, *args): + """This method is called to handle images. + + The default implementation simply passes the alt value to the + handle_data() method. + + """ self.handle_data(alt) # --------- Top level elements |