From 25211f5724087516f10ddb8a232d63e09a9b9bec Mon Sep 17 00:00:00 2001 From: Fred Drake Date: Thu, 5 Jul 2001 16:34:36 +0000 Subject: Added more information on the differences between the htmllib and HTMLParser modules. --- Doc/lib/libhtmllib.tex | 6 ++++++ Doc/lib/libhtmlparser.tex | 8 +++++++- Doc/lib/libsgmllib.tex | 5 +++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Doc/lib/libhtmllib.tex b/Doc/lib/libhtmllib.tex index 508d004..666affb 100644 --- a/Doc/lib/libhtmllib.tex +++ b/Doc/lib/libhtmllib.tex @@ -70,6 +70,12 @@ handlers for all HTML 2.0 and many HTML 3.0 and 3.2 elements. \begin{seealso} + \seemodule{HTMLParser}{Alternate HTML parser that offers a slightly + lower-level view of the input, but is + designed to work with XHTML, and does not + implement some of the SGML syntax that is neither used in + ``HTML as deployed'' nor legal + for XHTML.} \seemodule{htmlentitydefs}{Definition of replacement text for HTML 2.0 entities.} \seemodule{sgmllib}{Base class for \class{HTMLParser}.} diff --git a/Doc/lib/libhtmlparser.tex b/Doc/lib/libhtmlparser.tex index e8b4dd9..68c93e0 100644 --- a/Doc/lib/libhtmlparser.tex +++ b/Doc/lib/libhtmlparser.tex @@ -6,7 +6,9 @@ This module defines a class \class{HTMLParser} which serves as the basis for parsing text files formatted in HTML\index{HTML} (HyperText -Mark-up Language) and XHTML.\index{XHTML} +Mark-up Language) and XHTML.\index{XHTML} Unlike the parser in +\refmodule{htmllib}, this parser is not based on the SGML parser in +\refmodule{sgmllib}. \begin{classdesc}{HTMLParser}{} @@ -15,6 +17,10 @@ The \class{HTMLParser} class is instantiated without arguments. An HTMLParser instance is fed HTML data and calls handler functions when tags begin and end. The \class{HTMLParser} class is meant to be overridden by the user to provide a desired behavior.
+ +Unlike the parser in \refmodule{htmllib}, this parser neither checks +that end tags match start tags nor calls the end-tag handler for +elements that are closed implicitly by closing an outer element. \end{classdesc} diff --git a/Doc/lib/libsgmllib.tex b/Doc/lib/libsgmllib.tex index 5fe0c8d..3699d24 100644 --- a/Doc/lib/libsgmllib.tex +++ b/Doc/lib/libsgmllib.tex @@ -10,8 +10,9 @@ This module defines a class \class{SGMLParser} which serves as the basis for parsing text files formatted in SGML (Standard Generalized Mark-up Language). In fact, it does not provide a full SGML parser --- it only parses SGML insofar as it is used by HTML, and the module -only exists as a base for the \refmodule{htmllib}\refstmodindex{htmllib} -module. +only exists as a base for the \refmodule{htmllib} module. Another +HTML parser which supports XHTML and offers a somewhat different +interface is available in the \refmodule{HTMLParser} module. \begin{classdesc}{SGMLParser}{} -- cgit v0.12