summaryrefslogtreecommitdiffstats
path: root/Doc/libsgmllib.tex
diff options
context:
space:
mode:
Diffstat (limited to 'Doc/libsgmllib.tex')
-rw-r--r--Doc/libsgmllib.tex75
1 files changed, 39 insertions, 36 deletions
diff --git a/Doc/libsgmllib.tex b/Doc/libsgmllib.tex
index fd7eeaa..78060ec 100644
--- a/Doc/libsgmllib.tex
+++ b/Doc/libsgmllib.tex
@@ -3,18 +3,20 @@
\stmodindex{sgmllib}
\index{SGML}
-This module defines a class \code{SGMLParser} which serves as the
+This module defines a class \class{SGMLParser} which serves as the
basis for parsing text files formatted in SGML (Standard Generalized
Mark-up Language). In fact, it does not provide a full SGML parser
--- it only parses SGML insofar as it is used by HTML, and the module
-only exists as a base for the \code{htmllib} module.
-\refstmodindex{htmllib}
+only exists as a base for the \module{htmllib}\refstmodindex{htmllib}
+module.
-In particular, the parser is hardcoded to recognize the following
+
+\begin{classdesc}{SGMLParser}{}
+The \class{SGMLParser} class is instantiated without arguments.
+The parser is hardcoded to recognize the following
constructs:
\begin{itemize}
-
\item
Opening and closing tags of the form
\samp{<\var{tag} \var{attr}="\var{value}" ...>} and
@@ -32,9 +34,9 @@ spaces, tabs, and newlines are allowed between the trailing
\samp{>} and the immediately preceding \samp{--}.
\end{itemize}
+\end{classdesc}
-The \code{SGMLParser} class must be instantiated without arguments.
-It has the following interface methods:
+\class{SGMLParser} instances have the following interface methods:
\setindexsubitem{(SGMLParser method)}
@@ -56,42 +58,41 @@ Enter literal mode (CDATA mode).
\begin{funcdesc}{feed}{data}
Feed some text to the parser. It is processed insofar as it consists
of complete elements; incomplete data is buffered until more data is
-fed or \code{close()} is called.
+fed or \method{close()} is called.
\end{funcdesc}
\begin{funcdesc}{close}{}
Force processing of all buffered data as if it were followed by an
end-of-file mark. This method may be redefined by a derived class to
define additional processing at the end of the input, but the
-redefined version should always call \code{SGMLParser.close()}.
+redefined version should always call \method{SGMLParser.close()}.
\end{funcdesc}
-\begin{funcdesc}{handle_starttag}{tag\, method\, attributes}
+\begin{funcdesc}{handle_starttag}{tag, method, attributes}
This method is called to handle start tags for which either a
\code{start_\var{tag}()} or \code{do_\var{tag}()} method has been
-defined. The \code{tag} argument is the name of the tag converted to
-lower case, and the \code{method} argument is the bound method which
+defined. The \var{tag} argument is the name of the tag converted to
+lower case, and the \var{method} argument is the bound method which
should be used to support semantic interpretation of the start tag.
-The \var{attributes} argument is a list of (\var{name}, \var{value})
+The \var{attributes} argument is a list of \code{(\var{name}, \var{value})}
pairs containing the attributes found inside the tag's \code{<>}
brackets. The \var{name} has been translated to lower case and double
quotes and backslashes in the \var{value} have been interpreted. For
instance, for the tag \code{<A HREF="http://www.cwi.nl/">}, this
-method would be called as \code{unknown_starttag('a', [('href',
+method would be called as \samp{handle_starttag('a', \var{method}, [('href',
'http://www.cwi.nl/')])}. The base implementation simply calls
-\code{method} with \code{attributes} as the only argument.
+\var{method} with \var{attributes} as the only argument.
\end{funcdesc}
-\begin{funcdesc}{handle_endtag}{tag\, method}
-
+\begin{funcdesc}{handle_endtag}{tag, method}
This method is called to handle endtags for which an
-\code{end_\var{tag}()} method has been defined. The \code{tag}
+\code{end_\var{tag}()} method has been defined. The \var{tag}
argument is the name of the tag converted to lower case, and the
-\code{method} argument is the bound method which should be used to
+\var{method} argument is the bound method which should be used to
support semantic interpretation of the end tag. If no
-\code{end_\var{tag}()} method is defined for the closing element, this
-handler is not called. The base implementation simply calls
-\code{method}.
+\code{end_\var{tag}()} method is defined for the closing element,
+this handler is not called. The base implementation simply calls
+\var{method}.
\end{funcdesc}
\begin{funcdesc}{handle_data}{data}
@@ -105,7 +106,7 @@ This method is called to process a character reference of the form
\samp{\&\#\var{ref};}. In the base implementation, \var{ref} must
be a decimal number in the
range 0--255. It translates the character to \ASCII{} and calls the
-method \code{handle_data()} with the character as argument. If
+method \method{handle_data()} with the character as argument. If
\var{ref} is invalid or out of range, the method
\code{unknown_charref(\var{ref})} is called to handle the error. A
subclass must override this method to provide support for named
@@ -113,21 +114,21 @@ character entities.
\end{funcdesc}
\begin{funcdesc}{handle_entityref}{ref}
-This method is called to process a general entity reference of the form
-\samp{\&\var{ref};} where \var{ref} is an general entity
+This method is called to process a general entity reference of the
+form \samp{\&\var{ref};} where \var{ref} is a general entity
reference. It looks for \var{ref} in the instance (or class)
-variable \code{entitydefs} which should be a mapping from entity names
-to corresponding translations.
-If a translation is found, it calls the method \code{handle_data()}
+variable \member{entitydefs} which should be a mapping from entity
+names to corresponding translations.
+If a translation is found, it calls the method \method{handle_data()}
with the translation; otherwise, it calls the method
-\code{unknown_entityref(\var{ref})}. The default \code{entitydefs}
+\code{unknown_entityref(\var{ref})}. The default \member{entitydefs}
defines translations for \code{\&amp;}, \code{\&apos;}, \code{\&gt;},
\code{\&lt;}, and \code{\&quot;}.
\end{funcdesc}
\begin{funcdesc}{handle_comment}{comment}
This method is called when a comment is encountered. The
-\code{comment} argument is a string containing the text between the
+\var{comment} argument is a string containing the text between the
\samp{<!--} and \samp{-->} delimiters, but not the delimiters
themselves. For example, the comment \samp{<!--text-->} will
cause this method to be called with the argument \code{'text'}. The
@@ -153,8 +154,9 @@ does nothing.
\begin{funcdesc}{unknown_charref}{ref}
This method is called to process unresolvable numeric character
-references. It is intended to be overridden by a derived class; the
-base class implementation does nothing.
+references. Refer to \method{handle_charref()} to determine what is
+handled by default. It is intended to be overridden by a derived
+class; the base class implementation does nothing.
\end{funcdesc}
\begin{funcdesc}{unknown_entityref}{ref}
@@ -171,14 +173,15 @@ case:
\begin{funcdescni}{start_\var{tag}}{attributes}
This method is called to process an opening tag \var{tag}. It has
-preference over \code{do_\var{tag}()}. The \var{attributes} argument
-has the same meaning as described for \code{handle_starttag()} above.
+preference over \code{do_\var{tag}()}. The \var{attributes}
+argument has the same meaning as described for
+\method{handle_starttag()} above.
\end{funcdescni}
\begin{funcdescni}{do_\var{tag}}{attributes}
This method is called to process an opening tag \var{tag} that does
not come with a matching closing tag. The \var{attributes} argument
-has the same meaning as described for \code{handle_starttag()} above.
+has the same meaning as described for \method{handle_starttag()} above.
\end{funcdescni}
\begin{funcdescni}{end_\var{tag}}{}
@@ -189,7 +192,7 @@ Note that the parser maintains a stack of open elements for which no
end tag has been found yet. Only tags processed by
\code{start_\var{tag}()} are pushed on this stack. Definition of an
\code{end_\var{tag}()} method is optional for these tags. For tags
-processed by \code{do_\var{tag}()} or by \code{unknown_tag()}, no
+processed by \code{do_\var{tag}()} or by \method{unknown_tag()}, no
\code{end_\var{tag}()} method needs to be defined; if defined, it will not
be used. If both \code{start_\var{tag}()} and \code{do_\var{tag}()}
methods exist for a tag, the \code{start_\var{tag}()} method takes