From c76ffca2befde942bfa98e3972d5cead572c771a Mon Sep 17 00:00:00 2001
From: Georg Brandl <georg@python.org>
Date: Sat, 17 May 2008 21:54:03 +0000
Subject: Rename html.parser file, and split html.entities from htmllib to ease
 removal of the latter in Py3k.

---
 Doc/library/html.entities.rst |  42 ++++++++++
 Doc/library/html.parser.rst   | 190 +++++++++++++++++++++++++++++++++++++++++
 Doc/library/htmllib.rst       |  45 ----------
 Doc/library/htmlparser.rst    | 191 ------------------------------------------
 Doc/library/markup.rst        |   3 +-
 5 files changed, 234 insertions(+), 237 deletions(-)
 create mode 100644 Doc/library/html.entities.rst
 create mode 100644 Doc/library/html.parser.rst
 delete mode 100644 Doc/library/htmlparser.rst

diff --git a/Doc/library/html.entities.rst b/Doc/library/html.entities.rst
new file mode 100644
index 0000000..601e181
--- /dev/null
+++ b/Doc/library/html.entities.rst
@@ -0,0 +1,42 @@
+:mod:`html.entities` --- Definitions of HTML general entities
+=============================================================
+
+.. module:: htmlentitydefs
+   :synopsis: Old name for the html.entities module.
+
+.. module:: html.entities
+   :synopsis: Definitions of HTML general entities.
+.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org>
+
+.. note::
+   The :mod:`htmlentitydefs` module has been renamed to :mod:`html.entities` in
+   Python 3.0.  It is importable under both names in Python 2.6 and the rest of
+   the 2.x series.
+
+
+This module defines three dictionaries, ``name2codepoint``, ``codepoint2name``,
+and ``entitydefs``. ``entitydefs`` is used by the :mod:`htmllib` module to
+provide the :attr:`entitydefs` member of the :class:`HTMLParser` class.  The
+definition provided here contains all the entities defined by XHTML 1.0  that
+can be handled using simple textual substitution in the Latin-1 character set
+(ISO-8859-1).
+
+
+.. data:: entitydefs
+
+   A dictionary mapping XHTML 1.0 entity definitions to their replacement text in
+   ISO Latin-1.
+
+
+.. data:: name2codepoint
+
+   A dictionary that maps HTML entity names to the Unicode codepoints.
+
+   .. versionadded:: 2.3
+
+
+.. data:: codepoint2name
+
+   A dictionary that maps Unicode codepoints to HTML entity names.
+
+   .. versionadded:: 2.3
diff --git a/Doc/library/html.parser.rst b/Doc/library/html.parser.rst
new file mode 100644
index 0000000..5fa5a70
--- /dev/null
+++ b/Doc/library/html.parser.rst
@@ -0,0 +1,190 @@
+:mod:`html.parser` --- Simple HTML and XHTML parser
+===================================================
+
+.. module:: HTMLParser
+   :synopsis: Old name for the html.parser module.
+
+.. module:: html.parser
+   :synopsis: A simple parser that can handle HTML and XHTML.
+
+.. note::
+   The :mod:`HTMLParser` module has been renamed to :mod:`html.parser` in Python
+   3.0.  It is importable under both names in Python 2.6 and the rest of the 2.x
+   series.
+
+
+.. versionadded:: 2.2
+
+.. index::
+   single: HTML
+   single: XHTML
+
+This module defines a class :class:`HTMLParser` which serves as the basis for
+parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML.
+Unlike the parser in :mod:`htmllib`, this parser is not based on the SGML parser
+in :mod:`sgmllib`.
+
+
+.. class:: HTMLParser()
+
+   The :class:`HTMLParser` class is instantiated without arguments.
+
+   An :class:`HTMLParser` instance is fed HTML data and calls handler functions when tags
+   begin and end.  The :class:`HTMLParser` class is meant to be overridden by the
+   user to provide a desired behavior.
+
+   Unlike the parser in :mod:`htmllib`, this parser does not check that end tags
+   match start tags or call the end-tag handler for elements which are closed
+   implicitly by closing an outer element.
+
+An exception is defined as well:
+
+
+.. exception:: HTMLParseError
+
+   Exception raised by the :class:`HTMLParser` class when it encounters an error
+   while parsing.  This exception provides three attributes: :attr:`msg` is a brief
+   message explaining the error, :attr:`lineno` is the number of the line on which
+   the broken construct was detected, and :attr:`offset` is the number of
+   characters into the line at which the construct starts.
+
+:class:`HTMLParser` instances have the following methods:
+
+
+.. method:: HTMLParser.reset()
+
+   Reset the instance.  Loses all unprocessed data.  This is called implicitly at
+   instantiation time.
+
+
+.. method:: HTMLParser.feed(data)
+
+   Feed some text to the parser.  It is processed insofar as it consists of
+   complete elements; incomplete data is buffered until more data is fed or
+   :meth:`close` is called.
+
+
+.. method:: HTMLParser.close()
+
+   Force processing of all buffered data as if it were followed by an end-of-file
+   mark.  This method may be redefined by a derived class to define additional
+   processing at the end of the input, but the redefined version should always call
+   the :class:`HTMLParser` base class method :meth:`close`.
+
+
+.. method:: HTMLParser.getpos()
+
+   Return current line number and offset.
+
+
+.. method:: HTMLParser.get_starttag_text()
+
+   Return the text of the most recently opened start tag.  This should not normally
+   be needed for structured processing, but may be useful in dealing with HTML "as
+   deployed" or for re-generating input with minimal changes (whitespace between
+   attributes can be preserved, etc.).
+
+
+.. method:: HTMLParser.handle_starttag(tag, attrs)
+
+   This method is called to handle the start of a tag.  It is intended to be
+   overridden by a derived class; the base class implementation does nothing.
+
+   The *tag* argument is the name of the tag converted to lower case. The *attrs*
+   argument is a list of ``(name, value)`` pairs containing the attributes found
+   inside the tag's ``<>`` brackets.  The *name* is translated to lower case,
+   quotes in the *value* are removed, and character and entity references are
+   replaced.  For instance, for the tag ``<A
+   HREF="http://www.cwi.nl/">``, this method would be called as
+   ``handle_starttag('a', [('href', 'http://www.cwi.nl/')])``.
+
+   .. versionchanged:: 2.6
+      All entity references from :mod:`html.entities` are now replaced in the
+      attribute values.
+
+
+.. method:: HTMLParser.handle_startendtag(tag, attrs)
+
+   Similar to :meth:`handle_starttag`, but called when the parser encounters an
+   XHTML-style empty tag (``<a .../>``).  This method may be overridden by
+   subclasses which require this particular lexical information; the default
+   implementation simply calls :meth:`handle_starttag` and :meth:`handle_endtag`.
+
+
+.. method:: HTMLParser.handle_endtag(tag)
+
+   This method is called to handle the end tag of an element.  It is intended to be
+   overridden by a derived class; the base class implementation does nothing.  The
+   *tag* argument is the name of the tag converted to lower case.
+
+
+.. method:: HTMLParser.handle_data(data)
+
+   This method is called to process arbitrary data.  It is intended to be
+   overridden by a derived class; the base class implementation does nothing.
+
+
+.. method:: HTMLParser.handle_charref(name)
+
+   This method is called to process a character reference of the form ``&#ref;``.
+   It is intended to be overridden by a derived class; the base class
+   implementation does nothing.
+
+
+.. method:: HTMLParser.handle_entityref(name)
+
+   This method is called to process a general entity reference of the form
+   ``&name;`` where *name* is a general entity reference.  It is intended to be
+   overridden by a derived class; the base class implementation does nothing.
+
+
+.. method:: HTMLParser.handle_comment(data)
+
+   This method is called when a comment is encountered.  The *data* argument is
+   a string containing the text between the ``--`` and ``--`` delimiters, but not
+   the delimiters themselves.  For example, the comment ``<!--text-->`` will cause
+   this method to be called with the argument ``'text'``.  It is intended to be
+   overridden by a derived class; the base class implementation does nothing.
+
+
+.. method:: HTMLParser.handle_decl(decl)
+
+   Method called when an SGML declaration is read by the parser.  The *decl*
+   parameter will be the entire contents of the declaration inside the ``<!``...\
+   ``>`` markup.  It is intended to be overridden by a derived class; the base
+   class implementation does nothing.
+
+
+.. method:: HTMLParser.handle_pi(data)
+
+   Method called when a processing instruction is encountered.  The *data*
+   parameter will contain the entire processing instruction. For example, for the
+   processing instruction ``<?proc color='red'>``, this method would be called as
+   ``handle_pi("proc color='red'")``.  It is intended to be overridden by a derived
+   class; the base class implementation does nothing.
+
+   .. note::
+
+      The :class:`HTMLParser` class uses the SGML syntactic rules for processing
+      instructions.  An XHTML processing instruction using the trailing ``'?'`` will
+      cause the ``'?'`` to be included in *data*.
+
+
+.. _htmlparser-example:
+
+Example HTML Parser Application
+-------------------------------
+
+As a basic example, below is a very basic HTML parser that uses the
+:class:`HTMLParser` class to print out tags as they are encountered::
+
+   from html.parser import HTMLParser
+
+   class MyHTMLParser(HTMLParser):
+
+       def handle_starttag(self, tag, attrs):
+           print "Encountered the beginning of a %s tag" % tag
+
+       def handle_endtag(self, tag):
+           print "Encountered the end of a %s tag" % tag
+
diff --git a/Doc/library/htmllib.rst b/Doc/library/htmllib.rst
index 8241c14..953a4e4 100644
--- a/Doc/library/htmllib.rst
+++ b/Doc/library/htmllib.rst
@@ -147,48 +147,3 @@ additional methods and instance variables for use within tag methods.
    call to :meth:`save_bgn`.  If the :attr:`nofill` flag is false, whitespace is
    collapsed to single spaces.  A call to this method without a preceding call to
    :meth:`save_bgn` will raise a :exc:`TypeError` exception.
-
-
-:mod:`html.entities` --- Definitions of HTML general entities
-=============================================================
-
-.. module:: htmlentitydefs
-   :synopsis: Old name for the :mod:`html.entities` module.
-
-.. module:: html.entities
-   :synopsis: Definitions of HTML general entities.
-.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org>
-
-.. note::
-   The :mod:`htmlentitydefs` module has been renamed to
-   :mod:`html.entities` in Python 3.0.  It is importable under both names
-   in Python 2.6 and the rest of the 2.x series.
-
-
-This module defines three dictionaries, ``name2codepoint``, ``codepoint2name``,
-and ``entitydefs``. ``entitydefs`` is used by the :mod:`htmllib` module to
-provide the :attr:`entitydefs` member of the :class:`HTMLParser` class.  The
-definition provided here contains all the entities defined by XHTML 1.0  that
-can be handled using simple textual substitution in the Latin-1 character set
-(ISO-8859-1).
-
-
-.. data:: entitydefs
-
-   A dictionary mapping XHTML 1.0 entity definitions to their replacement text in
-   ISO Latin-1.
-
-
-.. data:: name2codepoint
-
-   A dictionary that maps HTML entity names to the Unicode codepoints.
-
-   .. versionadded:: 2.3
-
-
-.. data:: codepoint2name
-
-   A dictionary that maps Unicode codepoints to HTML entity names.
-
-   .. versionadded:: 2.3
-
diff --git a/Doc/library/htmlparser.rst b/Doc/library/htmlparser.rst
deleted file mode 100644
index a58769a..0000000
--- a/Doc/library/htmlparser.rst
+++ /dev/null
@@ -1,191 +0,0 @@
-
-:mod:`html.parser` --- Simple HTML and XHTML parser
-===================================================
-
-.. module:: HTMLParser
-   :synopsis: Old name for the :mod:`html.parser` module.
-
-.. module:: html.parser
-   :synopsis: A simple parser that can handle HTML and XHTML.
-
-.. note::
-   The :mod:`HTMLParser` module has been renamed to
-   :mod:`html.parser` in Python 3.0.  It is importable under both names
-   in Python 2.6 and the rest of the 2.x series.
-
-
-.. versionadded:: 2.2
-
-.. index::
-   single: HTML
-   single: XHTML
-
-This module defines a class :class:`HTMLParser` which serves as the basis for
-parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML.
-Unlike the parser in :mod:`htmllib`, this parser is not based on the SGML parser
-in :mod:`sgmllib`.
-
-
-.. class:: HTMLParser()
-
-   The :class:`HTMLParser` class is instantiated without arguments.
-
-   An :class:`HTMLParser` instance is fed HTML data and calls handler functions when tags
-   begin and end.  The :class:`HTMLParser` class is meant to be overridden by the
-   user to provide a desired behavior.
-
-   Unlike the parser in :mod:`htmllib`, this parser does not check that end tags
-   match start tags or call the end-tag handler for elements which are closed
-   implicitly by closing an outer element.
-
-An exception is defined as well:
-
-
-.. exception:: HTMLParseError
-
-   Exception raised by the :class:`HTMLParser` class when it encounters an error
-   while parsing.  This exception provides three attributes: :attr:`msg` is a brief
-   message explaining the error, :attr:`lineno` is the number of the line on which
-   the broken construct was detected, and :attr:`offset` is the number of
-   characters into the line at which the construct starts.
-
-:class:`HTMLParser` instances have the following methods:
-
-
-.. method:: HTMLParser.reset()
-
-   Reset the instance.  Loses all unprocessed data.  This is called implicitly at
-   instantiation time.
-
-
-.. method:: HTMLParser.feed(data)
-
-   Feed some text to the parser.  It is processed insofar as it consists of
-   complete elements; incomplete data is buffered until more data is fed or
-   :meth:`close` is called.
-
-
-.. method:: HTMLParser.close()
-
-   Force processing of all buffered data as if it were followed by an end-of-file
-   mark.  This method may be redefined by a derived class to define additional
-   processing at the end of the input, but the redefined version should always call
-   the :class:`HTMLParser` base class method :meth:`close`.
-
-
-.. method:: HTMLParser.getpos()
-
-   Return current line number and offset.
-
-
-.. method:: HTMLParser.get_starttag_text()
-
-   Return the text of the most recently opened start tag.  This should not normally
-   be needed for structured processing, but may be useful in dealing with HTML "as
-   deployed" or for re-generating input with minimal changes (whitespace between
-   attributes can be preserved, etc.).
-
-
-.. method:: HTMLParser.handle_starttag(tag, attrs)
-
-   This method is called to handle the start of a tag.  It is intended to be
-   overridden by a derived class; the base class implementation does nothing.
-
-   The *tag* argument is the name of the tag converted to lower case. The *attrs*
-   argument is a list of ``(name, value)`` pairs containing the attributes found
-   inside the tag's ``<>`` brackets.  The *name* will be translated to lower case,
-   and quotes in the *value* have been removed, and character and entity references
-   have been replaced.  For instance, for the tag ``<A
-   HREF="http://www.cwi.nl/">``, this method would be called as
-   ``handle_starttag('a', [('href', 'http://www.cwi.nl/')])``.
-
-   .. versionchanged:: 2.6
-      All entity references from :mod:`html.entities` are now replaced in the
-      attribute values.
-
-
-.. method:: HTMLParser.handle_startendtag(tag, attrs)
-
-   Similar to :meth:`handle_starttag`, but called when the parser encounters an
-   XHTML-style empty tag (``<a .../>``).  This method may be overridden by
-   subclasses which require this particular lexical information; the default
-   implementation simple calls :meth:`handle_starttag` and :meth:`handle_endtag`.
-
-
-.. method:: HTMLParser.handle_endtag(tag)
-
-   This method is called to handle the end tag of an element.  It is intended to be
-   overridden by a derived class; the base class implementation does nothing.  The
-   *tag* argument is the name of the tag converted to lower case.
-
-
-.. method:: HTMLParser.handle_data(data)
-
-   This method is called to process arbitrary data.  It is intended to be
-   overridden by a derived class; the base class implementation does nothing.
-
-
-.. method:: HTMLParser.handle_charref(name)
-
-   This method is called to process a character reference of the form ``&#ref;``.
-   It is intended to be overridden by a derived class; the base class
-   implementation does nothing.
-
-
-.. method:: HTMLParser.handle_entityref(name)
-
-   This method is called to process a general entity reference of the form
-   ``&name;`` where *name* is an general entity reference.  It is intended to be
-   overridden by a derived class; the base class implementation does nothing.
-
-
-.. method:: HTMLParser.handle_comment(data)
-
-   This method is called when a comment is encountered.  The *comment* argument is
-   a string containing the text between the ``--`` and ``--`` delimiters, but not
-   the delimiters themselves.  For example, the comment ``<!--text-->`` will cause
-   this method to be called with the argument ``'text'``.  It is intended to be
-   overridden by a derived class; the base class implementation does nothing.
-
-
-.. method:: HTMLParser.handle_decl(decl)
-
-   Method called when an SGML declaration is read by the parser.  The *decl*
-   parameter will be the entire contents of the declaration inside the ``<!``...\
-   ``>`` markup.  It is intended to be overridden by a derived class; the base
-   class implementation does nothing.
-
-
-.. method:: HTMLParser.handle_pi(data)
-
-   Method called when a processing instruction is encountered.  The *data*
-   parameter will contain the entire processing instruction. For example, for the
-   processing instruction ``<?proc color='red'>``, this method would be called as
-   ``handle_pi("proc color='red'")``.  It is intended to be overridden by a derived
-   class; the base class implementation does nothing.
-
-   .. note::
-
-      The :class:`HTMLParser` class uses the SGML syntactic rules for processing
-      instructions.  An XHTML processing instruction using the trailing ``'?'`` will
-      cause the ``'?'`` to be included in *data*.
-
-
-.. _htmlparser-example:
-
-Example HTML Parser Application
--------------------------------
-
-As a basic example, below is a very basic HTML parser that uses the
-:class:`HTMLParser` class to print out tags as they are encountered::
-
-   from html.parser import HTMLParser
-
-   class MyHTMLParser(HTMLParser):
-
-       def handle_starttag(self, tag, attrs):
-           print "Encountered the beginning of a %s tag" % tag
-
-       def handle_endtag(self, tag):
-           print "Encountered the end of a %s tag" % tag
-
diff --git a/Doc/library/markup.rst b/Doc/library/markup.rst
index dd0dd8f..e08f953 100644
--- a/Doc/library/markup.rst
+++ b/Doc/library/markup.rst
@@ -23,7 +23,8 @@ definition of the Python bindings for the DOM and SAX interfaces.
 
 .. toctree::
 
-   htmlparser.rst
+   html.parser.rst
+   html.entities.rst
    sgmllib.rst
    htmllib.rst
    pyexpat.rst
-- 
cgit v0.12