diff options
author | Georg Brandl <georg@python.org> | 2007-08-15 14:26:55 (GMT) |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2007-08-15 14:26:55 (GMT) |
commit | f56181ff53ba00b7bed3997a4dccd9a1b6217b57 (patch) | |
tree | 1200947a7ffc78c2719831e4c7fd900a8ab01368 /Doc/tools/sgmlconv | |
parent | af62d9abfb78067a54c769302005f952ed999f6a (diff) | |
download | cpython-f56181ff53ba00b7bed3997a4dccd9a1b6217b57.zip cpython-f56181ff53ba00b7bed3997a4dccd9a1b6217b57.tar.gz cpython-f56181ff53ba00b7bed3997a4dccd9a1b6217b57.tar.bz2 |
Delete the LaTeX doc tree.
Diffstat (limited to 'Doc/tools/sgmlconv')
-rw-r--r-- | Doc/tools/sgmlconv/Makefile | 67 | ||||
-rw-r--r-- | Doc/tools/sgmlconv/README | 58 | ||||
-rw-r--r-- | Doc/tools/sgmlconv/conversion.xml | 914 | ||||
-rwxr-xr-x | Doc/tools/sgmlconv/docfixer.py | 1073 | ||||
-rwxr-xr-x | Doc/tools/sgmlconv/esis2sgml.py | 264 | ||||
-rw-r--r-- | Doc/tools/sgmlconv/esistools.py | 312 | ||||
-rwxr-xr-x | Doc/tools/sgmlconv/latex2esis.py | 565 | ||||
-rw-r--r-- | Doc/tools/sgmlconv/make.rules | 48 |
8 files changed, 0 insertions, 3301 deletions
diff --git a/Doc/tools/sgmlconv/Makefile b/Doc/tools/sgmlconv/Makefile deleted file mode 100644 index d222933..0000000 --- a/Doc/tools/sgmlconv/Makefile +++ /dev/null @@ -1,67 +0,0 @@ -# Simple makefile to control XML generation for the entire document tree. -# This should be used from the top-level directory (Doc/), not the directory -# that actually contains this file: -# -# $ pwd -# .../Doc -# $ make -f tools/sgmlconv/Makefile - -TOPDIR=. -TOOLSDIR=tools - -SGMLRULES=../$(TOOLSDIR)/sgmlconv/make.rules -# The 'inst' and 'tut' directories break the conversion, so skip them for now. -SUBDIRS=api dist ext lib mac ref -SUBMAKE=$(MAKE) -f $(SGMLRULES) TOOLSDIR=../$(TOOLSDIR) - -all: xml - -.PHONY: esis xml -.PHONY: $(SUBDIRS) - -xml: - for DIR in $(SUBDIRS) ; do \ - (cd $$DIR && $(SUBMAKE) xml) || exit $$? ; done - -esis: - for DIR in $(SUBDIRS) ; do \ - (cd $$DIR && $(SUBMAKE) esis) || exit $$? ; done - -esis1: - for DIR in $(SUBDIRS) ; do \ - (cd $$DIR && $(SUBMAKE) esis1) || exit $$? ; done - -tarball: xml - tar cf - tools/sgmlconv */*.xml | gzip -9 >xml-1.5.2b2.tgz - -api: - cd api && $(SUBMAKE) - -dist: - cd dist && $(SUBMAKE) - -ext: - cd ext && $(SUBMAKE) - -inst: - cd inst && $(SUBMAKE) - -lib: - cd lib && $(SUBMAKE) - -mac: - cd mac && $(SUBMAKE) - -ref: - cd ref && $(SUBMAKE) - -tut: - cd tut && $(SUBMAKE) - -clean: - for DIR in $(SUBDIRS) ; do \ - (cd $$DIR && $(SUBMAKE) clean) || exit $$? ; done - -clobber: - for DIR in $(SUBDIRS) ; do \ - (cd $$DIR && $(SUBMAKE) clobber) || exit $$? ; done diff --git a/Doc/tools/sgmlconv/README b/Doc/tools/sgmlconv/README deleted file mode 100644 index 02564eb..0000000 --- a/Doc/tools/sgmlconv/README +++ /dev/null @@ -1,58 +0,0 @@ -These scripts and Makefile fragment are used to convert the Python -documentation in LaTeX format to XML. - -This material is preliminary and incomplete. Python 2.0 is required. 
- -To convert all documents to XML: - - cd Doc/ - make -f tools/sgmlconv/Makefile - -To convert one document to XML: - - cd Doc/<document-dir> - make -f ../tools/sgmlconv/make.rules TOOLSDIR=../tools - -Please send comments and bug reports to docs@python.org. - - -What do the tools do? ---------------------- - -latex2esis.py - Reads in a conversion specification written in XML - (conversion.xml), reads a LaTeX document fragment, and interprets - the markup according to the specification. The output is a stream - of ESIS events like those created by the nsgmls SGML parser, but - is *not* guaranteed to represent a single tree! This is done to - allow conversion per entity rather than per document. Since many - of the LaTeX files for the Python documentation contain two - sections on closely related modules, it is important to allow both - of the resulting <section> elements to exist in the same output - stream. Additionally, since comments are not supported in ESIS, - comments are converted to <COMMENT> elements, which might exist at - the same level as the top-level content elements. - - The output of latex2esis.py gets saved as <filename>.esis1. - -docfixer.py - This is the really painful part of the conversion. Well, it's the - second really painful part, but more of the pain is specific to - the structure of the Python documentation and desired output - rather than to the parsing of LaTeX markup. - - This script loads the ESIS data created by latex2esis.py into a - DOM document *fragment* (remember, the latex2esis.py output may - not be well-formed). Once loaded, it walks over the tree many - times looking for a variety of possible specific - micro-conversions. Most of the code is not in any way "general". - After processing the fragment, a new ESIS data stream is written - out. Like the input, it may not represent a well-formed - document, but does represent a parsed entity. - - The output of docfixer.py is what gets saved in <filename>.esis. 
- -esis2sgml.py - Reads an ESIS stream and converts it to SGML or XML. This also - converts <COMMENT> elements to real comments. This works quickly - because there's not much to actually do. diff --git a/Doc/tools/sgmlconv/conversion.xml b/Doc/tools/sgmlconv/conversion.xml deleted file mode 100644 index f0151f4..0000000 --- a/Doc/tools/sgmlconv/conversion.xml +++ /dev/null @@ -1,914 +0,0 @@ -<?xml version="1.0" encoding="iso-8859-1"?> -<conversion> - <!-- Miscellaneous. --> - <macro name="declaremodule"> - <attribute name="id" optional="yes"/> - <attribute name="type"/> - <attribute name="name"/> - </macro> - <macro name="modulesynopsis"> - <content/> - </macro> - <macro name="platform"> - <content/> - </macro> - <macro name="deprecated"> - <attribute name="version"/> - <content/> - </macro> - <macro name="label"> - <attribute name="id"/> - </macro> - <macro name="nodename" outputname="label"> - <attribute name="id"/> - </macro> - <macro name="localmoduletable"/> - <macro name="manpage"> - <attribute name="name"/> - <attribute name="section"/> - </macro> - <macro name="module"> - <content/> - </macro> - <macro name="moduleauthor"> - <attribute name="name"/> - <attribute name="email"/> - </macro> - <macro name="citetitle"> - <attribute name="href" optional="yes"/> - <content/> - </macro> - <macro name="pep"> - <attribute name="num"/> - </macro> - <macro name="rfc"> - <attribute name="num"/> - </macro> - <macro name="sectionauthor" outputname="author"> - <attribute name="name"/> - <attribute name="email"/> - </macro> - <macro name="author"> - <attribute name="name"/> - </macro> - <macro name="authoraddress"> - <content/> - </macro> - <macro name="shortversion"/> - <macro name="note"> - <content/> - </macro> - <macro name="warning"> - <content/> - </macro> - <environment name="notice"> - <attribute name="role" optional="yes"/> - </environment> - - <macro name="menuselection"> - <content/> - </macro> - <macro name="sub"/> - - <!-- These are broken: we need to re-order the 
optional and required - parameters, making the optional parameter the content for the - element. latex2esis.py is not powerful enough to handle this. - --> - <macro name="versionadded"> - <attribute name="info" optional="yes"/> - <attribute name="version"/> - </macro> - <macro name="versionchanged"> - <attribute name="info" optional="yes"/> - <attribute name="version"/> - </macro> - - <!-- Module referencing. --> - <macro name="refmodule" outputname="module"> - <!-- this causes the optional parameter to \refmodule to be - discarded --> - <attribute name="" optional="yes"/> - <content/> - </macro> - - <!-- Information units. --> - <!-- C things. --> - <environment name="cfuncdesc"> - <attribute name="type"/> - <attribute name="name"/> - <child name="args"/> - </environment> - <environment name="csimplemacrodesc"> - <attribute name="name"/> - </environment> - <environment name="ctypedesc"> - <attribute name="tag" optional="yes"/> - <attribute name="name"/> - </environment> - <environment name="cvardesc"> - <attribute name="type"/> - <attribute name="name"/> - </environment> - - <!-- Python things. 
--> - <macro name="optional"> - <content/> - </macro> - <macro name="unspecified"/> - <macro name="moreargs"/> - <environment name="classdesc"> - <attribute name="name"/> - <child name="args"/> - </environment> - <environment name="classdesc*" outputname="classdesc"> - <attribute name="name"/> - </environment> - <environment name="datadesc"> - <attribute name="name"/> - </environment> - <environment name="datadescni" outputname="datadesc"> - <attribute name="index">no</attribute> - <attribute name="name"/> - </environment> - <macro name="dataline"> - <attribute name="name"/> - </macro> - <environment name="excclassdesc"> - <attribute name="name"/> - <child name="args"/> - </environment> - <environment name="excdesc"> - <attribute name="name"/> - </environment> - - <environment name="funcdesc"> - <attribute name="name"/> - <child name="args"/> - </environment> - <macro name="funcline"> - <attribute name="name"/> - <child name="args"/> - </macro> - <environment name="funcdescni" outputname="funcdesc"> - <attribute name="index">no</attribute> - <attribute name="name"/> - <child name="args"/> - </environment> - <macro name="funclineni" outputname="funcline"> - <attribute name="index">no</attribute> - <attribute name="name"/> - <child name="args"/> - </macro> - - <environment name="memberdesc"> - <attribute name="class" optional="yes"/> - <attribute name="name"/> - </environment> - <environment name="memberdescni" outputname="memberdesc"> - <attribute name="index">no</attribute> - <attribute name="class" optional="yes"/> - <attribute name="name"/> - </environment> - <macro name="memberline"> - <attribute name="name"/> - </macro> - - <environment name="methoddesc"> - <attribute name="class" optional="yes"/> - <attribute name="name"/> - <child name="args"/> - </environment> - <macro name="methodline"> - <attribute name="class" optional="yes"/> - <attribute name="name"/> - <child name="args"/> - </macro> - <environment name="methoddescni"> - <attribute 
name="index">no</attribute> - <attribute name="class" optional="yes"/> - <attribute name="name"/> - <child name="args"/> - </environment> - <macro name="methodlineni" outputname="methodline"> - <attribute name="index">no</attribute> - <attribute name="class" optional="yes"/> - <attribute name="name"/> - <child name="args"/> - </macro> - - <environment name="opcodedesc"> - <attribute name="name"/> - <attribute name="var"/> - </environment> - - <!-- "See also:" sections. --> - <environment name="seealso*" outputname="seealso"> - <attribute name="sidebar">no</attribute> - </environment> - <macro name="seemodule"> - <!-- this causes the optional parameter to \seemodule to be - discarded --> - <attribute name="" optional="yes"/> - <attribute name="name"/> - <child name="description"/> - </macro> - <macro name="seepep"> - <attribute name="number"/> - <child name="title"/> - <child name="description"/> - </macro> - <macro name="seerfc"> - <attribute name="number"/> - <child name="title"/> - <child name="description"/> - </macro> - <macro name="seetext"> - <child name="description"/> - </macro> - <macro name="seetitle"> - <attribute name="href" optional="yes"/> - <child name="title"/> - <child name="description"/> - </macro> - <macro name="seeurl"> - <attribute name="href"/> - <child name="description"/> - </macro> - - <!-- Index-generating markup. 
--> - <macro name="index" outputname="indexterm"> - <attribute name="term1"/> - </macro> - <macro name="indexii" outputname="indexterm"> - <attribute name="term1"/> - <attribute name="term2"/> - </macro> - <macro name="indexiii" outputname="indexterm"> - <attribute name="term1"/> - <attribute name="term2"/> - <attribute name="term3"/> - </macro> - <macro name="indexiv" outputname="indexterm"> - <attribute name="term1"/> - <attribute name="term2"/> - <attribute name="term3"/> - <attribute name="term4"/> - </macro> - - <macro name="ttindex" outputname="indexterm"> - <attribute name="style">tt</attribute> - <attribute name="term1"/> - </macro> - - <macro name="refmodindex"> - <attribute name="module"/> - </macro> - <macro name="stmodindex"> - <attribute name="module"/> - </macro> - <macro name="refbimodindex" outputname="refmodindex"> - <attribute name="module"/> - </macro> - <macro name="refexmodindex" outputname="refmodindex"> - <attribute name="module"/> - </macro> - <macro name="refstmodindex" outputname="refmodindex"> - <attribute name="module"/> - </macro> - - <macro name="bifuncindex"> - <attribute name="name"/> - </macro> - <macro name="exindex"> - <attribute name="name"/> - </macro> - <macro name="obindex"> - <attribute name="name"/> - </macro> - <macro name="kwindex"> - <attribute name="name"/> - </macro> - <macro name="opindex"> - <attribute name="type"/> - </macro> - <macro name="stindex"> - <attribute name="type"/> - </macro> - <macro name="withsubitem"> - <attribute name="text"/> - <content/> - </macro> - <macro name="setindexsubitem"> - <attribute name="text"/> - </macro> - - <!-- Entity management. --> - <macro name="include" outputname="xi:include"> - <attribute name="href"/> - </macro> - <macro name="input" outputname="xi:include"> - <attribute name="href"/> - </macro> - - <!-- Large-scale document structure. 
--> - <macro name="documentclass"> - <attribute name="classname"/> - </macro> - - <macro name="usepackage"> - <attribute name="options" optional="yes"/> - <attribute name="pkg"/> - </macro> - - <environment name="document" - endcloses="chapter chapter* section section* - subsection subsection* - subsubsection subsubsection* - paragraph paragraph* subparagraph - subparagraph*"> - <attribute name="xmlns:xi" - >http://www.w3.org/2001/XInclude</attribute> - </environment> - - <macro name="chapter" - closes="chapter chapter* section section* subsection subsection* - subsubsection subsubsection* - paragraph paragraph* subparagraph subparagraph*"> - <text> -</text> - <child name="title"/> - <content implied="yes"/> - </macro> - <macro name="chapter*" outputname="chapter" - closes="chapter chapter* section section* subsection subsection* - subsubsection subsubsection* - paragraph paragraph* subparagraph subparagraph*"> - <attribute name="numbered">no</attribute> - <text> -</text> - <child name="title"/> - <content implied="yes"/> - </macro> - - <macro name="section" - closes="section section* subsection subsection* - subsubsection subsubsection* - paragraph paragraph* subparagraph subparagraph*"> - <text> -</text> - <child name="title"/> - <content implied="yes"/> - </macro> - <macro name="section*" outputname="section" - closes="section section* subsection subsection* - subsubsection subsubsection* - paragraph paragraph* subparagraph subparagraph*"> - <attribute name="numbered">no</attribute> - <text> -</text> - <child name="title"/> - <content implied="yes"/> - </macro> - - <macro name="subsection" - closes="subsection subsection* subsubsection subsubsection* - paragraph paragraph* subparagraph subparagraph*"> - <text> -</text> - <child name="title"/> - <content implied="yes"/> - </macro> - <macro name="subsection*" outputname="subsection" - closes="subsection subsection* subsubsection subsubsection* - paragraph paragraph* subparagraph subparagraph*"> - <attribute 
name="numbered">no</attribute> - <text> -</text> - <child name="title"/> - <content implied="yes"/> - </macro> - - <macro name="subsubsection" - closes="subsubsection subsubsection* - paragraph paragraph* subparagraph subparagraph*"> - <text> -</text> - <child name="title"/> - <content implied="yes"/> - </macro> - <macro name="subsubsection*" outputname="subsubsection" - closes="subsubsection subsubsection* - paragraph paragraph* subparagraph subparagraph*"> - <attribute name="numbered">no</attribute> - <text> -</text> - <child name="title"/> - <content implied="yes"/> - </macro> - - <macro name="paragraph" - closes="paragraph paragraph* subparagraph subparagraph*"> - <text> -</text> - <child name="title"/> - <content implied="yes"/> - </macro> - <macro name="paragraph*" outputname="paragraph" - closes="paragraph paragraph* subparagraph subparagraph*"> - <attribute name="numbered">no</attribute> - <text> -</text> - <child name="title"/> - <content implied="yes"/> - </macro> - - <macro name="subparagraph" - closes="subparagraph subparagraph*"> - <text> -</text> - <child name="title"/> - <content implied="yes"/> - </macro> - <macro name="subparagraph*" outputname="subparagraph" - closes="subparagraph subparagraph*"> - <attribute name="numbered">no</attribute> - <text> -</text> - <child name="title"/> - <content implied="yes"/> - </macro> - <macro name="title"> - <content/> - </macro> - - <macro name="appendix" outputname="back-matter" - closes="chapter chapter* section subsection subsubsection - paragraph subparagraph"/> - - <environment name="list" - endcloses="item"> - <attribute name="bullet"/> - <attribute name="init"/> - </environment> - <macro name="item" closes="item"> - <child name="leader" optional="yes"/> - <content implied="yes"/> - </macro> - - <macro name="ref"> - <attribute name="ref"/> - </macro> - - <environment name="description" outputname="descriptionlist" - endcloses="item"/> - - <environment name="enumerate" outputname="enumeration" - 
endcloses="item"/> - - <environment name="fulllineitems" - endcloses="item"/> - - <environment name="itemize" - endcloses="item"/> - - <environment name="definitions" outputname="definitionlist" - encloses="term"/> - <macro name="term" closes="definition"> - <!-- not really optional, but uses the [] syntax --> - <child name="term" optional="yes"/> - <child name="definition" implied="yes"/> - </macro> - - <environment name="alltt" outputname="verbatim"/> - <environment name="comment" verbatim="yes"/> - <environment name="verbatim" verbatim="yes"/> - <environment name="verbatim*" verbatim="yes"> - <!-- not used anywhere, but it's a standard LaTeXism --> - <attribute name="spaces">visible</attribute> - </environment> - <macro name="verbatiminput" outputname="xi:include"> - <attribute name="parse">text</attribute> - <attribute name="href"/> - </macro> - - <!-- Table markup. --> - <macro name="hline"/> - <environment name="tableii" outputname="table"> - <attribute name="cols">2</attribute> - <attribute name="colspec"/> - <attribute name="style"/> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - </environment> - <environment name="longtableii" outputname="table"> - <attribute name="cols">2</attribute> - <attribute name="colspec"/> - <attribute name="style"/> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - </environment> - <macro name="lineii" outputname="row"> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - </macro> - - <environment name="tableiii" outputname="table"> - <attribute name="cols">3</attribute> - <attribute name="colspec"/> - <attribute name="style"/> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - </environment> - <environment name="longtableiii" outputname="table"> - <attribute name="cols">3</attribute> - <attribute name="colspec"/> - <attribute name="style"/> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - 
<text> - </text> - <child name="entry"/> - </environment> - <macro name="lineiii" outputname="row"> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - </macro> - - <environment name="tableiv" outputname="table"> - <attribute name="cols">4</attribute> - <attribute name="colspec"/> - <attribute name="style"/> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - </environment> - <environment name="longtableiv" outputname="table"> - <attribute name="cols">4</attribute> - <attribute name="colspec"/> - <attribute name="style"/> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - </environment> - <macro name="lineiv" outputname="row"> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - </macro> - - <environment name="tablev" outputname="table"> - <attribute name="cols">4</attribute> - <attribute name="colspec"/> - <attribute name="style"/> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - </environment> - <environment name="longtablev" outputname="table"> - <attribute name="cols">4</attribute> - <attribute name="colspec"/> - <attribute name="style"/> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - </environment> - <macro name="linev" outputname="row"> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - <text> - </text> - <child name="entry"/> - </macro> - - 
<!-- These are handled at a later translation stage, at least for now. --> - <macro name="Cpp" outputname=""> - <text>C++</text> - </macro> - <macro name="geq" outputname=""> - <entityref name="geq"/> - </macro> - <macro name="infinity" outputname=""> - <entityref name="infin"/> - </macro> - <macro name="LaTeX" outputname=""> - <text>LaTeX</text> - </macro> - <macro name="ldots" outputname=""> - <text>...</text> - </macro> - <macro name="leq" outputname=""> - <entityref name="leq"/> - </macro> - <macro name="plusminus" outputname=""> - <entityref name="plusmn"/> - </macro> - <macro name="TeX" outputname=""> - <text>TeX</text> - </macro> - <macro name="version"/> - - <!-- Distutils things. --> - <macro name="command"> - <content/> - </macro> - <macro name="option"> - <content/> - </macro> - <macro name="filevar" outputname="var"> - <content/> - </macro> - <macro name="XXX" outputname="editorial-comment"> - <content/> - </macro> - - <!-- Grammar production lists --> - <environment name="productionlist"> - <attribute name="grammar" optional="yes"/> - </environment> - <macro name="production"> - <attribute name="token"/> - <content/> - </macro> - <macro name="productioncont"> - <content/> - </macro> - <macro name="token" outputname="grammartoken"> - <content/> - </macro> - <macro name="grammartoken"> - <content/> - </macro> - - <!-- Misc. 
--> - <macro name="emph"> - <content/> - </macro> - <macro name="strong"> - <content/> - </macro> - <macro name="textrm"> - <content/> - </macro> - <macro name="texttt"> - <content/> - </macro> - <macro name="code"> - <content/> - </macro> - <macro name="exception"> - <content/> - </macro> - <macro name="keyword"> - <content/> - </macro> - <macro name="samp"> - <content/> - </macro> - <macro name="class"> - <content/> - </macro> - <macro name="cdata"> - <content/> - </macro> - <macro name="cfunction"> - <content/> - </macro> - <macro name="csimplemacro"> - <content/> - </macro> - <macro name="ctype"> - <content/> - </macro> - <macro name="pytype"> - <content/> - </macro> - <macro name="character"> - <content/> - </macro> - <macro name="constant"> - <content/> - </macro> - <macro name="envvar" outputname="envar"> - <content/> - </macro> - <macro name="file" outputname="filename"> - <content/> - </macro> - <macro name="filenq" outputname="filename"> - <attribute name="quote">no</attribute> - <content/> - </macro> - <macro name="function"> - <content/> - </macro> - <macro name="kbd" outputname="keysym"> - <content/> - </macro> - <macro name="mailheader"> - <content/> - </macro> - <macro name="makevar"> - <content/> - </macro> - <macro name="method"> - <content/> - </macro> - <macro name="member"> - <content/> - </macro> - <macro name="mimetype"> - <content/> - </macro> - <macro name="newsgroup"> - <content/> - </macro> - <macro name="program" outputname="command"> - <content/> - </macro> - <macro name="programopt" outputname="option"> - <content/> - </macro> - <macro name="longprogramopt" outputname="longoption"> - <content/> - </macro> - <macro name="regexp"> - <content/> - </macro> - <macro name="var"> - <content/> - </macro> - <macro name="email"> - <content/> - </macro> - <macro name="ulink"> - <!-- order of the parameters makes this difficult; - we'll need to fix it up to <ulink href="...">...</ulink> - in docfixer.py. 
- --> - <child name="text"/> - <child name="href"/> - </macro> - <macro name="url"> - <content/> - </macro> - <macro name="footnote"> - <content/> - </macro> - <macro name="dfn" outputname="definedterm"> - <content/> - </macro> - - <macro name="mbox"> - <content/> - </macro> - - <!-- minimal math stuff to get by --> - <macro name="pi"/> - <macro name="sqrt"> - <content/> - </macro> - <macro name="frac" outputname="fraction"> - <child name="numerator"/> - <child name="denominator"/> - </macro> - <macro name="sum"> - <content/> - </macro> - - <macro name="leftline" outputname=""> - <content/> - </macro> - - <!-- Conversions to text; perhaps could be different? There's --> - <!-- no way for a style sheet to work with these this way. --> - <macro name="ABC" outputname=""> - <text>ABC</text> - </macro> - <macro name="ASCII" outputname=""> - <text>ASCII</text> - </macro> - <macro name="C" outputname=""> - <text>C</text> - </macro> - <macro name="EOF" outputname=""> - <text>EOF</text> - </macro> - <macro name="e" outputname=""> - <text>\</text> - </macro> - <macro name="NULL" outputname="constant"> - <text>NULL</text> - </macro> - <macro name="POSIX" outputname=""> - <text>POSIX</text> - </macro> - <macro name="UNIX" outputname=""> - <text>Unix</text> - </macro> - <macro name="textasciicircum" outputname=""> - <text>^</text> - </macro> - <macro name="textasciitilde" outputname=""> - <text>~</text> - </macro> - <macro name="textbackslash" outputname=""> - <text>\</text> - </macro> - <macro name="textbar" outputname=""> - <text>|</text> - </macro> - <macro name="textgreater" outputname=""> - <text>></text> - </macro> - <macro name="textless" outputname=""> - <text><</text> - </macro> - - <!-- These will end up disappearing as well! 
--> - <macro name="catcode" outputname=""/> - <macro name="fi" outputname=""/> - <macro name="ifhtml" outputname=""/> - <macro name="indexname" outputname=""/> - <macro name="labelwidth" outputname=""/> - <macro name="large" outputname=""/> - <macro name="leftmargin" outputname=""/> - <macro name="makeindex" outputname=""/> - <macro name="makemodindex" outputname=""/> - <macro name="maketitle" outputname=""/> - <macro name="noindent" outputname=""/> - <macro name="protect" outputname=""/> - <macro name="textwidth"/> - <macro name="renewcommand"> - <attribute name="macro"/> - <attribute name="nargs" optional="yes"/> - <content/> - </macro> - <macro name="tableofcontents" outputname=""/> - <macro name="vspace"> - <attribute name="size"/> - </macro> -</conversion> diff --git a/Doc/tools/sgmlconv/docfixer.py b/Doc/tools/sgmlconv/docfixer.py deleted file mode 100755 index 81519ee..0000000 --- a/Doc/tools/sgmlconv/docfixer.py +++ /dev/null @@ -1,1073 +0,0 @@ -#! /usr/bin/env python - -"""Perform massive transformations on a document tree created from the LaTeX -of the Python documentation, and dump the ESIS data for the transformed tree. -""" - - -import errno -import esistools -import re -import sys -import xml.dom -import xml.dom.minidom - -ELEMENT = xml.dom.Node.ELEMENT_NODE -ENTITY_REFERENCE = xml.dom.Node.ENTITY_REFERENCE_NODE -TEXT = xml.dom.Node.TEXT_NODE - - -class ConversionError(Exception): - pass - - -ewrite = sys.stderr.write -try: - # We can only do this trick on Unix (if tput is on $PATH)! 
- if sys.platform != "posix" or not sys.stderr.isatty(): - raise ImportError - import commands -except ImportError: - bwrite = ewrite -else: - def bwrite(s, BOLDON=commands.getoutput("tput bold"), - BOLDOFF=commands.getoutput("tput sgr0")): - ewrite("%s%s%s" % (BOLDON, s, BOLDOFF)) - - -PARA_ELEMENT = "para" - -DEBUG_PARA_FIXER = 0 - -if DEBUG_PARA_FIXER: - def para_msg(s): - ewrite("*** %s\n" % s) -else: - def para_msg(s): - pass - - -def get_first_element(doc, gi): - for n in doc.childNodes: - if n.nodeName == gi: - return n - -def extract_first_element(doc, gi): - node = get_first_element(doc, gi) - if node is not None: - doc.removeChild(node) - return node - - -def get_documentElement(node): - result = None - for child in node.childNodes: - if child.nodeType == ELEMENT: - result = child - return result - - -def set_tagName(elem, gi): - elem.nodeName = elem.tagName = gi - - -def find_all_elements(doc, gi): - nodes = [] - if doc.nodeName == gi: - nodes.append(doc) - for child in doc.childNodes: - if child.nodeType == ELEMENT: - if child.tagName == gi: - nodes.append(child) - for node in child.getElementsByTagName(gi): - nodes.append(node) - return nodes - -def find_all_child_elements(doc, gi): - nodes = [] - for child in doc.childNodes: - if child.nodeName == gi: - nodes.append(child) - return nodes - - -def find_all_elements_from_set(doc, gi_set): - return __find_all_elements_from_set(doc, gi_set, []) - -def __find_all_elements_from_set(doc, gi_set, nodes): - if doc.nodeName in gi_set: - nodes.append(doc) - for child in doc.childNodes: - if child.nodeType == ELEMENT: - __find_all_elements_from_set(child, gi_set, nodes) - return nodes - - -def simplify(doc, fragment): - # Try to rationalize the document a bit, since these things are simply - # not valid SGML/XML documents as they stand, and need a little work. 
- documentclass = "document" - inputs = [] - node = extract_first_element(fragment, "documentclass") - if node is not None: - documentclass = node.getAttribute("classname") - node = extract_first_element(fragment, "title") - if node is not None: - inputs.append(node) - # update the name of the root element - node = get_first_element(fragment, "document") - if node is not None: - set_tagName(node, documentclass) - # Move everything that comes before this node into this node; - # this will be the document element. - nodelist = fragment.childNodes - point = node.firstChild - while not nodelist[0].isSameNode(node): - node.insertBefore(nodelist[0], point) - while 1: - node = extract_first_element(fragment, "input") - if node is None: - break - inputs.append(node) - if inputs: - docelem = get_documentElement(fragment) - inputs.reverse() - for node in inputs: - text = doc.createTextNode("\n") - docelem.insertBefore(text, docelem.firstChild) - docelem.insertBefore(node, text) - docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild) - while fragment.firstChild and fragment.firstChild.nodeType == TEXT: - fragment.removeChild(fragment.firstChild) - - -def cleanup_root_text(doc): - discards = [] - skip = 0 - for n in doc.childNodes: - prevskip = skip - skip = 0 - if n.nodeType == TEXT and not prevskip: - discards.append(n) - elif n.nodeName == "COMMENT": - skip = 1 - for node in discards: - doc.removeChild(node) - - -DESCRIPTOR_ELEMENTS = ( - "cfuncdesc", "cvardesc", "ctypedesc", - "classdesc", "memberdesc", "memberdescni", "methoddesc", "methoddescni", - "excdesc", "funcdesc", "funcdescni", "opcodedesc", - "datadesc", "datadescni", - ) - -def fixup_descriptors(doc, fragment): - sections = find_all_elements(fragment, "section") - for section in sections: - find_and_fix_descriptors(doc, section) - - -def find_and_fix_descriptors(doc, container): - children = container.childNodes - for child in children: - if child.nodeType == ELEMENT: - tagName = child.tagName - if 
tagName in DESCRIPTOR_ELEMENTS: - rewrite_descriptor(doc, child) - elif tagName == "subsection": - find_and_fix_descriptors(doc, child) - - -def rewrite_descriptor(doc, descriptor): - # - # Do these things: - # 1. Add an "index='no'" attribute to the element if the tagName - # ends in 'ni', removing the 'ni' from the name. - # 2. Create a <signature> from the name attribute - # 2a.Create an <args> if it appears to be available. - # 3. Create additional <signature>s from <*line{,ni}> elements, - # if found. - # 4. If a <versionadded> is found, move it to an attribute on the - # descriptor. - # 5. Move remaining child nodes to a <description> element. - # 6. Put it back together. - # - # 1. - descname = descriptor.tagName - index = descriptor.getAttribute("name") != "no" - desctype = descname[:-4] # remove 'desc' - linename = desctype + "line" - if not index: - linename = linename + "ni" - # 2. - signature = doc.createElement("signature") - name = doc.createElement("name") - signature.appendChild(doc.createTextNode("\n ")) - signature.appendChild(name) - name.appendChild(doc.createTextNode(descriptor.getAttribute("name"))) - descriptor.removeAttribute("name") - # 2a. 
- if descriptor.hasAttribute("var"): - if descname != "opcodedesc": - raise RuntimeError, \ - "got 'var' attribute on descriptor other than opcodedesc" - variable = descriptor.getAttribute("var") - if variable: - args = doc.createElement("args") - args.appendChild(doc.createTextNode(variable)) - signature.appendChild(doc.createTextNode("\n ")) - signature.appendChild(args) - descriptor.removeAttribute("var") - newchildren = [signature] - children = descriptor.childNodes - pos = skip_leading_nodes(children) - if pos < len(children): - child = children[pos] - if child.nodeName == "args": - # move <args> to <signature>, or remove if empty: - child.parentNode.removeChild(child) - if len(child.childNodes): - signature.appendChild(doc.createTextNode("\n ")) - signature.appendChild(child) - signature.appendChild(doc.createTextNode("\n ")) - # 3, 4. - pos = skip_leading_nodes(children, pos) - while pos < len(children) \ - and children[pos].nodeName in (linename, "versionadded"): - if children[pos].tagName == linename: - # this is really a supplemental signature, create <signature> - oldchild = children[pos].cloneNode(1) - try: - sig = methodline_to_signature(doc, children[pos]) - except KeyError: - print oldchild.toxml() - raise - newchildren.append(sig) - else: - # <versionadded added=...> - descriptor.setAttribute( - "added", children[pos].getAttribute("version")) - pos = skip_leading_nodes(children, pos + 1) - # 5. - description = doc.createElement("description") - description.appendChild(doc.createTextNode("\n")) - newchildren.append(description) - move_children(descriptor, description, pos) - last = description.childNodes[-1] - if last.nodeType == TEXT: - last.data = last.data.rstrip() + "\n " - # 6. 
- # should have nothing but whitespace and signature lines in <descriptor>; - # discard them - while descriptor.childNodes: - descriptor.removeChild(descriptor.childNodes[0]) - for node in newchildren: - descriptor.appendChild(doc.createTextNode("\n ")) - descriptor.appendChild(node) - descriptor.appendChild(doc.createTextNode("\n")) - - -def methodline_to_signature(doc, methodline): - signature = doc.createElement("signature") - signature.appendChild(doc.createTextNode("\n ")) - name = doc.createElement("name") - name.appendChild(doc.createTextNode(methodline.getAttribute("name"))) - methodline.removeAttribute("name") - signature.appendChild(name) - if len(methodline.childNodes): - args = doc.createElement("args") - signature.appendChild(doc.createTextNode("\n ")) - signature.appendChild(args) - move_children(methodline, args) - signature.appendChild(doc.createTextNode("\n ")) - return signature - - -def move_children(origin, dest, start=0): - children = origin.childNodes - while start < len(children): - node = children[start] - origin.removeChild(node) - dest.appendChild(node) - - -def handle_appendix(doc, fragment): - # must be called after simplfy() if document is multi-rooted to begin with - docelem = get_documentElement(fragment) - toplevel = docelem.tagName == "manual" and "chapter" or "section" - appendices = 0 - nodes = [] - for node in docelem.childNodes: - if appendices: - nodes.append(node) - elif node.nodeType == ELEMENT: - appnodes = node.getElementsByTagName("appendix") - if appnodes: - appendices = 1 - parent = appnodes[0].parentNode - parent.removeChild(appnodes[0]) - parent.normalize() - if nodes: - map(docelem.removeChild, nodes) - docelem.appendChild(doc.createTextNode("\n\n\n")) - back = doc.createElement("back-matter") - docelem.appendChild(back) - back.appendChild(doc.createTextNode("\n")) - while nodes and nodes[0].nodeType == TEXT \ - and not nodes[0].data.strip(): - del nodes[0] - map(back.appendChild, nodes) - 
docelem.appendChild(doc.createTextNode("\n")) - - -def handle_labels(doc, fragment): - for label in find_all_elements(fragment, "label"): - id = label.getAttribute("id") - if not id: - continue - parent = label.parentNode - parentTagName = parent.tagName - if parentTagName == "title": - parent.parentNode.setAttribute("id", id) - else: - parent.setAttribute("id", id) - # now, remove <label id="..."/> from parent: - parent.removeChild(label) - if parentTagName == "title": - parent.normalize() - children = parent.childNodes - if children[-1].nodeType == TEXT: - children[-1].data = children[-1].data.rstrip() - - -def fixup_trailing_whitespace(doc, fragment, wsmap): - queue = [fragment] - fixups = [] - while queue: - node = queue[0] - del queue[0] - if wsmap.has_key(node.nodeName): - fixups.append(node) - for child in node.childNodes: - if child.nodeType == ELEMENT: - queue.append(child) - - # reverse the list to process from the inside out - fixups.reverse() - for node in fixups: - node.parentNode.normalize() - lastchild = node.lastChild - before, after = wsmap[node.tagName] - if lastchild.nodeType == TEXT: - data = lastchild.data.rstrip() + before - lastchild.data = data - norm = 0 - if wsmap[node.tagName]: - nextnode = node.nextSibling - if nextnode and nextnode.nodeType == TEXT: - nextnode.data = after + nextnode.data.lstrip() - else: - wsnode = doc.createTextNode(after) - node.parentNode.insertBefore(wsnode, nextnode) - # hack to get the title in place: - if node.tagName == "title" \ - and node.parentNode.firstChild.nodeType == ELEMENT: - node.parentNode.insertBefore(doc.createTextNode("\n "), - node.parentNode.firstChild) - node.parentNode.normalize() - - -def normalize(doc): - for node in doc.childNodes: - if node.nodeType == ELEMENT: - node.normalize() - - -def cleanup_trailing_parens(doc, element_names): - d = {} - for gi in element_names: - d[gi] = gi - rewrite_element = d.has_key - queue = [node for node in doc.childNodes if node.nodeType == ELEMENT] - while 
queue: - node = queue[0] - del queue[0] - if rewrite_element(node.tagName): - lastchild = node.lastChild - if lastchild and lastchild.nodeType == TEXT: - data = lastchild.data - if data.endswith("()"): - lastchild.data = data[:-2] - else: - for child in node.childNodes: - if child.nodeType == ELEMENT: - queue.append(child) - - -def contents_match(left, right): - left_children = left.childNodes - right_children = right.childNodes - if len(left_children) != len(right_children): - return 0 - for l, r in map(None, left_children, right_children): - nodeType = l.nodeType - if nodeType != r.nodeType: - return 0 - if nodeType == ELEMENT: - if l.tagName != r.tagName: - return 0 - # should check attributes, but that's not a problem here - if not contents_match(l, r): - return 0 - elif nodeType == TEXT: - if l.data != r.data: - return 0 - else: - # not quite right, but good enough - return 0 - return 1 - - -def create_module_info(doc, section): - # Heavy. - node = extract_first_element(section, "modulesynopsis") - if node is None: - return - set_tagName(node, "synopsis") - lastchild = node.childNodes[-1] - if lastchild.nodeType == TEXT \ - and lastchild.data[-1:] == ".": - lastchild.data = lastchild.data[:-1] - modauthor = extract_first_element(section, "moduleauthor") - if modauthor: - set_tagName(modauthor, "author") - modauthor.appendChild(doc.createTextNode( - modauthor.getAttribute("name"))) - modauthor.removeAttribute("name") - platform = extract_first_element(section, "platform") - if section.tagName == "section": - modinfo_pos = 2 - modinfo = doc.createElement("moduleinfo") - moddecl = extract_first_element(section, "declaremodule") - name = None - if moddecl: - modinfo.appendChild(doc.createTextNode("\n ")) - name = moddecl.attributes["name"].value - namenode = doc.createElement("name") - namenode.appendChild(doc.createTextNode(name)) - modinfo.appendChild(namenode) - type = moddecl.attributes.get("type") - if type: - type = type.value - 
modinfo.appendChild(doc.createTextNode("\n ")) - typenode = doc.createElement("type") - typenode.appendChild(doc.createTextNode(type)) - modinfo.appendChild(typenode) - versionadded = extract_first_element(section, "versionadded") - if versionadded: - modinfo.setAttribute("added", versionadded.getAttribute("version")) - title = get_first_element(section, "title") - if title: - children = title.childNodes - if len(children) >= 2 \ - and children[0].nodeName == "module" \ - and children[0].childNodes[0].data == name: - # this is it; morph the <title> into <short-synopsis> - first_data = children[1] - if first_data.data[:4] == " ---": - first_data.data = first_data.data[4:].lstrip() - set_tagName(title, "short-synopsis") - if children[-1].nodeType == TEXT \ - and children[-1].data[-1:] == ".": - children[-1].data = children[-1].data[:-1] - section.removeChild(title) - section.removeChild(section.childNodes[0]) - title.removeChild(children[0]) - modinfo_pos = 0 - else: - ewrite("module name in title doesn't match" - " <declaremodule/>; no <short-synopsis/>\n") - else: - ewrite("Unexpected condition: <section/> without <title/>\n") - modinfo.appendChild(doc.createTextNode("\n ")) - modinfo.appendChild(node) - if title and not contents_match(title, node): - # The short synopsis is actually different, - # and needs to be stored: - modinfo.appendChild(doc.createTextNode("\n ")) - modinfo.appendChild(title) - if modauthor: - modinfo.appendChild(doc.createTextNode("\n ")) - modinfo.appendChild(modauthor) - if platform: - modinfo.appendChild(doc.createTextNode("\n ")) - modinfo.appendChild(platform) - modinfo.appendChild(doc.createTextNode("\n ")) - section.insertBefore(modinfo, section.childNodes[modinfo_pos]) - section.insertBefore(doc.createTextNode("\n "), modinfo) - # - # The rest of this removes extra newlines from where we cut out - # a lot of elements. A lot of code for minimal value, but keeps - # keeps the generated *ML from being too funny looking. 
- # - section.normalize() - children = section.childNodes - for i in range(len(children)): - node = children[i] - if node.nodeName == "moduleinfo": - nextnode = children[i+1] - if nextnode.nodeType == TEXT: - data = nextnode.data - s = data.lstrip() - if len(s) < (len(data) - 4): - nextnode.data = "\n\n\n" + s - - -def cleanup_synopses(doc, fragment): - for node in find_all_elements(fragment, "section"): - create_module_info(doc, node) - - -def fixup_table_structures(doc, fragment): - for table in find_all_elements(fragment, "table"): - fixup_table(doc, table) - - -def fixup_table(doc, table): - # create the table head - thead = doc.createElement("thead") - row = doc.createElement("row") - move_elements_by_name(doc, table, row, "entry") - thead.appendChild(doc.createTextNode("\n ")) - thead.appendChild(row) - thead.appendChild(doc.createTextNode("\n ")) - # create the table body - tbody = doc.createElement("tbody") - prev_row = None - last_was_hline = 0 - children = table.childNodes - for child in children: - if child.nodeType == ELEMENT: - tagName = child.tagName - if tagName == "hline" and prev_row is not None: - prev_row.setAttribute("rowsep", "1") - elif tagName == "row": - prev_row = child - # save the rows: - tbody.appendChild(doc.createTextNode("\n ")) - move_elements_by_name(doc, table, tbody, "row", sep="\n ") - # and toss the rest: - while children: - child = children[0] - nodeType = child.nodeType - if nodeType == TEXT: - if child.data.strip(): - raise ConversionError("unexpected free data in <%s>: %r" - % (table.tagName, child.data)) - table.removeChild(child) - continue - if nodeType == ELEMENT: - if child.tagName != "hline": - raise ConversionError( - "unexpected <%s> in table" % child.tagName) - table.removeChild(child) - continue - raise ConversionError( - "unexpected %s node in table" % child.__class__.__name__) - # nothing left in the <table>; add the <thead> and <tbody> - tgroup = doc.createElement("tgroup") - 
tgroup.appendChild(doc.createTextNode("\n ")) - tgroup.appendChild(thead) - tgroup.appendChild(doc.createTextNode("\n ")) - tgroup.appendChild(tbody) - tgroup.appendChild(doc.createTextNode("\n ")) - table.appendChild(tgroup) - # now make the <entry>s look nice: - for row in table.getElementsByTagName("row"): - fixup_row(doc, row) - - -def fixup_row(doc, row): - entries = [] - map(entries.append, row.childNodes[1:]) - for entry in entries: - row.insertBefore(doc.createTextNode("\n "), entry) -# row.appendChild(doc.createTextNode("\n ")) - - -def move_elements_by_name(doc, source, dest, name, sep=None): - nodes = [] - for child in source.childNodes: - if child.nodeName == name: - nodes.append(child) - for node in nodes: - source.removeChild(node) - dest.appendChild(node) - if sep: - dest.appendChild(doc.createTextNode(sep)) - - -RECURSE_INTO_PARA_CONTAINERS = ( - "chapter", "abstract", "enumerate", - "section", "subsection", "subsubsection", - "paragraph", "subparagraph", "back-matter", - "howto", "manual", - "item", "itemize", "fulllineitems", "enumeration", "descriptionlist", - "definitionlist", "definition", - ) - -PARA_LEVEL_ELEMENTS = ( - "moduleinfo", "title", "verbatim", "enumerate", "item", - "interpreter-session", "back-matter", "interactive-session", - "opcodedesc", "classdesc", "datadesc", - "cfuncdesc", "ctypedesc", "cvardesc", - "funcdesc", "methoddesc", "excdesc", "memberdesc", "membderdescni", - "funcdescni", "methoddescni", "excdescni", - "tableii", "tableiii", "tableiv", "localmoduletable", - "sectionauthor", "seealso", "itemize", - # include <para>, so we can just do it again to get subsequent paras: - PARA_ELEMENT, - ) - -PARA_LEVEL_PRECEEDERS = ( - "setindexsubitem", "author", - "stindex", "obindex", "COMMENT", "label", "xi:include", "title", - "versionadded", "versionchanged", "declaremodule", "modulesynopsis", - "moduleauthor", "indexterm", "leader", - ) - - -def fixup_paras(doc, fragment): - for child in fragment.childNodes: - if 
child.nodeName in RECURSE_INTO_PARA_CONTAINERS: - fixup_paras_helper(doc, child) - descriptions = find_all_elements(fragment, "description") - for description in descriptions: - fixup_paras_helper(doc, description) - - -def fixup_paras_helper(doc, container, depth=0): - # document is already normalized - children = container.childNodes - start = skip_leading_nodes(children) - while len(children) > start: - if children[start].nodeName in RECURSE_INTO_PARA_CONTAINERS: - # Something to recurse into: - fixup_paras_helper(doc, children[start]) - else: - # Paragraph material: - build_para(doc, container, start, len(children)) - if DEBUG_PARA_FIXER and depth == 10: - sys.exit(1) - start = skip_leading_nodes(children, start + 1) - - -def build_para(doc, parent, start, i): - children = parent.childNodes - after = start + 1 - have_last = 0 - BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS - # Collect all children until \n\n+ is found in a text node or a - # member of BREAK_ELEMENTS is found. 
- for j in range(start, i): - after = j + 1 - child = children[j] - nodeType = child.nodeType - if nodeType == ELEMENT: - if child.tagName in BREAK_ELEMENTS: - after = j - break - elif nodeType == TEXT: - pos = child.data.find("\n\n") - if pos == 0: - after = j - break - if pos >= 1: - child.splitText(pos) - break - else: - have_last = 1 - if (start + 1) > after: - raise ConversionError( - "build_para() could not identify content to turn into a paragraph") - if children[after - 1].nodeType == TEXT: - # we may need to split off trailing white space: - child = children[after - 1] - data = child.data - if data.rstrip() != data: - have_last = 0 - child.splitText(len(data.rstrip())) - para = doc.createElement(PARA_ELEMENT) - prev = None - indexes = range(start, after) - indexes.reverse() - for j in indexes: - node = parent.childNodes[j] - parent.removeChild(node) - para.insertBefore(node, prev) - prev = node - if have_last: - parent.appendChild(para) - parent.appendChild(doc.createTextNode("\n\n")) - return len(parent.childNodes) - else: - nextnode = parent.childNodes[start] - if nextnode.nodeType == TEXT: - if nextnode.data and nextnode.data[0] != "\n": - nextnode.data = "\n" + nextnode.data - else: - newnode = doc.createTextNode("\n") - parent.insertBefore(newnode, nextnode) - nextnode = newnode - start = start + 1 - parent.insertBefore(para, nextnode) - return start + 1 - - -def skip_leading_nodes(children, start=0): - """Return index into children of a node at which paragraph building should - begin or a recursive call to fixup_paras_helper() should be made (for - subsections, etc.). - - When the return value >= len(children), we've built all the paras we can - from this list of children. 
- """ - i = len(children) - while i > start: - # skip over leading comments and whitespace: - child = children[start] - nodeType = child.nodeType - if nodeType == TEXT: - data = child.data - shortened = data.lstrip() - if shortened: - if data != shortened: - # break into two nodes: whitespace and non-whitespace - child.splitText(len(data) - len(shortened)) - return start + 1 - return start - # all whitespace, just skip - elif nodeType == ELEMENT: - tagName = child.tagName - if tagName in RECURSE_INTO_PARA_CONTAINERS: - return start - if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS: - return start - start = start + 1 - return start - - -def fixup_rfc_references(doc, fragment): - for rfcnode in find_all_elements_from_set(fragment, ("pep", "rfc")): - rfcnode.appendChild(doc.createTextNode( - rfcnode.tagName.upper() + " " + rfcnode.getAttribute("num"))) - - -def fixup_signatures(doc, fragment): - for child in fragment.childNodes: - if child.nodeType == ELEMENT: - args = child.getElementsByTagName("args") - for arg in args: - rewrite_args(doc, arg) - args = child.getElementsByTagName("constructor-args") - for arg in args: - rewrite_args(doc, arg) - -def rewrite_args(doc, arglist): - fixup_args(doc, arglist) - arglist.normalize() - if arglist.childNodes.length == 1 and arglist.firstChild.nodeType == TEXT: - node = arglist.firstChild - node.data = ' '.join(node.data.split()) - -def fixup_args(doc, arglist): - for child in arglist.childNodes: - if child.nodeName == "optional": - # found it; fix and return - arglist.insertBefore(doc.createTextNode("["), child) - optkids = child.childNodes - while optkids: - arglist.insertBefore(child.firstChild, child) - arglist.insertBefore(doc.createTextNode("]"), child) - arglist.removeChild(child) - return fixup_args(doc, arglist) - - -def fixup_sectionauthors(doc, fragment): - for sectauth in find_all_elements(fragment, "sectionauthor"): - section = sectauth.parentNode - section.removeChild(sectauth) - 
set_tagName(sectauth, "author") - sectauth.appendChild(doc.createTextNode( - sectauth.getAttribute("name"))) - sectauth.removeAttribute("name") - after = section.childNodes[2] - title = section.childNodes[1] - if title.nodeName != "title": - after = section.childNodes[0] - section.insertBefore(doc.createTextNode("\n "), after) - section.insertBefore(sectauth, after) - - -def fixup_verbatims(doc): - for verbatim in find_all_elements(doc, "verbatim"): - child = verbatim.childNodes[0] - if child.nodeType == TEXT \ - and child.data.lstrip().startswith(">>>"): - set_tagName(verbatim, "interactive-session") - - -def add_node_ids(fragment, counter=0): - fragment.node_id = counter - for node in fragment.childNodes: - counter = counter + 1 - if node.nodeType == ELEMENT: - counter = add_node_ids(node, counter) - else: - node.node_id = counter - return counter + 1 - - -def fixup_ulink(doc, fragment): - for ulink in find_all_elements(fragment, "ulink"): - children = ulink.childNodes - assert len(children) == 2 - text = children[0] - href = children[1] - href.normalize() - assert len(href.childNodes) == 1 - assert href.childNodes[0].nodeType == TEXT - url = href.childNodes[0].data - ulink.setAttribute("href", url) - ulink.removeChild(href) - content = text.childNodes - while len(content): - ulink.appendChild(content[0]) - ulink.removeChild(text) - - -REFMODINDEX_ELEMENTS = ('refmodindex', 'refbimodindex', - 'refexmodindex', 'refstmodindex') - -def fixup_refmodindexes(fragment): - # Locate <ref*modindex>...</> co-located with <module>...</>, and - # remove the <ref*modindex>, replacing it with index=index on the - # <module> element. 
- nodes = find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS) - d = {} - for node in nodes: - parent = node.parentNode - d[parent.node_id] = parent - del nodes - map(fixup_refmodindexes_chunk, d.values()) - - -def fixup_refmodindexes_chunk(container): - # node is probably a <para>; let's see how often it isn't: - if container.tagName != PARA_ELEMENT: - bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container) - module_entries = find_all_elements(container, "module") - if not module_entries: - return - index_entries = find_all_elements_from_set(container, REFMODINDEX_ELEMENTS) - removes = [] - for entry in index_entries: - children = entry.childNodes - if len(children) != 0: - bwrite("--- unexpected number of children for %s node:\n" - % entry.tagName) - ewrite(entry.toxml() + "\n") - continue - found = 0 - module_name = entry.getAttribute("module") - for node in module_entries: - if len(node.childNodes) != 1: - continue - this_name = node.childNodes[0].data - if this_name == module_name: - found = 1 - node.setAttribute("index", "yes") - if found: - removes.append(entry) - for node in removes: - container.removeChild(node) - - -def fixup_bifuncindexes(fragment): - nodes = find_all_elements(fragment, 'bifuncindex') - d = {} - # make sure that each parent is only processed once: - for node in nodes: - parent = node.parentNode - d[parent.node_id] = parent - del nodes - map(fixup_bifuncindexes_chunk, d.values()) - - -def fixup_bifuncindexes_chunk(container): - removes = [] - entries = find_all_child_elements(container, "bifuncindex") - function_entries = find_all_child_elements(container, "function") - for entry in entries: - function_name = entry.getAttribute("name") - found = 0 - for func_entry in function_entries: - t2 = func_entry.childNodes[0].data - if t2[-2:] != "()": - continue - t2 = t2[:-2] - if t2 == function_name: - func_entry.setAttribute("index", "yes") - func_entry.setAttribute("module", "__builtin__") - if not found: - found = 1 - 
removes.append(entry) - for entry in removes: - container.removeChild(entry) - - -def join_adjacent_elements(container, gi): - queue = [container] - while queue: - parent = queue.pop() - i = 0 - children = parent.childNodes - nchildren = len(children) - while i < (nchildren - 1): - child = children[i] - if child.nodeName == gi: - if children[i+1].nodeName == gi: - ewrite("--- merging two <%s/> elements\n" % gi) - child = children[i] - nextchild = children[i+1] - nextchildren = nextchild.childNodes - while len(nextchildren): - node = nextchildren[0] - nextchild.removeChild(node) - child.appendChild(node) - parent.removeChild(nextchild) - continue - if child.nodeType == ELEMENT: - queue.append(child) - i = i + 1 - - -_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$") - -def write_esis(doc, ofp, knownempty): - for node in doc.childNodes: - nodeType = node.nodeType - if nodeType == ELEMENT: - gi = node.tagName - if knownempty(gi): - if node.hasChildNodes(): - raise ValueError, \ - "declared-empty node <%s> has children" % gi - ofp.write("e\n") - for k, value in node.attributes.items(): - if _token_rx.match(value): - dtype = "TOKEN" - else: - dtype = "CDATA" - ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value))) - ofp.write("(%s\n" % gi) - write_esis(node, ofp, knownempty) - ofp.write(")%s\n" % gi) - elif nodeType == TEXT: - ofp.write("-%s\n" % esistools.encode(node.data)) - elif nodeType == ENTITY_REFERENCE: - ofp.write("&%s\n" % node.nodeName) - else: - raise RuntimeError, "unsupported node type: %s" % nodeType - - -def convert(ifp, ofp): - events = esistools.parse(ifp) - toktype, doc = events.getEvent() - fragment = doc.createDocumentFragment() - events.expandNode(fragment) - - normalize(fragment) - simplify(doc, fragment) - handle_labels(doc, fragment) - handle_appendix(doc, fragment) - fixup_trailing_whitespace(doc, fragment, { - # element -> (before-end-tag, after-end-tag) - "abstract": ("\n", "\n"), - "title": ("", "\n"), - "chapter": ("\n", "\n\n\n"), 
- "section": ("\n", "\n\n\n"), - "subsection": ("\n", "\n\n"), - "subsubsection": ("\n", "\n\n"), - "paragraph": ("\n", "\n\n"), - "subparagraph": ("\n", "\n\n"), - "description": ("\n", "\n\n"), - "enumeration": ("\n", "\n\n"), - "item": ("\n", "\n\n"), - }) - cleanup_root_text(doc) - cleanup_trailing_parens(fragment, ["function", "method", "cfunction"]) - cleanup_synopses(doc, fragment) - fixup_descriptors(doc, fragment) - fixup_verbatims(fragment) - normalize(fragment) - fixup_paras(doc, fragment) - fixup_sectionauthors(doc, fragment) - fixup_table_structures(doc, fragment) - fixup_rfc_references(doc, fragment) - fixup_signatures(doc, fragment) - fixup_ulink(doc, fragment) - add_node_ids(fragment) - fixup_refmodindexes(fragment) - fixup_bifuncindexes(fragment) - # Take care of ugly hacks in the LaTeX markup to avoid LaTeX and - # LaTeX2HTML screwing with GNU-style long options (the '--' problem). - join_adjacent_elements(fragment, "option") - # Attempt to avoid trailing blank lines: - fragment.normalize() - if fragment.lastChild.data[-1:] == "\n": - fragment.lastChild.data = fragment.lastChild.data.rstrip() + "\n" - # - d = {} - for gi in events.parser.get_empties(): - d[gi] = gi - for key in ("author", "pep", "rfc"): - if d.has_key(key): - del d[key] - knownempty = d.has_key - # - try: - write_esis(fragment, ofp, knownempty) - except IOError, (err, msg): - # Ignore EPIPE; it just means that whoever we're writing to stopped - # reading. The rest of the output would be ignored. 
All other errors - # should still be reported, - if err != errno.EPIPE: - raise - - -def main(): - if len(sys.argv) == 1: - ifp = sys.stdin - ofp = sys.stdout - elif len(sys.argv) == 2: - ifp = open(sys.argv[1]) - ofp = sys.stdout - elif len(sys.argv) == 3: - ifp = open(sys.argv[1]) - import StringIO - ofp = StringIO.StringIO() - else: - usage() - sys.exit(2) - convert(ifp, ofp) - if len(sys.argv) == 3: - fp = open(sys.argv[2], "w") - fp.write(ofp.getvalue()) - fp.close() - ofp.close() - - -if __name__ == "__main__": - main() diff --git a/Doc/tools/sgmlconv/esis2sgml.py b/Doc/tools/sgmlconv/esis2sgml.py deleted file mode 100755 index b6f9a44..0000000 --- a/Doc/tools/sgmlconv/esis2sgml.py +++ /dev/null @@ -1,264 +0,0 @@ -#! /usr/bin/env python - -"""Convert ESIS events to SGML or XML markup. - -This is limited, but seems sufficient for the ESIS generated by the -latex2esis.py script when run over the Python documentation. -""" - -# This should have an explicit option to indicate whether the *INPUT* was -# generated from an SGML or an XML application. 
- -import errno -import os -import re -import string - -from xml.sax.saxutils import escape - -import esistools - - -AUTOCLOSE = () - -EMPTIES_FILENAME = "../sgml/empties.dat" -LIST_EMPTIES = 0 - - -_elem_map = {} -_attr_map = {} -_token_map = {} - -_normalize_case = str - -def map_gi(sgmlgi, map): - uncased = _normalize_case(sgmlgi) - try: - return map[uncased] - except IndexError: - map[uncased] = sgmlgi - return sgmlgi - -def null_map_gi(sgmlgi, map): - return sgmlgi - - -def format_attrs(attrs, xml=0): - attrs = attrs.items() - attrs.sort() - parts = [] - append = parts.append - for name, value in attrs: - if xml: - append('%s="%s"' % (name, escape(value))) - else: - # this is a little bogus, but should do for now - if name == value and isnmtoken(value): - append(value) - elif istoken(value): - if value == "no" + name: - append(value) - else: - append("%s=%s" % (name, value)) - else: - append('%s="%s"' % (name, escape(value))) - if parts: - parts.insert(0, '') - return " ".join(parts) - - -_nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE) -def isnmtoken(s): - return _nmtoken_rx.match(s) is not None - -_token_rx = re.compile("[a-z0-9][-._a-z0-9]*$", re.IGNORECASE) -def istoken(s): - return _token_rx.match(s) is not None - - -def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()): - if xml: - autoclose = () - attrs = {} - lastopened = None - knownempties = [] - knownempty = 0 - lastempty = 0 - inverbatim = 0 - while 1: - line = ifp.readline() - if not line: - break - - type = line[0] - data = line[1:] - if data and data[-1] == "\n": - data = data[:-1] - if type == "-": - data = esistools.decode(data) - data = escape(data) - if not inverbatim: - data = data.replace("---", "—") - ofp.write(data) - if "\n" in data: - lastopened = None - knownempty = 0 - lastempty = 0 - elif type == "(": - if data == "COMMENT": - ofp.write("<!--") - continue - data = map_gi(data, _elem_map) - if knownempty and xml: - ofp.write("<%s%s/>" % (data, format_attrs(attrs, 
xml))) - else: - ofp.write("<%s%s>" % (data, format_attrs(attrs, xml))) - if knownempty and data not in knownempties: - # accumulate knowledge! - knownempties.append(data) - attrs = {} - lastopened = data - lastempty = knownempty - knownempty = 0 - inverbatim = data in verbatims - elif type == ")": - if data == "COMMENT": - ofp.write("-->") - continue - data = map_gi(data, _elem_map) - if xml: - if not lastempty: - ofp.write("</%s>" % data) - elif data not in knownempties: - if data in autoclose: - pass - elif lastopened == data: - ofp.write("</>") - else: - ofp.write("</%s>" % data) - lastopened = None - lastempty = 0 - inverbatim = 0 - elif type == "A": - name, type, value = data.split(" ", 2) - name = map_gi(name, _attr_map) - attrs[name] = esistools.decode(value) - elif type == "e": - knownempty = 1 - elif type == "&": - ofp.write("&%s;" % data) - knownempty = 0 - else: - raise RuntimeError, "unrecognized ESIS event type: '%s'" % type - - if LIST_EMPTIES: - dump_empty_element_names(knownempties) - - -def dump_empty_element_names(knownempties): - d = {} - for gi in knownempties: - d[gi] = gi - knownempties.append("") - if os.path.isfile(EMPTIES_FILENAME): - fp = open(EMPTIES_FILENAME) - while 1: - line = fp.readline() - if not line: - break - gi = line.strip() - if gi: - d[gi] = gi - fp = open(EMPTIES_FILENAME, "w") - gilist = d.keys() - gilist.sort() - fp.write("\n".join(gilist)) - fp.write("\n") - fp.close() - - -def update_gi_map(map, names, fromsgml=1): - for name in names.split(","): - if fromsgml: - uncased = name.lower() - else: - uncased = name - map[uncased] = name - - -def main(): - import getopt - import sys - # - autoclose = AUTOCLOSE - xml = 1 - xmldecl = 0 - elem_names = '' - attr_names = '' - value_names = '' - verbatims = ('verbatim', 'interactive-session') - opts, args = getopt.getopt(sys.argv[1:], "adesx", - ["autoclose=", "declare", "sgml", "xml", - "elements-map=", "attributes-map", - "values-map="]) - for opt, arg in opts: - if opt in ("-d", 
"--declare"): - xmldecl = 1 - elif opt == "-e": - global LIST_EMPTIES - LIST_EMPTIES = 1 - elif opt in ("-s", "--sgml"): - xml = 0 - elif opt in ("-x", "--xml"): - xml = 1 - elif opt in ("-a", "--autoclose"): - autoclose = arg.split(",") - elif opt == "--elements-map": - elem_names = ("%s,%s" % (elem_names, arg))[1:] - elif opt == "--attributes-map": - attr_names = ("%s,%s" % (attr_names, arg))[1:] - elif opt == "--values-map": - value_names = ("%s,%s" % (value_names, arg))[1:] - # - # open input streams: - # - if len(args) == 0: - ifp = sys.stdin - ofp = sys.stdout - elif len(args) == 1: - ifp = open(args[0]) - ofp = sys.stdout - elif len(args) == 2: - ifp = open(args[0]) - ofp = open(args[1], "w") - else: - usage() - sys.exit(2) - # - # setup the name maps: - # - if elem_names or attr_names or value_names: - # assume the origin was SGML; ignore case of the names from the ESIS - # stream but set up conversion tables to get the case right on output - global _normalize_case - _normalize_case = string.lower - update_gi_map(_elem_map, elem_names.split(",")) - update_gi_map(_attr_map, attr_names.split(",")) - update_gi_map(_values_map, value_names.split(",")) - else: - global map_gi - map_gi = null_map_gi - # - # run the conversion: - # - try: - if xml and xmldecl: - opf.write('<?xml version="1.0" encoding="iso8859-1"?>\n') - convert(ifp, ofp, xml=xml, autoclose=autoclose, verbatims=verbatims) - except IOError, (err, msg): - if err != errno.EPIPE: - raise - - -if __name__ == "__main__": - main() diff --git a/Doc/tools/sgmlconv/esistools.py b/Doc/tools/sgmlconv/esistools.py deleted file mode 100644 index 833fea1..0000000 --- a/Doc/tools/sgmlconv/esistools.py +++ /dev/null @@ -1,312 +0,0 @@ -"""Miscellaneous utility functions useful for dealing with ESIS streams.""" - -import re - -import xml.dom.pulldom - -import xml.sax -import xml.sax.handler -import xml.sax.xmlreader - - -_data_match = re.compile(r"[^\\][^\\]*").match - -def decode(s): - r = '' - while s: - m = 
_data_match(s) - if m: - r = r + m.group() - s = s[m.end():] - elif s[1] == "\\": - r = r + "\\" - s = s[2:] - elif s[1] == "n": - r = r + "\n" - s = s[2:] - elif s[1] == "%": - s = s[2:] - n, s = s.split(";", 1) - r = r + unichr(int(n)) - else: - raise ValueError, "can't handle %r" % s - return r - - -_charmap = {} -for c in range(128): - _charmap[chr(c)] = chr(c) - _charmap[unichr(c + 128)] = chr(c + 128) -_charmap["\n"] = r"\n" -_charmap["\\"] = r"\\" -del c - -_null_join = ''.join -def encode(s): - try: - return _null_join(map(_charmap.get, s)) - except TypeError: - raise Exception("could not encode %r: %r" % (s, map(_charmap.get, s))) - - -class ESISReader(xml.sax.xmlreader.XMLReader): - """SAX Reader which reads from an ESIS stream. - - No verification of the document structure is performed by the - reader; a general verifier could be used as the target - ContentHandler instance. - - """ - _decl_handler = None - _lexical_handler = None - - _public_id = None - _system_id = None - - _buffer = "" - _is_empty = 0 - _lineno = 0 - _started = 0 - - def __init__(self, contentHandler=None, errorHandler=None): - xml.sax.xmlreader.XMLReader.__init__(self) - self._attrs = {} - self._attributes = Attributes(self._attrs) - self._locator = Locator() - self._empties = {} - if contentHandler: - self.setContentHandler(contentHandler) - if errorHandler: - self.setErrorHandler(errorHandler) - - def get_empties(self): - return self._empties.keys() - - # - # XMLReader interface - # - - def parse(self, source): - raise RuntimeError - self._locator._public_id = source.getPublicId() - self._locator._system_id = source.getSystemId() - fp = source.getByteStream() - handler = self.getContentHandler() - if handler: - handler.startDocument() - lineno = 0 - while 1: - token, data = self._get_token(fp) - if token is None: - break - lineno = lineno + 1 - self._locator._lineno = lineno - self._handle_token(token, data) - handler = self.getContentHandler() - if handler: - 
handler.startDocument() - - def feed(self, data): - if not self._started: - handler = self.getContentHandler() - if handler: - handler.startDocument() - self._started = 1 - data = self._buffer + data - self._buffer = None - lines = data.split("\n") - if lines: - for line in lines[:-1]: - self._lineno = self._lineno + 1 - self._locator._lineno = self._lineno - if not line: - e = xml.sax.SAXParseException( - "ESIS input line contains no token type mark", - None, self._locator) - self.getErrorHandler().error(e) - else: - self._handle_token(line[0], line[1:]) - self._buffer = lines[-1] - else: - self._buffer = "" - - def close(self): - handler = self.getContentHandler() - if handler: - handler.endDocument() - self._buffer = "" - - def _get_token(self, fp): - try: - line = fp.readline() - except IOError, e: - e = SAXException("I/O error reading input stream", e) - self.getErrorHandler().fatalError(e) - return - if not line: - return None, None - if line[-1] == "\n": - line = line[:-1] - if not line: - e = xml.sax.SAXParseException( - "ESIS input line contains no token type mark", - None, self._locator) - self.getErrorHandler().error(e) - return - return line[0], line[1:] - - def _handle_token(self, token, data): - handler = self.getContentHandler() - if token == '-': - if data and handler: - handler.characters(decode(data)) - elif token == ')': - if handler: - handler.endElement(decode(data)) - elif token == '(': - if self._is_empty: - self._empties[data] = 1 - self._is_empty = 0 - if handler: - handler.startElement(data, self._attributes) - self._attrs.clear() - elif token == 'A': - name, value = data.split(' ', 1) - if value != "IMPLIED": - type, value = value.split(' ', 1) - self._attrs[name] = (decode(value), type) - elif token == '&': - # entity reference in SAX? 
- pass - elif token == '?': - if handler: - if ' ' in data: - target, data = data.split(None, 1) - else: - target, data = data, "" - handler.processingInstruction(target, decode(data)) - elif token == 'N': - handler = self.getDTDHandler() - if handler: - handler.notationDecl(data, self._public_id, self._system_id) - self._public_id = None - self._system_id = None - elif token == 'p': - self._public_id = decode(data) - elif token == 's': - self._system_id = decode(data) - elif token == 'e': - self._is_empty = 1 - elif token == 'C': - pass - else: - e = SAXParseException("unknown ESIS token in event stream", - None, self._locator) - self.getErrorHandler().error(e) - - def setContentHandler(self, handler): - old = self.getContentHandler() - if old: - old.setDocumentLocator(None) - if handler: - handler.setDocumentLocator(self._locator) - xml.sax.xmlreader.XMLReader.setContentHandler(self, handler) - - def getProperty(self, property): - if property == xml.sax.handler.property_lexical_handler: - return self._lexical_handler - - elif property == xml.sax.handler.property_declaration_handler: - return self._decl_handler - - else: - raise xml.sax.SAXNotRecognizedException("unknown property %r" - % (property, )) - - def setProperty(self, property, value): - if property == xml.sax.handler.property_lexical_handler: - if self._lexical_handler: - self._lexical_handler.setDocumentLocator(None) - if value: - value.setDocumentLocator(self._locator) - self._lexical_handler = value - - elif property == xml.sax.handler.property_declaration_handler: - if self._decl_handler: - self._decl_handler.setDocumentLocator(None) - if value: - value.setDocumentLocator(self._locator) - self._decl_handler = value - - else: - raise xml.sax.SAXNotRecognizedException() - - def getFeature(self, feature): - if feature == xml.sax.handler.feature_namespaces: - return 1 - else: - return xml.sax.xmlreader.XMLReader.getFeature(self, feature) - - def setFeature(self, feature, enabled): - if feature == 
xml.sax.handler.feature_namespaces: - pass - else: - xml.sax.xmlreader.XMLReader.setFeature(self, feature, enabled) - - -class Attributes(xml.sax.xmlreader.AttributesImpl): - # self._attrs has the form {name: (value, type)} - - def getType(self, name): - return self._attrs[name][1] - - def getValue(self, name): - return self._attrs[name][0] - - def getValueByQName(self, name): - return self._attrs[name][0] - - def __getitem__(self, name): - return self._attrs[name][0] - - def get(self, name, default=None): - if self._attrs.has_key(name): - return self._attrs[name][0] - return default - - def items(self): - L = [] - for name, (value, type) in self._attrs.items(): - L.append((name, value)) - return L - - def values(self): - L = [] - for value, type in self._attrs.values(): - L.append(value) - return L - - -class Locator(xml.sax.xmlreader.Locator): - _lineno = -1 - _public_id = None - _system_id = None - - def getLineNumber(self): - return self._lineno - - def getPublicId(self): - return self._public_id - - def getSystemId(self): - return self._system_id - - -def parse(stream_or_string, parser=None): - if type(stream_or_string) in [type(""), type(u"")]: - stream = open(stream_or_string) - else: - stream = stream_or_string - if not parser: - parser = ESISReader() - return xml.dom.pulldom.DOMEventStream(stream, parser, (2 ** 14) - 20) diff --git a/Doc/tools/sgmlconv/latex2esis.py b/Doc/tools/sgmlconv/latex2esis.py deleted file mode 100755 index 643ef2c..0000000 --- a/Doc/tools/sgmlconv/latex2esis.py +++ /dev/null @@ -1,565 +0,0 @@ -#! /usr/bin/env python - -"""Generate ESIS events based on a LaTeX source document and -configuration data. - -The conversion is not strong enough to work with arbitrary LaTeX -documents; it has only been designed to work with the highly stylized -markup used in the standard Python documentation. 
A lot of -information about specific markup is encoded in the control table -passed to the convert() function; changing this table can allow this -tool to support additional LaTeX markups. - -The format of the table is largely undocumented; see the commented -headers where the table is specified in main(). There is no provision -to load an alternate table from an external file. -""" - -import errno -import getopt -import os -import re -import sys -import xml.sax -import xml.sax.saxutils - -from esistools import encode - - -DEBUG = 0 - - -class LaTeXFormatError(Exception): - pass - - -class LaTeXStackError(LaTeXFormatError): - def __init__(self, found, stack): - msg = "environment close for %s doesn't match;\n stack = %s" \ - % (found, stack) - self.found = found - self.stack = stack[:] - LaTeXFormatError.__init__(self, msg) - - -_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}") -_end_env_rx = re.compile(r"[\\]end{([^}]*)}") -_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)") -_comment_rx = re.compile("%+ ?(.*)\n[ \t]*") -_text_rx = re.compile(r"[^]~%\\{}]+") -_optional_rx = re.compile(r"\s*[[]([^]]*)[]]", re.MULTILINE) -# _parameter_rx is this complicated to allow {...} inside a parameter; -# this is useful to match tabular layout specifications like {c|p{24pt}} -_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}") -_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$") -_start_group_rx = re.compile("[ \n]*{") -_start_optional_rx = re.compile("[ \n]*[[]") - - -ESCAPED_CHARS = "$%#^ {}&~" - - -def dbgmsg(msg): - if DEBUG: - sys.stderr.write(msg + "\n") - -def pushing(name, point, depth): - dbgmsg("pushing <%s> at %s" % (name, point)) - -def popping(name, point, depth): - dbgmsg("popping </%s> at %s" % (name, point)) - - -class _Stack(list): - def append(self, entry): - if not isinstance(entry, str): - raise LaTeXFormatError("cannot push non-string on stack: %r" - % (entry, )) - #dbgmsg("%s<%s>" % (" "*len(self.data), entry)) - list.append(self, entry) 
- - def pop(self, index=-1): - entry = self[index] - del self[index] - #dbgmsg("%s</%s>" % (" " * len(self), entry)) - - def __delitem__(self, index): - entry = self[index] - list.__delitem__(self, index) - #dbgmsg("%s</%s>" % (" " * len(self), entry)) - - -def new_stack(): - if DEBUG: - return _Stack() - else: - return [] - - -class Conversion: - def __init__(self, ifp, ofp, table): - self.write = ofp.write - self.ofp = ofp - self.table = table - L = [s.rstrip() for s in ifp.readlines()] - L.append("") - self.line = "\n".join(L) - self.preamble = 1 - - def convert(self): - self.subconvert() - - def subconvert(self, endchar=None, depth=0): - # - # Parses content, including sub-structures, until the character - # 'endchar' is found (with no open structures), or until the end - # of the input data is endchar is None. - # - stack = new_stack() - line = self.line - while line: - if line[0] == endchar and not stack: - self.line = line - return line - m = _comment_rx.match(line) - if m: - text = m.group(1) - if text: - self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" - % encode(text)) - line = line[m.end():] - continue - m = _begin_env_rx.match(line) - if m: - name = m.group(1) - entry = self.get_env_entry(name) - # re-write to use the macro handler - line = r"\%s %s" % (name, line[m.end():]) - continue - m = _end_env_rx.match(line) - if m: - # end of environment - envname = m.group(1) - entry = self.get_entry(envname) - while stack and envname != stack[-1] \ - and stack[-1] in entry.endcloses: - self.write(")%s\n" % stack.pop()) - if stack and envname == stack[-1]: - self.write(")%s\n" % entry.outputname) - del stack[-1] - else: - raise LaTeXStackError(envname, stack) - line = line[m.end():] - continue - m = _begin_macro_rx.match(line) - if m: - # start of macro - macroname = m.group(1) - if macroname == "c": - # Ugh! This is a combining character... 
- endpos = m.end() - self.combining_char("c", line[endpos]) - line = line[endpos + 1:] - continue - entry = self.get_entry(macroname) - if entry.verbatim: - # magic case! - pos = line.find("\\end{%s}" % macroname) - text = line[m.end(1):pos] - stack.append(entry.name) - self.write("(%s\n" % entry.outputname) - self.write("-%s\n" % encode(text)) - self.write(")%s\n" % entry.outputname) - stack.pop() - line = line[pos + len("\\end{%s}" % macroname):] - continue - while stack and stack[-1] in entry.closes: - top = stack.pop() - topentry = self.get_entry(top) - if topentry.outputname: - self.write(")%s\n-\\n\n" % topentry.outputname) - # - if entry.outputname and entry.empty: - self.write("e\n") - # - params, optional, empty = self.start_macro(macroname) - # rip off the macroname - if params: - line = line[m.end(1):] - elif empty: - line = line[m.end(1):] - else: - line = line[m.end():] - opened = 0 - implied_content = 0 - - # handle attribute mappings here: - for pentry in params: - if pentry.type == "attribute": - if pentry.optional: - m = _optional_rx.match(line) - if m and entry.outputname: - line = line[m.end():] - self.dump_attr(pentry, m.group(1)) - elif pentry.text and entry.outputname: - # value supplied by conversion spec: - self.dump_attr(pentry, pentry.text) - else: - m = _parameter_rx.match(line) - if not m: - raise LaTeXFormatError( - "could not extract parameter %s for %s: %r" - % (pentry.name, macroname, line[:100])) - if entry.outputname: - self.dump_attr(pentry, m.group(1)) - line = line[m.end():] - elif pentry.type == "child": - if pentry.optional: - m = _optional_rx.match(line) - if m: - line = line[m.end():] - if entry.outputname and not opened: - opened = 1 - self.write("(%s\n" % entry.outputname) - stack.append(macroname) - stack.append(pentry.name) - self.write("(%s\n" % pentry.name) - self.write("-%s\n" % encode(m.group(1))) - self.write(")%s\n" % pentry.name) - stack.pop() - else: - if entry.outputname and not opened: - opened = 1 - 
self.write("(%s\n" % entry.outputname) - stack.append(entry.name) - self.write("(%s\n" % pentry.name) - stack.append(pentry.name) - self.line = skip_white(line)[1:] - line = self.subconvert( - "}", len(stack) + depth + 1)[1:] - self.write(")%s\n" % stack.pop()) - elif pentry.type == "content": - if pentry.implied: - implied_content = 1 - else: - if entry.outputname and not opened: - opened = 1 - self.write("(%s\n" % entry.outputname) - stack.append(entry.name) - line = skip_white(line) - if line[0] != "{": - raise LaTeXFormatError( - "missing content for " + macroname) - self.line = line[1:] - line = self.subconvert("}", len(stack) + depth + 1) - if line and line[0] == "}": - line = line[1:] - elif pentry.type == "text" and pentry.text: - if entry.outputname and not opened: - opened = 1 - stack.append(entry.name) - self.write("(%s\n" % entry.outputname) - #dbgmsg("--- text: %r" % pentry.text) - self.write("-%s\n" % encode(pentry.text)) - elif pentry.type == "entityref": - self.write("&%s\n" % pentry.name) - if entry.outputname: - if not opened: - self.write("(%s\n" % entry.outputname) - stack.append(entry.name) - if not implied_content: - self.write(")%s\n" % entry.outputname) - stack.pop() - continue - if line[0] == endchar and not stack: - self.line = line[1:] - return self.line - if line[0] == "}": - # end of macro or group - macroname = stack[-1] - if macroname: - conversion = self.table[macroname] - if conversion.outputname: - # otherwise, it was just a bare group - self.write(")%s\n" % conversion.outputname) - del stack[-1] - line = line[1:] - continue - if line[0] == "~": - # don't worry about the "tie" aspect of this command - line = line[1:] - self.write("- \n") - continue - if line[0] == "{": - stack.append("") - line = line[1:] - continue - if line[0] == "\\" and line[1] in ESCAPED_CHARS: - self.write("-%s\n" % encode(line[1])) - line = line[2:] - continue - if line[:2] == r"\\": - self.write("(BREAK\n)BREAK\n") - line = line[2:] - continue - if line[:2] 
== r"\_": - line = "_" + line[2:] - continue - if line[:2] in (r"\'", r'\"'): - # combining characters... - self.combining_char(line[1], line[2]) - line = line[3:] - continue - m = _text_rx.match(line) - if m: - text = encode(m.group()) - self.write("-%s\n" % text) - line = line[m.end():] - continue - # special case because of \item[] - # XXX can we axe this??? - if line[0] == "]": - self.write("-]\n") - line = line[1:] - continue - # avoid infinite loops - extra = "" - if len(line) > 100: - extra = "..." - raise LaTeXFormatError("could not identify markup: %r%s" - % (line[:100], extra)) - while stack: - entry = self.get_entry(stack[-1]) - if entry.closes: - self.write(")%s\n-%s\n" % (entry.outputname, encode("\n"))) - del stack[-1] - else: - break - if stack: - raise LaTeXFormatError("elements remain on stack: " - + ", ".join(stack)) - # otherwise we just ran out of input here... - - # This is a really limited table of combinations, but it will have - # to do for now. - _combinations = { - ("c", "c"): 0x00E7, - ("'", "e"): 0x00E9, - ('"', "o"): 0x00F6, - } - - def combining_char(self, prefix, char): - ordinal = self._combinations[(prefix, char)] - self.write("-\\%%%d;\n" % ordinal) - - def start_macro(self, name): - conversion = self.get_entry(name) - parameters = conversion.parameters - optional = parameters and parameters[0].optional - return parameters, optional, conversion.empty - - def get_entry(self, name): - entry = self.table.get(name) - if entry is None: - dbgmsg("get_entry(%r) failing; building default entry!" 
% (name, )) - # not defined; build a default entry: - entry = TableEntry(name) - entry.has_content = 1 - entry.parameters.append(Parameter("content")) - self.table[name] = entry - return entry - - def get_env_entry(self, name): - entry = self.table.get(name) - if entry is None: - # not defined; build a default entry: - entry = TableEntry(name, 1) - entry.has_content = 1 - entry.parameters.append(Parameter("content")) - entry.parameters[-1].implied = 1 - self.table[name] = entry - elif not entry.environment: - raise LaTeXFormatError( - name + " is defined as a macro; expected environment") - return entry - - def dump_attr(self, pentry, value): - if not (pentry.name and value): - return - if _token_rx.match(value): - dtype = "TOKEN" - else: - dtype = "CDATA" - self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value))) - - -def convert(ifp, ofp, table): - c = Conversion(ifp, ofp, table) - try: - c.convert() - except IOError, (err, msg): - if err != errno.EPIPE: - raise - - -def skip_white(line): - while line and line[0] in " %\n\t\r": - line = line[1:].lstrip() - return line - - - -class TableEntry: - def __init__(self, name, environment=0): - self.name = name - self.outputname = name - self.environment = environment - self.empty = not environment - self.has_content = 0 - self.verbatim = 0 - self.auto_close = 0 - self.parameters = [] - self.closes = [] - self.endcloses = [] - -class Parameter: - def __init__(self, type, name=None, optional=0): - self.type = type - self.name = name - self.optional = optional - self.text = '' - self.implied = 0 - - -class TableHandler(xml.sax.handler.ContentHandler): - def __init__(self): - self.__table = {} - self.__buffer = '' - self.__methods = {} - - def get_table(self): - for entry in self.__table.values(): - if entry.environment and not entry.has_content: - p = Parameter("content") - p.implied = 1 - entry.parameters.append(p) - entry.has_content = 1 - return self.__table - - def startElement(self, tag, attrs): - try: - start, 
end = self.__methods[tag] - except KeyError: - start = getattr(self, "start_" + tag, None) - end = getattr(self, "end_" + tag, None) - self.__methods[tag] = (start, end) - if start: - start(attrs) - - def endElement(self, tag): - start, end = self.__methods[tag] - if end: - end() - - def endDocument(self): - self.__methods.clear() - - def characters(self, data): - self.__buffer += data - - def start_environment(self, attrs): - name = attrs["name"] - self.__current = TableEntry(name, environment=1) - self.__current.verbatim = attrs.get("verbatim") == "yes" - if attrs.has_key("outputname"): - self.__current.outputname = attrs.get("outputname") - self.__current.endcloses = attrs.get("endcloses", "").split() - def end_environment(self): - self.end_macro() - - def start_macro(self, attrs): - name = attrs["name"] - self.__current = TableEntry(name) - self.__current.closes = attrs.get("closes", "").split() - if attrs.has_key("outputname"): - self.__current.outputname = attrs.get("outputname") - def end_macro(self): - name = self.__current.name - if self.__table.has_key(name): - raise ValueError("name %r already in use" % (name,)) - self.__table[name] = self.__current - self.__current = None - - def start_attribute(self, attrs): - name = attrs.get("name") - optional = attrs.get("optional") == "yes" - if name: - p = Parameter("attribute", name, optional=optional) - else: - p = Parameter("attribute", optional=optional) - self.__current.parameters.append(p) - self.__buffer = '' - def end_attribute(self): - self.__current.parameters[-1].text = self.__buffer - - def start_entityref(self, attrs): - name = attrs["name"] - p = Parameter("entityref", name) - self.__current.parameters.append(p) - - def start_child(self, attrs): - name = attrs["name"] - p = Parameter("child", name, attrs.get("optional") == "yes") - self.__current.parameters.append(p) - self.__current.empty = 0 - - def start_content(self, attrs): - p = Parameter("content") - p.implied = attrs.get("implied") == "yes" - 
if self.__current.environment: - p.implied = 1 - self.__current.parameters.append(p) - self.__current.has_content = 1 - self.__current.empty = 0 - - def start_text(self, attrs): - self.__current.empty = 0 - self.__buffer = '' - def end_text(self): - p = Parameter("text") - p.text = self.__buffer - self.__current.parameters.append(p) - - -def load_table(fp): - ch = TableHandler() - xml.sax.parse(fp, ch) - return ch.get_table() - - -def main(): - global DEBUG - # - opts, args = getopt.getopt(sys.argv[1:], "D", ["debug"]) - for opt, arg in opts: - if opt in ("-D", "--debug"): - DEBUG += 1 - if len(args) == 0: - ifp = sys.stdin - ofp = sys.stdout - elif len(args) == 1: - ifp = open(args[0]) - ofp = sys.stdout - elif len(args) == 2: - ifp = open(args[0]) - ofp = open(args[1], "w") - else: - usage() - sys.exit(2) - - table = load_table(open(os.path.join(sys.path[0], 'conversion.xml'))) - convert(ifp, ofp, table) - - -if __name__ == "__main__": - main() diff --git a/Doc/tools/sgmlconv/make.rules b/Doc/tools/sgmlconv/make.rules deleted file mode 100644 index 93579c5..0000000 --- a/Doc/tools/sgmlconv/make.rules +++ /dev/null @@ -1,48 +0,0 @@ -# -*- makefile -*- -# -# Extra magic needed by the LaTeX->XML conversion process. This requires -# $(TOOLSDIR) to be properly defined. 
# Locations of the conversion tools.  $(TOOLSDIR) must be defined by the
# caller before any of these are used.
DOCFIXER=	$(TOOLSDIR)/sgmlconv/docfixer.py
ESIS2ML=	$(TOOLSDIR)/sgmlconv/esis2sgml.py
LATEX2ESIS=	$(TOOLSDIR)/sgmlconv/latex2esis.py
ESISTOOLS=	$(TOOLSDIR)/sgmlconv/esistools.py
CONVERSION=	$(TOOLSDIR)/sgmlconv/conversion.xml

# One output of each kind per LaTeX source in the current directory.
# Simple (:=) assignment so the directory is globbed exactly once.
TEXSOURCES:=	$(wildcard *.tex)
ESISTARGETS:=	$(TEXSOURCES:.tex=.esis)
ESIS1TARGETS:=	$(TEXSOURCES:.tex=.esis1)
XMLTARGETS:=	$(TEXSOURCES:.tex=.xml)

# Extra command-line flags for latex2esis.py (e.g. L2EFLAGS=--debug).
L2EFLAGS=

# None of these names correspond to files; always treat them as commands.
.PHONY: all esis esis1 xml clean clobber

# The recipes let the scripts write $@ directly, so remove a half-written
# output when a conversion fails instead of leaving it looking up to date.
.DELETE_ON_ERROR:

all:	xml

esis:	$(ESISTARGETS)
esis1:	$(ESIS1TARGETS)
xml:	$(XMLTARGETS)

# Rebuild outputs when the tools that produce them change.  latex2esis.py
# and esis2sgml.py both import esistools.py, so it is a prerequisite of
# every stage, not only of the docfixer stage.
$(ESIS1TARGETS): $(LATEX2ESIS) $(ESISTOOLS) $(CONVERSION)
$(ESISTARGETS): $(LATEX2ESIS) $(DOCFIXER) $(ESISTOOLS) $(CONVERSION)
# This variant is easier to work with while debugging the conversion spec:
#$(ESISTARGETS): $(LATEX2ESIS) $(DOCFIXER) $(ESISTOOLS)
$(XMLTARGETS): $(ESIS2ML) $(ESISTOOLS)


# Conversion pipeline:  .tex -> .esis1 -> .esis -> .xml

# Raw ESIS event stream generated from the LaTeX source.
%.esis1: %.tex
	$(LATEX2ESIS) $(L2EFLAGS) $< $@

# ESIS stream after docfixer.py has post-processed it.
%.esis: %.esis1
	$(DOCFIXER) $< $@

# Final XML serialization of the fixed-up ESIS stream.
%.xml: %.esis
	$(ESIS2ML) --xml $< $@


# Remove the intermediate ESIS files only.
clean:
	$(RM) *.esis *.esis1

# Remove everything this makefile generates, including the XML output.
clobber: clean
	$(RM) *.xml