Stopped all warnings from the HTML Help Compiler by generating proper HTML (or, at least, proper in its view).

The TOC file is now identical to what the HTML Help Compiler itself generates, except for whitespace and a glitch identified below. The pretty-printing done by prechm.py is pretty much destroyed for now; if you need it pretty-printed, just make the Help Compiler save the files (it's got its own idea of pretty-printing anyway).

Glitch: The title of Ref Man "2.1.6 Blank lines" shows up as a blank for now. This is because the relevant entry in ref/index.html contains nested anchors, and prechm.py really has no idea what to do with that. I hacked it for now to avoid any error messages or worse insanity, and filed a bug report against the docs.
author: Tim Peters <tim.peters@gmail.com> 2002-04-20 20:26:26 (GMT)
committer: Tim Peters <tim.peters@gmail.com> 2002-04-20 20:26:26 (GMT)
commit: 454540774618c35374ecd0042ab689692040c425 (patch)
tree: c2851970eff54e1e7ceeb9de2bbf177c4206c2de /Doc/tools
parent: 3d94942000dde4266487969eb75f27fd3d46099f (diff)
download: cpython-454540774618c35374ecd0042ab689692040c425.zip
cpython-454540774618c35374ecd0042ab689692040c425.tar.gz
cpython-454540774618c35374ecd0042ab689692040c425.tar.bz2
1 files changed, 34 insertions, 21 deletions
diff --git a/Doc/tools/prechm.py b/Doc/tools/prechm.py
index 675e400..59a2d21 100644
--- a/Doc/tools/prechm.py
+++ b/Doc/tools/prechm.py
@@ -19,8 +19,8 @@ import sys
 import os
 from formatter import NullWriter, AbstractFormatter
 from htmllib import HTMLParser
-import string
 import getopt
+import cgi
 
 usage_mode = '''
 Usage: make_chm.py [-c] [-k] [-p] [-v 1.5[.x]] filename
@@ -56,31 +56,36 @@ Title=Python %(version)s Documentation
 [FILES]
 '''
 
-contents_header = '''
+contents_header = '''\
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<HTML>
+<HEAD>
+<meta name="GENERATOR" content="Microsoft&reg; HTML Help Workshop 4.1">
+<!-- Sitemap 1.0 -->
+</HEAD><BODY>
 <OBJECT type="text/site properties">
 	<param name="Window Styles" value="0x801227">
 	<param name="ImageType" value="Folder">
 </OBJECT>
 <UL>
-<LI> <OBJECT type="text/sitemap">
-	<param name="Name" value="Python %s Docs">
-	<param name="Local" value="./index.html">
-	</OBJECT>
+<LI><OBJECT type="text/sitemap">
+       <param name="Name" value="Python %s Docs">
+       <param name="Local" value="./index.html">
+    </OBJECT>
 <UL>
 '''
 
-contents_footer = '''
-</UL></UL>
+contents_footer = '''\
+</UL></UL></BODY></HTML>
 '''
 
-object_sitemap = '''
-    <LI> <OBJECT type="text/sitemap">
-        <param name="Local" value="%s">
-        <param name="Name" value="%s">
-        </OBJECT>
+object_sitemap = '''\
+<OBJECT type="text/sitemap">
+    <param name="Name" value="%s">
+    <param name="Local" value="%s">
+</OBJECT>
 '''
 
-
 # List of words the full text search facility shouldn't index.  This
 # becomes file ARCH.stp.  Note that this list must be pretty small!
 # Different versions of the MS docs claim the file has a maximum size of
@@ -228,6 +233,9 @@ class HelpHtmlParser(HTMLParser):
         self.indent = 0     # number of tabs for pretty printing of files
         self.proc = False   # True when actively processing, else False
                             # (headers, footers, etc)
+        # XXX This shouldn't need to be a stack -- anchors shouldn't nest.
+        # XXX See SF bug <http://www.python.org/sf/546579>.
+        self.hrefstack = [] # stack of hrefs from anchor begins
 
     def begin_group(self):
         self.indent += 1
@@ -241,14 +249,18 @@ class HelpHtmlParser(HTMLParser):
     def anchor_bgn(self, href, name, type):
         if self.proc:
             self.saved_clear()
-            self.write('<OBJECT type="text/sitemap">\n')
-            self.tab('\t<param name="Local" value="%s/%s">\n' %
-                     (self.path, href))
+            self.hrefstack.append(href)
 
     def anchor_end(self):
         if self.proc:
-            self.tab('\t<param name="Name" value="%s">\n' % self.saved_get())
-            self.tab('\t</OBJECT>\n')
+            title = cgi.escape(self.saved_get(), True)
+            path = self.path + '/' + self.hrefstack.pop()
+            # XXX See SF bug <http://www.python.org/sf/546579>.
+            # XXX index.html for the 2.2 language reference manual contains
+            # XXX nested <a></a> tags in the entry for the section on blank
+            # XXX lines.  We want to ignore the nested part completely.
+            if len(self.hrefstack) == 0:
+                self.tab(object_sitemap % (title, path))
 
     def start_dl(self, atr_val):
         self.begin_group()
@@ -332,8 +344,9 @@ def do_content(library, version, output):
     output.write(contents_header % version)
     for book in library:
         print '\t', book.title, '-', book.firstpage
-        output.write(object_sitemap % (book.directory + "/" + book.firstpage,
-                                       book.title))
+        path = book.directory + "/" + book.firstpage
+        output.write('<LI>')
+        output.write(object_sitemap % (book.title, path))
         if book.contentpage:
             content(book.directory, book.contentpage, output)
     output.write(contents_footer)
author	Tim Peters <tim.peters@gmail.com>	2002-04-20 20:26:26 (GMT)
committer	Tim Peters <tim.peters@gmail.com>	2002-04-20 20:26:26 (GMT)
commit	454540774618c35374ecd0042ab689692040c425 (patch)
tree	c2851970eff54e1e7ceeb9de2bbf177c4206c2de /Doc/tools
parent	3d94942000dde4266487969eb75f27fd3d46099f (diff)
download	cpython-454540774618c35374ecd0042ab689692040c425.zip cpython-454540774618c35374ecd0042ab689692040c425.tar.gz cpython-454540774618c35374ecd0042ab689692040c425.tar.bz2