"""
Makes the necessary files to convert from plain html of
Python 1.5 and 1.5.x Documentation to
Microsoft HTML Help format version 1.1
Doesn't change the html's docs.
by hernan.foffani@iname.com
no copyright and no responsibilities.
modified by Dale Nagata for Python 1.5.2
Renamed from make_chm.py to prechm.py, and checked into the Python
project, 19-Apr-2002 by Tim Peters. Assorted modifications by Tim
and Fred Drake. Obtained from Robin Dunn's .chm packaging of the
Python 2.2 docs, at .
"""
import sys
import os
from formatter import NullWriter, AbstractFormatter
from htmllib import HTMLParser
import getopt
import cgi
# Help text printed by usage().  Keep it in sync with the option string
# handed to getopt in do_it() and with the keys of supported_libraries:
# the script is now called prechm.py, and -v is mandatory (do_it() shows
# this text and exits when no version is given).
usage_mode = '''
Usage: prechm.py [-c] [-k] [-p] -v VERSION filename
    -c: does not build filename.hhc (Table of Contents)
    -k: does not build filename.hhk (Index)
    -p: does not build filename.hhp (Project File) or filename.stp
    -v VERSION: makes help for the Python VERSION docs (required);
        VERSION is one of 1.5, 1.5.1, 1.5.2, 2.0.0, 2.1.1, 2.2
'''
# Project file (*.hhp) template. 'arch' is the file basename (like
# the pythlp in pythlp.hhp); 'version' is the doc version number (like
# the 2.2 in Python 2.2).
# The magical numbers in the long line under [WINDOWS] set most of the
# user-visible features (visible buttons, tabs, etc).
# About 0x10384e: This defines the buttons in the help viewer. The
# following defns are taken from htmlhelp.h. Not all possibilities
# actually work, and not all those that work are available from the Help
# Workshop GUI. In particular, the Zoom/Font button works and is not
# available from the GUI. The ones we're using are marked with 'x':
#
# 0x000002 Hide/Show x
# 0x000004 Back x
# 0x000008 Forward x
# 0x000010 Stop
# 0x000020 Refresh
# 0x000040 Home x
# 0x000080 Forward
# 0x000100 Back
# 0x000200 Notes
# 0x000400 Contents
# 0x000800 Locate x
# 0x001000 Options x
# 0x002000 Print x
# 0x004000 Index
# 0x008000 Search
# 0x010000 History
# 0x020000 Favorites
# 0x040000 Jump 1
# 0x080000 Jump 2
# 0x100000 Zoom/Font x
# 0x200000 TOC Next
# 0x400000 TOC Prev
#
# Sanity check: OR-ing the values marked 'x' above
# (0x2 | 0x4 | 0x8 | 0x40 | 0x800 | 0x1000 | 0x2000 | 0x100000)
# gives exactly 0x10384e, the value used in the [WINDOWS] line.
#
# do_project() fills this template with a mapping that supplies the
# %(arch)s and %(version)s keys, then appends one line per file after
# the trailing [FILES] header.
project_template = '''
[OPTIONS]
Compiled file=%(arch)s.chm
Contents file=%(arch)s.hhc
Default Window=%(arch)s
Default topic=index.html
Display compile progress=No
Full text search stop list file=%(arch)s.stp
Full-text search=Yes
Index file=%(arch)s.hhk
Language=0x409
Title=Python %(version)s Documentation
[WINDOWS]
%(arch)s="Python %(version)s Documentation","%(arch)s.hhc","%(arch)s.hhk",\
"index.html","index.html",,,,,0x63520,220,0x10384e,[271,372,740,718],,,,,,,0
[FILES]
'''
# Boilerplate for the table-of-contents file (*.hhc) and the template for
# a single sitemap entry.
#
# NOTE(review): these three literals were empty in the checked-in text,
# which cannot be right: object_sitemap is formatted with a 2-tuple
# (title, path), so an empty template raises TypeError.  The markup below
# is a reconstruction of the usual HTML Help sitemap layout -- confirm the
# header details (generator string, "Window Styles" value) against the
# upstream copy of this script before relying on them.

# Written once at the start of the contents file; leaves a <UL> open.
contents_header = '''\
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<HTML>
<HEAD>
<meta name="GENERATOR" content="Microsoft&reg; HTML Help Workshop 4.1">
<!-- Sitemap 1.0 -->
</HEAD><BODY>
<OBJECT type="text/site properties">
        <param name="Window Styles" value="0x801227">
        <param name="ImageType" value="Folder">
</OBJECT>
<UL>
'''

# Written once at the end of the contents file; closes the header's <UL>.
contents_footer = '''\
</UL></BODY></HTML>
'''

# One sitemap entry.  Formatted as object_sitemap % (title, path), in that
# order; the title must already be HTML-escaped by the caller.
object_sitemap = '''\
<OBJECT type="text/sitemap">
        <param name="Name" value="%s">
        <param name="Local" value="%s">
</OBJECT>
'''
# List of words the full text search facility shouldn't index. This
# becomes file ARCH.stp. Note that this list must be pretty small!
# Different versions of the MS docs claim the file has a maximum size of
# 256 or 512 bytes (including \r\n at the end of each line).
# Note that "and", "or", "not" and "near" are operators in the search
# language, so no point indexing them even if we wanted to.
#
# do_it() splits this string on whitespace, sorts the words and writes
# them one per line, so the line layout of the literal is not significant.
stop_list = '''
a an and
is
near
not
of
or
the
'''
def addhtml(s):
    """Return page name *s* with a '.html' suffix.

    *s* is a string or None.  A false value (None or the empty string) is
    handed back unchanged, as is a name that already ends in '.html'.
    """
    if not s or s.endswith('.html'):
        return s
    return s + '.html'
# A "book" in HTML Help terms is one documentation directory in Python
# terms; this class just bundles the facts known about it.
class Book:
    """Plain record describing one documentation directory.

    directory   -- directory holding the book's pages
    title       -- human-readable title
    firstpage   -- name of the book's first page
    contentpage -- name of its table-of-contents page, or None
    indexpage   -- name of its index page, or None

    The three page names may be given with or without '.html'; they are
    stored after being passed through addhtml().
    """

    def __init__(self, directory, title, firstpage,
                 contentpage=None, indexpage=None):
        self.directory, self.title = directory, title
        pages = [addhtml(name)
                 for name in (firstpage, contentpage, indexpage)]
        self.firstpage, self.contentpage, self.indexpage = pages
# Library Doc list of books:
# each 'book' : (Dir, Title, First page, Content page, Index page)
#
# The keys are the version strings accepted by the -v option; do_it()
# looks the requested version up here directly.  Page names are written
# without the '.html' suffix because Book adds it.  Content page and
# index page are optional and default to None when omitted.
supported_libraries = {
# library for 2.2 version:
'2.2':
[
Book('.', 'Main page', 'index'),
Book('.', 'Global Module Index', 'modindex'),
Book('whatsnew', "What's New", 'index', 'contents'),
Book('tut','Tutorial','tut','node2'),
Book('lib','Library Reference','lib','contents','genindex'),
Book('ref','Language Reference','ref','contents','genindex'),
Book('mac','Macintosh Reference','mac','contents','genindex'),
Book('ext','Extending and Embedding','ext','contents'),
Book('api','Python/C API','api','contents','genindex'),
Book('doc','Documenting Python','doc','contents'),
Book('inst','Installing Python Modules', 'inst', 'index'),
Book('dist','Distributing Python Modules', 'dist', 'index'),
],
# library for 2.1.1 version (same as 2.2 minus the "What's New" book):
'2.1.1':
[
Book('.', 'Main page', 'index'),
Book('.', 'Global Module Index', 'modindex'),
Book('tut','Tutorial','tut','node2'),
Book('lib','Library Reference','lib','contents','genindex'),
Book('ref','Language Reference','ref','contents','genindex'),
Book('mac','Macintosh Reference','mac','contents','genindex'),
Book('ext','Extending and Embedding','ext','contents'),
Book('api','Python/C API','api','contents','genindex'),
Book('doc','Documenting Python','doc','contents'),
Book('inst','Installing Python Modules', 'inst', 'index'),
Book('dist','Distributing Python Modules', 'dist', 'index'),
],
# library for 2.0.0 version (do_it() also adds the 'icons' directory
# to the project file for this version):
'2.0.0':
[
Book('.', 'Global Module Index', 'modindex'),
Book('tut','Tutorial','tut','node2'),
Book('lib','Library Reference','lib','contents','genindex'),
Book('ref','Language Reference','ref','contents','genindex'),
Book('mac','Macintosh Reference','mac','contents','genindex'),
Book('ext','Extending and Embedding','ext','contents'),
Book('api','Python/C API','api','contents','genindex'),
Book('doc','Documenting Python','doc','contents'),
Book('inst','Installing Python Modules', 'inst', 'contents'),
Book('dist','Distributing Python Modules', 'dist', 'contents'),
],
# Apr 17/99: library for 1.5.2 version:
# May 01/99: library for 1.5.2 (04/30/99):
'1.5.2':
[
Book('tut','Tutorial','tut','node2'),
Book('lib','Library Reference','lib','contents','genindex'),
Book('ref','Language Reference','ref','contents','genindex'),
Book('mac','Macintosh Reference','mac','contents','genindex'),
Book('ext','Extending and Embedding','ext','contents'),
Book('api','Python/C API','api','contents','genindex'),
Book('doc','Documenting Python','doc','contents')
],
# library for 1.5.1 version:
'1.5.1':
[
Book('tut','Tutorial','tut','contents'),
Book('lib','Library Reference','lib','contents','genindex'),
Book('ref','Language Reference','ref-1','ref-2','ref-11'),
Book('ext','Extending and Embedding','ext','contents'),
Book('api','Python/C API','api','contents','genindex')
],
# library for 1.5 version:
'1.5':
[
Book('tut','Tutorial','tut','node1'),
Book('lib','Library Reference','lib','node1','node268'),
Book('ref','Language Reference','ref-1','ref-2','ref-11'),
Book('ext','Extending and Embedding','ext','node1'),
Book('api','Python/C API','api','node1','node48')
]
}
class AlmostNullWriter(NullWriter):
    """A NullWriter that remembers the flowing text it is sent.

    Nothing is ever printed.  Each non-blank chunk passed to
    send_flowing_data() is stripped and stored, so that the text between
    an anchor's begin and end (e.g. a TOC entry) can be fetched later.
    """

    def __init__(self):
        NullWriter.__init__(self)
        self.saved_clear()

    def send_flowing_data(self, data):
        """Store *data*, stripped, unless it is nothing but whitespace."""
        text = data.strip()
        if not text:
            return
        self.saved.append(text)

    def saved_clear(self):
        """Discard everything stored so far."""
        self.saved = []

    def saved_get(self):
        """Return the stored chunks joined by single spaces."""
        separator = ' '
        return separator.join(self.saved)
class HelpHtmlParser(HTMLParser):
def __init__(self, formatter, path, output):
HTMLParser.__init__(self, formatter)
self.path = path # relative path
self.ft = output # output file
self.indent = 0 # number of tabs for pretty printing of files
self.proc = False # True when actively processing, else False
# (headers, footers, etc)
# XXX This shouldn't need to be a stack -- anchors shouldn't nest.
# XXX See SF bug .
self.hrefstack = [] # stack of hrefs from anchor begins
def begin_group(self):
self.indent += 1
self.proc = True
def finish_group(self):
self.indent -= 1
# stop processing when back to top level
self.proc = self.indent > 0
def anchor_bgn(self, href, name, type):
if self.proc:
# XXX See SF bug .
# XXX index.html for the 2.2.1 language reference manual contains
# XXX nested tags in the entry for the section on blank
# XXX lines. We want to ignore the nested part completely.
if len(self.hrefstack) == 0:
self.saved_clear()
self.hrefstack.append(href)
def anchor_end(self):
if self.proc:
# XXX See XXX above.
if self.hrefstack:
title = cgi.escape(self.saved_get(), True)
path = self.path + '/' + self.hrefstack.pop()
self.tab(object_sitemap % (title, path))
def start_dl(self, atr_val):
self.begin_group()
def end_dl(self):
self.finish_group()
def do_dt(self, atr_val):
# no trailing newline on purpose!
self.tab("
")
# Write text to output file.
def write(self, text):
self.ft.write(text)
# Write text to output file after indenting by self.indent tabs.
def tab(self, text=''):
self.write('\t' * self.indent)
if text:
self.write(text)
# Forget all saved text.
def saved_clear(self):
self.formatter.writer.saved_clear()
# Return all saved text as a string.
def saved_get(self):
return self.formatter.writer.saved_get()
class IdxHlpHtmlParser(HelpHtmlParser):
    """HelpHtmlParser under another name.

    Nothing special is needed here; the behaviour inherited from the
    parent class seems to be enough.
    """
class TocHlpHtmlParser(HelpHtmlParser):
def start_dl(self, atr_val):
self.begin_group()
self.tab('
\n')
for book in library:
print '\t', book.title, '-', book.indexpage
if book.indexpage:
index(book.directory, book.indexpage, output)
output.write('
\n')
def do_content(library, version, output):
output.write(contents_header)
for book in library:
print '\t', book.title, '-', book.firstpage
path = book.directory + "/" + book.firstpage
output.write('
')
output.write(object_sitemap % (book.title, path))
if book.contentpage:
content(book.directory, book.contentpage, output)
output.write(contents_footer)
def do_project(library, output, arch, version):
    """Write the project (*.hhp) file, including its [FILES] section.

    library -- list of Book objects from supported_libraries for the
               version of the docs getting generated
    output  -- open file (or any object with write()) for the result
    arch    -- basename of the help files (the pythlp in pythlp.hhp)
    version -- documentation version shown in the window title

    After the filled-in template, one backslash-separated path per line
    is written for every .html and .css file in each book's directory.
    Several books can share a directory, so each path is written once.
    """
    # Pass the two keys explicitly instead of relying on locals().
    output.write(project_template % {'arch': arch, 'version': version})
    pathseen = {}
    for book in library:
        directory = book.directory
        # os.listdir() returns names in arbitrary order; sort them so the
        # generated project file is reproducible.
        pages = os.listdir(directory)
        pages.sort()
        for page in pages:
            if not (page.endswith('.html') or page.endswith('.css')):
                continue
            # Plain concatenation: %-formatting a string built from the
            # directory name would break on a directory containing '%'.
            fullpath = directory + '\\' + page + '\n'
            if fullpath not in pathseen:
                output.write(fullpath)
                pathseen[fullpath] = True
def openfile(file):
try:
p = open(file, "w")
except IOError, msg:
print file, ":", msg
sys.exit(1)
return p
def usage():
print usage_mode
sys.exit(0)
def do_it(args = None):
if not args:
args = sys.argv[1:]
if not args:
usage()
try:
optlist, args = getopt.getopt(args, 'ckpv:')
except getopt.error, msg:
print msg
usage()
if not args or len(args) > 1:
usage()
arch = args[0]
version = None
for opt in optlist:
if opt[0] == '-v':
version = opt[1]
break
if not version:
usage()
library = supported_libraries[version]
if not (('-p','') in optlist):
fname = arch + '.stp'
f = openfile(fname)
print "Building stoplist", fname, "..."
words = stop_list.split()
words.sort()
for word in words:
print >> f, word
f.close()
f = openfile(arch + '.hhp')
print "Building Project..."
do_project(library, f, arch, version)
if version == '2.0.0':
for image in os.listdir('icons'):
f.write('icons'+ '\\' + image + '\n')
f.close()
if not (('-c','') in optlist):
f = openfile(arch + '.hhc')
print "Building Table of Content..."
do_content(library, version, f)
f.close()
if not (('-k','') in optlist):
f = openfile(arch + '.hhk')
print "Building Index..."
do_index(library, f)
f.close()
if __name__ == '__main__':
do_it()