"""Generate the input files needed to compile the Python docs to HTML Help.

Makes the necessary files to convert from plain html of
Python 1.5 and 1.5.x Documentation to
Microsoft HTML Help format version 1.1
Doesn't change the html's docs.

by hernan.foffani@iname.com
no copyright and no responsibilities.

modified by Dale Nagata for Python 1.5.2

Renamed from make_chm.py to prechm.py, and checked into the Python
project, 19-Apr-2002 by Tim Peters.  Assorted modifications by Tim
and Fred Drake.  Obtained from Robin Dunn's .chm packaging of the
Python 2.2 docs.

For an archive basename ARCH the script writes ARCH.stp (full-text search
stop list), ARCH.hhp (project file), ARCH.hhc (table of contents) and
ARCH.hhk (index).
"""

import sys
import os
from formatter import NullWriter, AbstractFormatter
from htmllib import HTMLParser
import getopt
import cgi

# NOTE: -v is mandatory (do_it() shows this text and exits when it is
# missing), and -p suppresses the stop list together with the project file.
usage_mode = '''
Usage: make_chm.py [-c] [-k] [-p] -v VERSION filename
    -c: does not build filename.hhc (Table of Contents)
    -k: does not build filename.hhk (Index)
    -p: does not build filename.hhp (Project File)
        or filename.stp (stop list)
    -v VERSION: makes help for the python VERSION docs
        (required, e.g. 1.5.2 or 2.2)
'''

# Project file (*.hhp) template.  'arch' is the file basename (like
# the pythlp in pythlp.hhp); 'version' is the doc version number (like
# the 2.2 in Python 2.2).
# The magical numbers in the long line under [WINDOWS] set most of the
# user-visible features (visible buttons, tabs, etc).
# About 0x10384e:  This defines the buttons in the help viewer.  The
# following defns are taken from htmlhelp.h.  Not all possibilities
# actually work, and not all those that work are available from the Help
# Workshop GUI.  In particular, the Zoom/Font button works and is not
# available from the GUI.  The ones we're using are marked with 'x':
#
#    0x000002   Hide/Show   x
#    0x000004   Back        x
#    0x000008   Forward     x
#    0x000010   Stop
#    0x000020   Refresh
#    0x000040   Home        x
#    0x000080   Forward
#    0x000100   Back
#    0x000200   Notes
#    0x000400   Contents
#    0x000800   Locate      x
#    0x001000   Options     x
#    0x002000   Print       x
#    0x004000   Index
#    0x008000   Search
#    0x010000   History
#    0x020000   Favorites
#    0x040000   Jump 1
#    0x080000   Jump 2
#    0x100000   Zoom/Font   x
#    0x200000   TOC Next
#    0x400000   TOC Prev

project_template = '''
[OPTIONS]
Compiled file=%(arch)s.chm
Contents file=%(arch)s.hhc
Default Window=%(arch)s
Default topic=index.html
Display compile progress=No
Full text search stop list file=%(arch)s.stp
Full-text search=Yes
Index file=%(arch)s.hhk
Language=0x409
Title=Python %(version)s Documentation

[WINDOWS]
%(arch)s="Python %(version)s Documentation","%(arch)s.hhc","%(arch)s.hhk",\
"index.html","index.html",,,,,0x63520,220,0x10384e,[271,372,740,718],,,,,,,0

[FILES]
'''

# NOTE(review): the HTML markup in contents_header, contents_footer and
# object_sitemap (and in the '<UL>' / '</UL>' / '<LI>' literals further
# down) was blank in the copy this was restored from.  It has been rebuilt
# to follow the HTML Help sitemap layout -- confirm against a pristine copy
# or against a file saved by HTML Help Workshop.

# Written at the top of the table of contents (.hhc) file.  It opens the
# outermost <UL>, which contents_footer closes.
contents_header = '''\
<HTML>
<HEAD>
<meta name="GENERATOR" content="Microsoft&reg; HTML Help Workshop 4.1">
<!-- Sitemap 1.0 -->
</HEAD><BODY>
<OBJECT type="text/site properties">
        <param name="Window Styles" value="0x801227">
        <param name="ImageType" value="Folder">
</OBJECT>
<UL>
'''

# Written at the bottom of the table of contents (.hhc) file.
contents_footer = '''\
</UL></BODY></HTML>
'''

# One sitemap entry.  Formatted with a (title, path) pair, so it must
# contain exactly two %s placeholders.
object_sitemap = '''\
<OBJECT type="text/sitemap">
    <param name="Name" value="%s">
    <param name="Local" value="%s">
</OBJECT>
'''

# List of words the full text search facility shouldn't index.  This
# becomes file ARCH.stp.  Note that this list must be pretty small!
# Different versions of the MS docs claim the file has a maximum size of
# 256 or 512 bytes (including \r\n at the end of each line).
# Note that "and", "or", "not" and "near" are operators in the search
# language, so no point indexing them even if we wanted to.
stop_list = '''
a
an
and
is
near
not
of
or
the
'''


# s is a string or None.  If None or empty, return None.  Else tack '.html'
# on to the end, unless it's already there.
def addhtml(s):
    """Return s with a '.html' suffix, or None if s is None or empty."""
    if not s:
        return None
    if not s.endswith('.html'):
        s += '.html'
    return s


# Convenience class to hold info about "a book" in HTMLHelp terms == a doc
# directory in Python terms.
class Book:
    """Description of one documentation directory.

    directory   -- directory holding the book's html files
    title       -- title shown for the book in the table of contents
    firstpage   -- page the book's table-of-contents entry points at
    contentpage -- page parsed to build the book's contents subtree, or None
    indexpage   -- page parsed to build the book's index entries, or None

    Page names may be given with or without the '.html' suffix.
    """

    def __init__(self, directory, title, firstpage,
                 contentpage=None, indexpage=None):
        self.directory = directory
        self.title = title
        self.firstpage = addhtml(firstpage)
        self.contentpage = addhtml(contentpage)
        self.indexpage = addhtml(indexpage)


# Library Doc list of books:
# each 'book' : (Dir, Title, First page, Content page, Index page)
supported_libraries = {
    '2.2':
    [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('whatsnew', "What's New", 'index', 'contents'),
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
        Book('inst', 'Installing Python Modules', 'inst', 'index'),
        Book('dist', 'Distributing Python Modules', 'dist', 'index'),
    ],

    '2.1.1':
    [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
        Book('inst', 'Installing Python Modules', 'inst', 'index'),
        Book('dist', 'Distributing Python Modules', 'dist', 'index'),
    ],

    '2.0.0':
    [
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
        Book('inst', 'Installing Python Modules', 'inst', 'contents'),
        Book('dist', 'Distributing Python Modules', 'dist', 'contents'),
    ],

    # Apr 17/99: library for 1.5.2 version:
    # May 01/99: library for 1.5.2 (04/30/99):
    '1.5.2':
    [
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
    ],

    # library for 1.5.1 version:
    '1.5.1':
    [
        Book('tut', 'Tutorial', 'tut', 'contents'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref-1', 'ref-2', 'ref-11'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
    ],

    # library for 1.5 version:
    '1.5':
    [
        Book('tut', 'Tutorial', 'tut', 'node1'),
        Book('lib', 'Library Reference', 'lib', 'node1', 'node268'),
        Book('ref', 'Language Reference', 'ref-1', 'ref-2', 'ref-11'),
        Book('ext', 'Extending and Embedding', 'ext', 'node1'),
        Book('api', 'Python/C API', 'api', 'node1', 'node48'),
    ],
}


# AlmostNullWriter doesn't print anything; it just arranges to save the
# text sent to send_flowing_data().  This is used to capture the text
# between an anchor begin/end pair, e.g. for TOC entries.
class AlmostNullWriter(NullWriter):
    """Writer that discards output but remembers the flowing text it saw."""

    def __init__(self):
        NullWriter.__init__(self)
        self.saved_clear()

    def send_flowing_data(self, data):
        """Remember data, stripped, unless it is only whitespace."""
        stripped = data.strip()
        if stripped:
            # don't bother to save runs of whitespace
            self.saved.append(stripped)

    # Forget all saved text.
    def saved_clear(self):
        self.saved = []

    # Return all saved text as a string.
    def saved_get(self):
        return ' '.join(self.saved)


class HelpHtmlParser(HTMLParser):
    """Parser that writes one sitemap object per anchor found in a list.

    Anchors are only processed while at least one group (a <DL> here; a
    subclass may add other tags) is open, which skips page headers and
    footers.  The formatter's writer must provide saved_clear() and
    saved_get(), as AlmostNullWriter does.
    """

    def __init__(self, formatter, path, output):
        HTMLParser.__init__(self, formatter)
        self.path = path    # relative path
        self.ft = output    # output file
        self.indent = 0     # number of tabs for pretty printing of files
        self.proc = False   # True when actively processing, else False
                            # (headers, footers, etc)
        # XXX This shouldn't need to be a stack -- anchors shouldn't nest.
        # XXX (The SF bug reference that belonged here was not preserved.)
        self.hrefstack = []  # stack of hrefs from anchor begins

    def begin_group(self):
        """Enter a list: indent one level deeper and start processing."""
        self.indent += 1
        self.proc = True

    def finish_group(self):
        """Leave a list: indent one level less."""
        self.indent -= 1
        # stop processing when back to top level
        self.proc = self.indent > 0

    def anchor_bgn(self, href, name, type):
        if self.proc:
            # XXX index.html for the 2.2.1 language reference manual contains
            # XXX nested anchor tags in the entry for the section on blank
            # XXX lines.  We want to ignore the nested part completely, so
            # XXX an anchor that begins while another is still open is
            # XXX neither recorded nor allowed to reset the saved text.
            if not self.hrefstack:
                self.saved_clear()
                self.hrefstack.append(href)

    def anchor_end(self):
        if self.proc:
            # XXX See XXX in anchor_bgn.  The first end tag seen emits the
            # XXX entry; the end tag of an ignored nested anchor finds the
            # XXX stack empty and does nothing.
            if self.hrefstack:
                title = cgi.escape(self.saved_get(), True)
                path = self.path + '/' + self.hrefstack.pop()
                self.tab(object_sitemap % (title, path))

    def start_dl(self, atr_val):
        self.begin_group()

    def end_dl(self):
        self.finish_group()

    def do_dt(self, atr_val):
        # no trailing newline on purpose!
        self.tab("<LI>")

    # Write text to output file.
    def write(self, text):
        self.ft.write(text)

    # Write text to output file after indenting by self.indent tabs.
    def tab(self, text=''):
        self.write('\t' * self.indent)
        if text:
            self.write(text)

    # Forget all saved text.
    def saved_clear(self):
        self.formatter.writer.saved_clear()

    # Return all saved text as a string.
    def saved_get(self):
        return self.formatter.writer.saved_get()


class IdxHlpHtmlParser(HelpHtmlParser):
    """Parser used for index pages."""
    # nothing special here, seems enough with parent class
    pass


class TocHlpHtmlParser(HelpHtmlParser):
    """Parser used for contents pages.

    Unlike the base class it copies the list nesting to the output, and
    treats <UL>/<LI> the same way as <DL>/<DT>.
    """

    def start_dl(self, atr_val):
        self.begin_group()
        self.tab('<UL>\n')

    def end_dl(self):
        self.finish_group()
        self.tab('</UL>\n')

    def start_ul(self, atr_val):
        self.begin_group()
        self.tab('<UL>\n')

    def end_ul(self):
        self.finish_group()
        self.tab('</UL>\n')

    def do_li(self, atr_val):
        # no trailing newline on purpose!
        self.tab("<LI>")


def _parse_page(parser_class, path, page, output):
    """Run the html file path/page through a new parser_class instance.

    The parser writes its sitemap entries to output.  The input file is
    closed even if parsing fails.
    """
    parser = parser_class(AbstractFormatter(AlmostNullWriter()), path, output)
    f = open(path + '/' + page)
    try:
        parser.feed(f.read())
        parser.close()
    finally:
        f.close()


def index(path, indexpage, output):
    """Write the index entries found in path/indexpage to output."""
    _parse_page(IdxHlpHtmlParser, path, indexpage, output)


def content(path, contentpage, output):
    """Write the contents subtree found in path/contentpage to output."""
    _parse_page(TocHlpHtmlParser, path, contentpage, output)


def do_index(library, output):
    """Write the index (.hhk) body for every book in library to output.

    Books without an index page contribute nothing.  One progress line per
    book is printed to stdout.
    """
    output.write('<UL>\n')
    for book in library:
        print('\t%s - %s' % (book.title, book.indexpage))
        if book.indexpage:
            index(book.directory, book.indexpage, output)
    output.write('</UL>\n')


def do_content(library, version, output):
    """Write the table of contents (.hhc) for library to output.

    Each book gets one top-level entry pointing at its first page, followed
    by the entries parsed from its contents page when it has one.  version
    is accepted for symmetry with do_project() and is not used.  One
    progress line per book is printed to stdout.
    """
    output.write(contents_header)
    for book in library:
        print('\t%s - %s' % (book.title, book.firstpage))
        path = book.directory + "/" + book.firstpage
        output.write('<LI>')
        # Escape the title exactly as anchor titles are escaped in
        # HelpHtmlParser.anchor_end(); it lands in an attribute value.
        output.write(object_sitemap % (cgi.escape(book.title, True), path))
        if book.contentpage:
            content(book.directory, book.contentpage, output)
    output.write(contents_footer)


# Fill in the [FILES] section of the project (.hhp) file.
# 'library' is the list of directory description tuples from
# supported_libraries for the version of the docs getting generated.
def do_project(library, output, arch, version):
    """Write the project (.hhp) file for library to output.

    After the filled-in template, every .html and .css file found in the
    books' directories is listed once, as 'directory\\page', in sorted
    order per directory.
    """
    output.write(project_template % {'arch': arch, 'version': version})
    pathseen = {}
    for book in library:
        directory = book.directory
        pages = os.listdir(directory)
        pages.sort()    # keep the generated file reproducible
        for page in pages:
            if page.endswith('.html') or page.endswith('.css'):
                # Plain concatenation: the directory name is data, not a
                # format string, so a '%' in it must not be interpreted.
                fullpath = directory + '\\' + page + '\n'
                # Several books may share a directory ('.').
                if fullpath not in pathseen:
                    output.write(fullpath)
                    pathseen[fullpath] = True


def openfile(file):
    """Open file for writing and return it.

    If the file can't be opened, print the reason and exit with status 1.
    """
    try:
        p = open(file, "w")
    except IOError:
        print('%s : %s' % (file, sys.exc_info()[1]))
        sys.exit(1)
    return p


def usage():
    """Print the usage message and exit with status 0."""
    print(usage_mode)
    sys.exit(0)


def do_it(args = None):
    """Build the HTML Help input files requested on the command line.

    args is the argument list without the program name; sys.argv[1:] is
    used when it is None or empty.  See usage_mode for the options.  Exits
    through usage() on a malformed command line, and with status 1 when
    the requested version is unknown or an output file can't be created.
    """
    if not args:
        args = sys.argv[1:]
    if not args:
        usage()

    try:
        optlist, args = getopt.getopt(args, 'ckpv:')
    except getopt.error:
        print(sys.exc_info()[1])
        usage()

    if len(args) != 1:
        usage()
    arch = args[0]

    # First occurrence of each option wins.
    options = {}
    for flag, value in optlist:
        options.setdefault(flag, value)

    version = options.get('-v')
    if not version:
        usage()
    if version not in supported_libraries:
        known = list(supported_libraries.keys())
        known.sort()
        print('Unsupported documentation version %r; choose one of: %s'
              % (version, ', '.join(known)))
        sys.exit(1)
    library = supported_libraries[version]

    if '-p' not in options:
        fname = arch + '.stp'
        f = openfile(fname)
        try:
            print("Building stoplist %s ..." % fname)
            words = stop_list.split()
            words.sort()
            for word in words:
                f.write(word + '\n')
        finally:
            f.close()

        f = openfile(arch + '.hhp')
        try:
            print("Building Project...")
            do_project(library, f, arch, version)
            if version == '2.0.0':
                for image in os.listdir('icons'):
                    f.write('icons' + '\\' + image + '\n')
        finally:
            f.close()

    if '-c' not in options:
        f = openfile(arch + '.hhc')
        try:
            print("Building Table of Content...")
            do_content(library, version, f)
        finally:
            f.close()

    if '-k' not in options:
        f = openfile(arch + '.hhk')
        try:
            print("Building Index...")
            do_index(library, f)
        finally:
            f.close()


if __name__ == '__main__':
    do_it()