summaryrefslogtreecommitdiffstats
path: root/Tools/scripts/texi2html.py
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>1995-08-28 03:01:00 (GMT)
committerGuido van Rossum <guido@python.org>1995-08-28 03:01:00 (GMT)
commit06f4289d3fa50f500e4053546ea4c96c4729dd19 (patch)
treee7f839e97588b21b6668807af2bdf193cdcd9db6 /Tools/scripts/texi2html.py
parentd639b45ed6a04705ae1952e1da801093cc8ee300 (diff)
downloadcpython-06f4289d3fa50f500e4053546ea4c96c4729dd19.zip
cpython-06f4289d3fa50f500e4053546ea4c96c4729dd19.tar.gz
cpython-06f4289d3fa50f500e4053546ea4c96c4729dd19.tar.bz2
many fixes by anonymous contributor
Diffstat (limited to 'Tools/scripts/texi2html.py')
-rwxr-xr-xTools/scripts/texi2html.py370
1 files changed, 298 insertions, 72 deletions
diff --git a/Tools/scripts/texi2html.py b/Tools/scripts/texi2html.py
index 10d955c..a1c0ebd 100755
--- a/Tools/scripts/texi2html.py
+++ b/Tools/scripts/texi2html.py
@@ -2,7 +2,7 @@
# Convert GNU texinfo files into HTML, one file per node.
# Based on Texinfo 2.14.
-# Usage: texi2html [-d] [-d] inputfile outputdirectory
+# Usage: texi2html [-d] [-d] [-c] inputfile outputdirectory
# The input file must be a complete texinfo file, e.g. emacs.texi.
# This creates many files (one per info node) in the output directory,
# overwriting existing files of the same name. All files created have
@@ -17,12 +17,23 @@
# - handle @exdent properly
# - add links directly to the proper line from indices
# - check against the definitive list of @-cmds; we still miss (among others):
-# - @set, @clear, @ifset, @ifclear
# - @defindex (hard)
# - @c(omment) in the middle of a line (rarely used)
# - @this* (not really needed, only used in headers anyway)
# - @today{} (ever used outside title page?)
+# More consistent handling of chapters/sections/etc.
+# Lots of documentation
+# Many more options:
+# -top designate top node
+# -links customize which types of links are included
+# -split split at chapters or sections instead of nodes
+# -name Allow different types of filename handling. Non unix systems
+# will have problems with long node names
+# ...
+# Support the most recent texinfo version and take a good look at HTML 3.0
+# More debugging output (customizable) and more fexible error handling
+# How about icons ?
import os
import regex
@@ -39,6 +50,81 @@ miprog = regex.compile( \
'^\* \([^:]*\):\(:\|[ \t]*\([^\t,\n.]+\)\([^ \t\n]*\)\)[ \t\n]*')
# menu item (Yuck!)
+
+class Node:
+ __doc__ = """
+ Some of the parser's functionality is separated into this class.
+
+ A Node accumulates its contents, takes care of links to other Nodes
+ and saves itself when it is finished and all links are resolved. """
+
+ def __init__ (self, dir, name, topname, title, next, prev, up):
+ self.dirname = dir
+ self.name = name
+ if topname:
+ self.topname = topname
+ else:
+ self.topname = name
+ self.title = title
+ self.next = next
+ self.prev = prev
+ self.up = up
+ self.lines = []
+ self.type = 0
+ self.cont = ''
+
+ def write (self, *lines):
+ for line in lines:
+ self.lines.append (line)
+
+ def flush (self):
+ fp = open (self.dirname + '/' + makefile(self.name), 'w')
+ fp.write (self.prologue)
+ fp.write (self.text)
+ fp.write (self.epilogue)
+ fp.close ()
+
+
+ def link(self, label, nodename):
+ if nodename:
+ if string.lower(nodename) == '(dir)':
+ addr = '../dir.html'
+ else:
+ addr = makefile(nodename)
+ self.write(label, ': <A HREF="', addr, '" TYPE="', \
+ label, '">', nodename, '</A> \n')
+
+
+ def finalize(self):
+ length = len (self.lines)
+ self.text = string.joinfields (self.lines, '')
+ self.lines = []
+ self.write ('<BR> <HR>\n')
+ if self.cont != self.next:
+ self.link('Cont', self.cont)
+ self.link('Next', self.next)
+ self.link('Prev', self.prev)
+ self.link('Up', self.up)
+ if self.name <> self.topname:
+ self.link('Top', self.topname)
+ self.write ('<BR> <HR> <P>\n')
+ links = string.joinfields (self.lines, '')
+ self.lines = []
+
+ self.prologue = \
+ '<!DOCTYPE HTML PUBLIC "-//W3O//DTD W3 HTML 2.0//EN">\n' + \
+ '<!- Converted with texi2html and Python>\n' + \
+ '<P>\n<HEAD>\n' + \
+ '<TITLE>' + self.title + '</TITLE>\n' + \
+ '</HEAD>\n<BODY>\n<P>\n' + \
+ links
+
+ if length > 20:
+ self.epilogue = links + '</BODY>\n'
+ else:
+ self.epilogue = '</BODY>\n'
+
+
class TexinfoParser:
# Initialize an instance
@@ -50,6 +136,7 @@ class TexinfoParser:
self.nodelineno = 0 # Linenumber relative to node
self.links = None # Links from current node
self.savetext = None # If not None, save text head instead
+ self.savestack = [] # If not None, save text head instead
self.dirname = 'tmp' # directory where files are created
self.includedir = '.' # directory to search @include files
self.nodename = '' # name of current node
@@ -59,14 +146,17 @@ class TexinfoParser:
self.contents = [] # Reset table of contents
self.numbering = [] # Reset section numbering counters
self.nofill = 0 # Normal operation: fill paragraphs
- self.goodset=['html'] # Names that should be parsed in ifset
+ self.values={'html': 1} # Names that should be parsed in ifset
self.stackinfo={} # Keep track of state in the stack
# XXX The following should be reset per node?!
self.footnotes = [] # Reset list of footnotes
self.itemarg = None # Reset command used by @item
self.itemnumber = None # Reset number for @item in @enumerate
self.itemindex = None # Reset item index name
-
+ self.node = None
+ self.nodestack = []
+ self.cont = 0
+ self.includedepth = 0
# Set (output) directory name
def setdirname(self, dirname):
self.dirname = dirname
@@ -132,27 +222,42 @@ class TexinfoParser:
if self.stack:
print '*** Stack not empty at the end'
print '***', self.stack
+ if self.includedepth == 0:
+ while self.nodestack:
+ self.nodestack[-1].finalize ()
+ self.nodestack[-1].flush ()
+ del self.nodestack [-1]
# Start saving text in a buffer instead of writing it to a file
def startsaving(self):
if self.savetext <> None:
- print '*** Recursively saving text, expect trouble'
+ self.savestack.append (self.savetext)
+ # print '*** Recursively saving text, expect trouble'
self.savetext = ''
# Return the text saved so far and start writing to file again
def collectsavings(self):
savetext = self.savetext
- self.savetext = None
+ if len (self.savestack) > 0:
+ self.savetext = self.savestack[-1]
+ del self.savestack[-1]
+ else:
+ self.savetext = None
return savetext or ''
# Write text to file, or save it in a buffer, or ignore it
def write(self, *args):
- text = string.joinfields(args, '')
+ try:
+ text = string.joinfields(args, '')
+ except:
+ print args
+ raise TypeError
if self.savetext <> None:
self.savetext = self.savetext + text
elif self.nodefp:
self.nodefp.write(text)
-
+ elif self.node:
+ self.node.write (text)
# Complete the current node -- write footnotes and close file
def endnode(self):
if self.savetext <> None:
@@ -172,7 +277,16 @@ class TexinfoParser:
self.write ('<HR>\n')
self.write('</BODY>\n')
self.nodefp.close()
- self.nodefp = None
+ self.nodefp = None
+ elif self.node:
+ if not self.cont and \
+ (not self.node.type or \
+ (self.node.next and self.node.prev and self.node.up)):
+ self.node.finalize ()
+ self.node.flush ()
+ else:
+ self.nodestack.append (self.node)
+ self.node = None
self.nodename = ''
# Process a list of lines, expanding embedded @-commands
@@ -341,7 +455,9 @@ class TexinfoParser:
save_done = self.done
save_skip = self.skip
save_stack = self.stack
+ self.includedepth = self.includedepth + 1
self.parserest(fp, 0)
+ self.includedepth = self.includedepth - 1
fp.close()
self.done = save_done
self.skip = save_skip
@@ -364,7 +480,9 @@ class TexinfoParser:
def close_TeX(self): pass
def handle_copyright(self): self.write('(C)')
-
+ def open_copyright(self): self.write('(C)')
+ def close_copyright(self): pass
+
def open_minus(self): self.write('-')
def close_minus(self): pass
@@ -466,17 +584,19 @@ class TexinfoParser:
close_i = close_emph
def open_footnote(self):
- if self.savetext <> None:
- print '*** Recursive footnote -- expect weirdness'
+ # if self.savetext <> None:
+ # print '*** Recursive footnote -- expect weirdness'
id = len(self.footnotes) + 1
self.write('<A NAME="footnoteref', `id`, \
'" HREF="#footnotetext', `id`, '">(', `id`, ')</A>')
- self.savetext = ''
+ # self.savetext = ''
+ self.startsaving ()
def close_footnote(self):
id = len(self.footnotes) + 1
- self.footnotes.append(`id`, self.savetext)
- self.savetext = None
+ # self.footnotes.append(`id`, self.savetext)
+ self.footnotes.append(`id`, self.collectsavings())
+ # self.savetext = None
def writefootnotes(self):
self.write('<H2>---------- Footnotes ----------</H2>\n')
@@ -534,7 +654,9 @@ class TexinfoParser:
try:
func = getattr(self, 'bgn_' + cmd)
except AttributeError:
- self.unknown_cmd(cmd, args)
+ # don't complain if we are skipping anyway
+ if not self.skip:
+ self.unknown_cmd(cmd, args)
return
self.stack.append(cmd)
func(args)
@@ -593,16 +715,22 @@ class TexinfoParser:
def bgn_tex(self, args): self.skip = self.skip + 1
def end_tex(self): self.skip = self.skip - 1
- def bgn_set(self, args):
- if args not in self.goodset:
- self.gooset.append(args)
+ def do_set(self, args):
+ fields = string.splitfields (args, ' ')
+ key = fields[0]
+ if len(fields) == 1:
+ value = 1
+ else:
+ value = string.joinfields (fields[1:], ' ')
+ self.values[key]=value
+ print self.values
- def bgn_clear(self, args):
- if args in self.goodset:
- self.gooset.remove(args)
+ def do_clear(self, args):
+ self.values[args] = None
def bgn_ifset(self, args):
- if args not in self.goodset:
+ if args not in self.values.keys() \
+ or self.values[args] is None:
self.skip = self.skip + 1
self.stackinfo[len(self.stack)] = 1
else:
@@ -615,14 +743,25 @@ class TexinfoParser:
del self.stackinfo[len(self.stack) + 1]
def bgn_ifclear(self, args):
- if args in self.goodset:
+ if args in self.values.keys() \
+ and self.values[args] is not None:
self.skip = self.skip + 1
self.stackinfo[len(self.stack)] = 1
else:
self.stackinfo[len(self.stack)] = 0
end_ifclear = end_ifset
-
+
+ def open_value(self):
+ self.startsaving()
+
+ def close_value(self):
+ key = self.collectsavings ()
+ if key in self.values.keys():
+ self.write (self.values[key])
+ else:
+ print '*** Undefined value: ', key
+
# --- Beginning a file ---
do_finalout = do_comment
@@ -630,20 +769,25 @@ class TexinfoParser:
do_setfilename = do_comment
def do_settitle(self, args):
- self.title = args
-
+ print args
+ self.startsaving()
+ self.expand (args)
+ self.title = self.collectsavings ()
+ print self.title
def do_parskip(self, args): pass
# --- Ending a file ---
def do_bye(self, args):
+ self.endnode ()
self.done = 1
# --- Title page ---
def bgn_titlepage(self, args): self.skip = self.skip + 1
def end_titlepage(self): self.skip = self.skip - 1
-
+ def do_shorttitlepage(self, args): pass
+
def do_center(self, args):
# Actually not used outside title page...
self.write('<H1>')
@@ -685,24 +829,28 @@ class TexinfoParser:
else:
if self.debugging: print '--- writing', file
self.filenames[file] = 1
- self.nodefp = open(file, 'w')
+ # self.nodefp = open(file, 'w')
self.nodename = name
+ if self.cont and self.nodestack:
+ self.nodestack[-1].cont = self.nodename
if not self.topname: self.topname = name
title = name
if self.title: title = title + ' -- ' + self.title
# No idea what this means, but this is what latex2html writes
- self.write('<!DOCTYPE HTML PUBLIC "-//W3O//DTD W3 HTML 2.0//EN">\n')
- self.write('<!- Converted with texi2html and Python>\n')
- self.write ('<P>\n<HEAD>\n')
- self.write('<TITLE>', title, '</TITLE>\n')
- self.write ('</HEAD>\n<BODY>\n<P>\n<BR> <HR>\n')
- self.link('Next', next)
- self.link('Prev', prev)
- self.link('Up', up)
- if self.nodename <> self.topname:
- self.link('Top', self.topname)
- self.write ('<BR> <HR> <P>\n')
-
+ # self.write('<!DOCTYPE HTML PUBLIC "-//W3O//DTD W3 HTML 2.0//EN">\n')
+ # self.write('<!- Converted with texi2html and Python>\n')
+ # self.write ('<P>\n<HEAD>\n')
+ # self.write('<TITLE>', title, '</TITLE>\n')
+ # self.write ('</HEAD>\n<BODY>\n<P>\n<BR> <HR>\n')
+ # self.link('Next', next)
+ # self.link('Prev', prev)
+ # self.link('Up', up)
+ # if self.nodename <> self.topname:
+ # self.link('Top', self.topname)
+ # self.write ('<BR> <HR> <P>\n')
+ self.node = Node (self.dirname, self.nodename, self.topname, \
+ title, next, prev, up)
+
def link(self, label, nodename):
if nodename:
if string.lower(nodename) == '(dir)':
@@ -714,12 +862,37 @@ class TexinfoParser:
# --- Sectioning commands ---
+ def popstack (self, type):
+ if (self.node):
+ self.node.type = type
+ while self.nodestack:
+ if self.nodestack[-1].type > type:
+ self.nodestack[-1].finalize ()
+ self.nodestack[-1].flush ()
+ del self.nodestack [-1]
+ elif self.nodestack[-1].type == type:
+ if not self.nodestack[-1].next:
+ self.nodestack[-1].next = self.node.name
+ if not self.node.prev:
+ self.node.prev = self.nodestack[-1].name
+ self.nodestack[-1].finalize ()
+ self.nodestack[-1].flush ()
+ del self.nodestack [-1]
+ else:
+ if type > 1 and not self.node.up:
+ self.node.up = self.nodestack[-1].name
+ break
+
def do_chapter(self, args):
self.heading('H1', args, 0)
+ self.popstack (1)
+
def do_unnumbered(self, args):
self.heading('H1', args, -1)
+ self.popstack (1)
def do_appendix(self, args):
self.heading('H1', args, -1)
+ self.popstack (1)
def do_top(self, args):
self.heading('H1', args, -1)
def do_chapheading(self, args):
@@ -729,29 +902,39 @@ class TexinfoParser:
def do_section(self, args):
self.heading('H1', args, 1)
+ self.popstack (2)
+
def do_unnumberedsec(self, args):
self.heading('H1', args, -1)
+ self.popstack (2)
def do_appendixsec(self, args):
self.heading('H1', args, -1)
+ self.popstack (2)
do_appendixsection = do_appendixsec
def do_heading(self, args):
self.heading('H1', args, -1)
def do_subsection(self, args):
self.heading('H2', args, 2)
+ self.popstack (3)
def do_unnumberedsubsec(self, args):
self.heading('H2', args, -1)
+ self.popstack (3)
def do_appendixsubsec(self, args):
self.heading('H2', args, -1)
+ self.popstack (3)
def do_subheading(self, args):
self.heading('H2', args, -1)
def do_subsubsection(self, args):
self.heading('H3', args, 3)
+ self.popstack (4)
def do_unnumberedsubsubsec(self, args):
self.heading('H3', args, -1)
+ self.popstack (4)
def do_appendixsubsubsec(self, args):
self.heading('H3', args, -1)
+ self.popstack (4)
def do_subsubheading(self, args):
self.heading('H3', args, -1)
@@ -773,8 +956,8 @@ class TexinfoParser:
print '---', args
def do_contents(self, args):
- pass
- # self.listcontents('Table of Contents', 999)
+ # pass
+ self.listcontents('Table of Contents', 999)
def do_shortcontents(self, args):
pass
@@ -821,14 +1004,8 @@ class TexinfoParser:
# --- Function and variable definitions ---
def bgn_deffn(self, args):
- self.write('<DL><DT>')
- words = splitwords(args, 2)
- [category, name], rest = words[:2], words[2:]
- self.expand('@b{' + name + '}')
- for word in rest: self.expand(' ' + makevar(word))
- self.expand(' -- ' + category)
- self.write('<DD>\n')
- self.index('fn', name)
+ self.write('<DL>')
+ self.do_deffnx (args)
def end_deffn(self):
self.write('</DL>\n')
@@ -845,15 +1022,24 @@ class TexinfoParser:
def bgn_defun(self, args): self.bgn_deffn('Function ' + args)
end_defun = end_deffn
+ def do_defunx(self, args): self.do_deffnx('Function ' + args)
def bgn_defmac(self, args): self.bgn_deffn('Macro ' + args)
end_defmac = end_deffn
+ def do_defmacx(self, args): self.do_deffnx('Macro ' + args)
def bgn_defspec(self, args): self.bgn_deffn('{Special Form} ' + args)
end_defspec = end_deffn
+ def do_defspecx(self, args): self.do_deffnx('{Special Form} ' + args)
def bgn_defvr(self, args):
- self.write('<DL><DT>')
+ self.write('<DL>')
+ self.do_defvrx (args)
+
+ end_defvr = end_deffn
+
+ def do_defvrx(self, args):
+ self.write('<DT>')
words = splitwords(args, 2)
[category, name], rest = words[:2], words[2:]
self.expand('@code{' + name + '}')
@@ -863,18 +1049,24 @@ class TexinfoParser:
self.write('<DD>\n')
self.index('vr', name)
- end_defvr = end_deffn
-
def bgn_defvar(self, args): self.bgn_defvr('Variable ' + args)
end_defvar = end_defvr
+ def do_defvarx(self, args): self.do_defvrx('Variable ' + args)
def bgn_defopt(self, args): self.bgn_defvr('{User Option} ' + args)
end_defopt = end_defvr
+ def do_defoptx(self, args): self.do_defvrx('{User Option} ' + args)
# --- Ditto for typed languages ---
def bgn_deftypefn(self, args):
- self.write('<DL><DT>')
+ self.write('<DL>')
+ self.do_deftypefnx (args)
+
+ end_deftypefn = end_deffn
+
+ def do_deftypefnx(self, args):
+ self.write('<DT>')
words = splitwords(args, 3)
[category, datatype, name], rest = words[:3], words[3:]
self.expand('@code{' + datatype + '} @b{' + name + '}')
@@ -883,15 +1075,21 @@ class TexinfoParser:
self.write('<DD>\n')
self.index('fn', name)
- end_deftypefn = end_deffn
def bgn_deftypefun(self, args): self.bgn_deftypefn('Function ' + args)
end_deftypefun = end_deftypefn
+ def do_deftypefunx(self, args): self.do_deftypefnx('Function ' + args)
def bgn_deftypevr(self, args):
+ self.write('<DL>')
+ self.do_deftypevrx (args)
+
+ end_deftypevr = end_deftypefn
+
+ def do_deftypevrx(self, args):
+ self.write('<DT>')
words = splitwords(args, 3)
[category, datatype, name], rest = words[:3], words[3:]
- self.write('<DL><DT>')
self.expand('@code{' + datatype + '} @b{' + name + '}')
# If there are too many arguments, show them
for word in rest: self.expand(' ' + word)
@@ -899,18 +1097,24 @@ class TexinfoParser:
self.write('<DD>\n')
self.index('fn', name)
- end_deftypevr = end_deftypefn
-
def bgn_deftypevar(self, args):
self.bgn_deftypevr('Variable ' + args)
end_deftypevar = end_deftypevr
+ def do_deftypevarx(self, args):
+ self.do_deftypevrx('Variable ' + args)
# --- Ditto for object-oriented languages ---
def bgn_defcv(self, args):
+ self.write('<DL>')
+ self.do_defcvx(args)
+
+ end_defcv = end_deftypevr
+
+ def do_defcvx(self, args):
+ self.write('<DT>')
words = splitwords(args, 3)
[category, classname, name], rest = words[:3], words[3:]
- self.write('<DL><DT>')
self.expand('@b{' + name + '}')
# If there are too many arguments, show them
for word in rest: self.expand(' ' + word)
@@ -918,14 +1122,20 @@ class TexinfoParser:
self.write('<DD>\n')
self.index('vr', name + ' @r{of ' + classname + '}')
- end_defcv = end_deftypevr
-
def bgn_defivar(self, args):
self.bgn_defcv('{Instance Variable} ' + args)
end_defivar = end_defcv
+ def do_defivarx(self, args):
+ self.do_defcvx('{Instance Variable} ' + args)
def bgn_defop(self, args):
- self.write('<DL><DT>')
+ self.write('<DL>')
+ self.do_defopx (args)
+
+ end_defop = end_defcv
+
+ def do_defopx(self, args):
+ self.write('<DT>')
words = splitwords(args, 3)
[category, classname, name], rest = words[:3], words[3:]
self.expand('@b{' + name + '}')
@@ -934,16 +1144,22 @@ class TexinfoParser:
self.write('<DD>\n')
self.index('fn', name + ' @r{on ' + classname + '}')
- end_defop = end_defcv
-
def bgn_defmethod(self, args):
self.bgn_defop('Method ' + args)
end_defmethod = end_defop
+ def do_defmethodx(self, args):
+ self.do_defopx('Method ' + args)
# --- Ditto for data types ---
def bgn_deftp(self, args):
- self.write('<DL><DT>')
+ self.write('<DL>')
+ self.do_deftpx(args)
+
+ end_deftp = end_defcv
+
+ def do_deftpx(self, args):
+ self.write('<DT>')
words = splitwords(args, 2)
[category, name], rest = words[:2], words[2:]
self.expand('@b{' + name + '}')
@@ -952,8 +1168,6 @@ class TexinfoParser:
self.write('<DD>\n')
self.index('tp', name)
- end_deftp = end_defcv
-
# --- Making Lists and Tables
def bgn_enumerate(self, args):
@@ -1211,15 +1425,18 @@ def makefile(nodename):
# Characters that are perfectly safe in filenames and hyperlinks
-goodchars = string.letters + string.digits + '!@-_=+.'
+goodchars = string.letters + string.digits + '!@-=+.'
-# Replace characters that aren't perfectly safe by underscores
+# Replace characters that aren't perfectly safe by dashes
+# Underscores are bad since Cern HTTPD treats them as delimiters for
+# encoding times, so you get mismatches if you compress your files:
+# a.html.gz will map to a_b.html.gz
def fixfunnychars(addr):
i = 0
while i < len(addr):
c = addr[i]
if c not in goodchars:
- c = '_'
+ c = '-'
addr = addr[:i] + c + addr[i+1:]
i = i + len(c)
return addr
@@ -1252,8 +1469,11 @@ def test():
while sys.argv[1:2] == ['-d']:
parser.debugging = parser.debugging + 1
del sys.argv[1:2]
+ if sys.argv[1] == '-c':
+ parser.cont = 1
+ del sys.argv[1]
if len(sys.argv) <> 3:
- print 'usage: texi2html [-d] [-d] inputfile outputdirectory'
+ print 'usage: texi2html [-d] [-d] [-c] inputfile outputdirectory'
sys.exit(2)
file = sys.argv[1]
parser.setdirname(sys.argv[2])
@@ -1272,3 +1492,9 @@ def test():
test()
+
+# Emacs local variables
+#
+# Local Variables:
+# py-indent-offset: 8
+# End: