#! /usr/bin/env python3

# A somewhat-generalized FAQ-to-HTML converter (by Ka-Ping Yee, 10 Sept 96)

# Reads a text file given on standard input or named as first argument, and
# generates HTML 2.0 on standard output.  Recognizes these constructions:
#
#     HTML element             pattern at the beginning of a line
#
#     section heading          (<number><period>)+<space>
#     numbered list element    <1-2 spaces>(<number><period>)+<space>
#     unnumbered list element  <0-2 spaces><hyphen or asterisk><space>
#     preformatted section     <more than two spaces>
#
# Heading level is determined by the number of (<number><period>) segments.
# Blank lines force a separation of elements; if none of the above four
# types is indicated, a new paragraph begins.  A line beginning with many
# spaces is interpreted as a continuation (instead of preformatted) after
# a list element.  Headings are anchored; paragraphs starting with "Q." are
# emphasized, and those marked with "A." get their first sentence emphasized.
#
# Hyperlinks are created from references to:
#     URLs, explicitly marked using <URL:scheme://host...>
#     other questions, of the form "question <number>(<period><number>)*"
#     sections, of the form "section <number>".
#
# Heading lines are echoed to standard error as they are encountered.

import html
import re
import sys

# --------------------------------------------------------- regular expressions
# The leading-space counts below follow the table in the header comment.
orditemprog = re.compile(r'  ?([1-9][0-9]*\.)+ +')      # 1-2 leading spaces
itemprog = re.compile(r' ? ?[-*] +')                    # 0-2 leading spaces
headingprog = re.compile(r'([1-9][0-9]*\.)+ +')         # no leading space
prefmtprog = re.compile(r'   ')                         # more than two spaces
blankprog = re.compile(r'^[ \t\r\n]$')
questionprog = re.compile(r' *Q\. +')
answerprog = re.compile(r' *A\. +')
sentprog = re.compile(r'(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)')
mailhdrprog = re.compile('^(Subject|Newsgroups|Followup-To|From|Reply-To'
                         '|Approved|Archive-Name|Version|Last-Modified): +',
                         re.IGNORECASE)
versionprog = re.compile('Version: ')
fragidprog = re.compile(r'^ +|\.? +$')

# These run on the joined body, i.e. *after* spew() has entity-escaped the
# text, so the angle brackets they look for appear as &lt; and &gt;.
urlprog = re.compile(r'&lt;URL:([^&]+)&gt;')
addrprog = re.compile(r'&lt;([^>@:]+@[^&@:]+)&gt;')
qrefprog = re.compile(r'question +([1-9](\.[0-9]+)*)')
srefprog = re.compile(r'section +([1-9][0-9]*)')
entityprog = re.compile('[&<>]')

# ------------------------------------------------------------ global variables
body = []                       # list of HTML fragments produced so far
ollev = ullev = 0               # ordered-list nesting depth / inside-<ul> flag
element = content = secnum = version = ''


def _matchlen(prog, text):
    """Return the length of prog's match at the start of text, or -1."""
    found = prog.match(text)
    return found.end() if found else -1


# ----------------------------------------------------- for making nested lists
def dnol():
    """Open one more level of ordered list.

    A nested list belongs inside the preceding item, so that item's closing
    </li> (if it is the last fragment emitted) is removed first; upol()
    supplies it again when the nested list is closed.
    """
    global body, ollev
    ollev = ollev + 1
    # body is empty when the very first construct is a numbered item.
    if body and body[-1] == '</li>':
        del body[-1]
    body.append('<ol>')


def upol():
    """Close one level of ordered list (and the enclosing item, if nested)."""
    global body, ollev
    ollev = ollev - 1
    body.append('</ol></li>' if ollev else '</ol>')


# --------------------------------- output one element and convert its contents
def spew(clearol=0, clearul=0):
    """Emit the pending element, then optionally close open lists.

    The pending text in the global `content` is entity-escaped, decorated
    according to what it is (question, heading, answer) and appended to
    `body` wrapped in the tag named by the global `element`.

    clearol -- if true, close every open ordered list afterwards
    clearul -- if true, close the open unordered list afterwards
    """
    global content, body, ollev, ullev
    if content:
        if entityprog.search(content):
            content = html.escape(content, quote=False)
        n = _matchlen(questionprog, content)
        if n > 0:
            content = '<em>' + content[n:] + '</em>'
            if ollev:                       # question reference in index
                fragid = fragidprog.sub('', secnum)
                content = '<a href="#%s">%s</a>' % (fragid, content)
        if element[:1] == 'h':              # heading in the main text
            fragid = fragidprog.sub('', secnum)
            content = secnum + '<a name="%s">%s</a>' % (fragid, content)
        n = _matchlen(answerprog, content)
        if n > 0:                           # answer paragraph
            content = sentprog.sub(r'<strong>\1</strong>', content[n:],
                                   count=1)
        body.append('<' + element + '>' + content)
        body.append('</' + element + '>')
        content = ''
    while clearol and ollev:
        upol()
    if clearul and ullev:
        body.append('</ul>')
        ullev = 0


# ------------------------------------------------------------------ conversion
def faq2html(lines):
    """Convert the lines of a FAQ text to a complete HTML 2.0 document.

    lines -- iterable of text lines, each including its line terminator
             (as returned by readlines()).

    Returns the whole document as one string.  The module-level conversion
    state is reset on entry, so the function may be called repeatedly.
    Heading lines are written to sys.stderr as they are found.
    """
    global body, ollev, ullev, element, content, secnum, version
    body = []
    ollev = ullev = 0
    element = content = secnum = version = ''

    for line in lines:
        if line[2:9] == '=======':              # <hr> will appear *before*
            body.append('<hr>')                 # the underlined heading
            continue

        n = _matchlen(orditemprog, line)
        if n > 0:                               # make ordered list item
            spew(0, 'clear ul')
            secnum = line[:n]
            level = secnum.count('.')
            while level > ollev:
                dnol()
            while level < ollev:
                upol()
            element, content = 'li', line[n:]
            continue

        n = _matchlen(itemprog, line)
        if n > 0:                               # make unordered list item
            spew('clear ol', 0)
            if ullev == 0:
                body.append('<ul>')
                ullev = 1
            element, content = 'li', line[n:]
            continue

        n = _matchlen(headingprog, line)
        if n > 0:                               # make heading element
            spew('clear ol', 'clear ul')
            secnum = line[:n]
            sys.stderr.write(line)
            element, content = 'h%d' % secnum.count('.'), line[n:]
            continue

        n = 0
        if not secnum:                          # haven't hit body yet
            n = _matchlen(mailhdrprog, line)
            if not version:                     # keep the first one found
                v = _matchlen(versionprog, line)
                if v > 0:
                    version = line[v:]
        if n <= 0 and element != 'li':          # not pre if after a list item
            n = _matchlen(prefmtprog, line)
        if n > 0:                               # make preformatted element
            if element == 'pre':
                content = content + line
            else:
                spew('clear ol', 'clear ul')
                element, content = 'pre', line
            continue

        if _matchlen(blankprog, line) > 0:      # force a new element
            spew()
            element = ''
        elif element:                           # continue current element
            content = content + line
        else:                                   # no element; make paragraph
            spew('clear ol', 'clear ul')
            element, content = 'p', line

    # Output the last element and close any list still open at end of input.
    spew('clear ol', 'clear ul')

    text = ''.join(body)
    text = urlprog.sub(r'<a href="\1">\1</a>', text)
    text = addrprog.sub(r'<a href="mailto:\1">\1</a>', text)
    text = qrefprog.sub(r'<a href="#\1">question \1</a>', text)
    text = srefprog.sub(r'<a href="#\1">section \1</a>', text)

    return '\n'.join([
        '<!doctype html public "-//IETF//DTD HTML 2.0//EN"><html>',
        '<head><title>Python Frequently-Asked Questions v' + version,
        "</title></head><body>(This file was generated using Ping's",
        '<a href="faq2html.py">faq2html.py</a>.)',
        text + '</body></html>',
    ]) + '\n'


# ---------------------------------------------------------------- main program
def main(argv=None):
    """Read the FAQ named by argv[1] (or standard input) and print the HTML."""
    if argv is None:
        argv = sys.argv
    if len(argv) > 1 and argv[1]:
        with open(argv[1]) as faq:
            lines = faq.readlines()
    else:
        lines = sys.stdin.readlines()
    sys.stdout.write(faq2html(lines))


if __name__ == '__main__':
    main()