summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>1995-02-27 13:16:55 (GMT)
committerGuido van Rossum <guido@python.org>1995-02-27 13:16:55 (GMT)
commit7c750e1e099128157430d26ffa7e2a44d87daf3c (patch)
tree7c74472b5402733b5d52519799fbc9415fc4cb6c
parenteb9e9d2b2a61629e7562587a679367c3bb52c92b (diff)
downloadcpython-7c750e1e099128157430d26ffa7e2a44d87daf3c.zip
cpython-7c750e1e099128157430d26ffa7e2a44d87daf3c.tar.gz
cpython-7c750e1e099128157430d26ffa7e2a44d87daf3c.tar.bz2
added html parser and supporting cast
-rw-r--r--Lib/Para.py408
-rw-r--r--Lib/fmt.py621
-rw-r--r--Lib/htmllib.py635
-rw-r--r--Lib/lib-old/Para.py408
-rw-r--r--Lib/lib-old/fmt.py621
-rw-r--r--Lib/sgmllib.py321
6 files changed, 3014 insertions, 0 deletions
diff --git a/Lib/Para.py b/Lib/Para.py
new file mode 100644
index 0000000..6a7057d
--- /dev/null
+++ b/Lib/Para.py
@@ -0,0 +1,408 @@
+# Text formatting abstractions
+
+
+# Oft-used type object
+Int = type(0)
+
+
+# Represent a paragraph. This is a list of words with associated
+# font and size information, plus indents and justification for the
+# entire paragraph.
+# Once the words have been added to a paragraph, it can be laid out
+# for different line widths. Once laid out, it can be rendered at
+# different screen locations. Once rendered, it can be queried
+# for mouse hits, and parts of the text can be highlighted
+class Para:
+ #
+ def __init__(self):
+ self.words = [] # The words
+ self.just = 'l' # Justification: 'l', 'r', 'lr' or 'c'
+ self.indent_left = self.indent_right = self.indent_hang = 0
+ # Final lay-out parameters, may change
+ self.left = self.top = self.right = self.bottom = \
+ self.width = self.height = self.lines = None
+ #
+ # Add a word, computing size information for it.
+ # Words may also be added manually by appending to self.words
+ # Each word should be a 7-tuple:
+ # (font, text, width, space, stretch, ascent, descent)
+ def addword(self, d, font, text, space, stretch):
+ if font <> None:
+ d.setfont(font)
+ width = d.textwidth(text)
+ ascent = d.baseline()
+ descent = d.lineheight() - ascent
+ spw = d.textwidth(' ')
+ space = space * spw
+ stretch = stretch * spw
+ tuple = (font, text, width, space, stretch, ascent, descent)
+ self.words.append(tuple)
+ #
+ # Hooks to begin and end anchors -- insert numbers in the word list!
+ def bgn_anchor(self, id):
+ self.words.append(id)
+ #
+ def end_anchor(self, id):
+ self.words.append(0)
+ #
+ # Return the total length (width) of the text added so far, in pixels
+ def getlength(self):
+ total = 0
+ for word in self.words:
+ if type(word) <> Int:
+ total = total + word[2] + word[3]
+ return total
+ #
+ # Tab to a given position (relative to the current left indent):
+ # remove all stretch, add fixed space up to the new indent.
+ # If the current position is already beying the tab stop,
+ # don't add any new space (but still remove the stretch)
+ def tabto(self, tab):
+ total = 0
+ as, de = 1, 0
+ for i in range(len(self.words)):
+ word = self.words[i]
+ if type(word) == Int: continue
+ fo, te, wi, sp, st, as, de = word
+ self.words[i] = fo, te, wi, sp, 0, as, de
+ total = total + wi + sp
+ if total < tab:
+ self.words.append(None, '', 0, tab-total, 0, as, de)
+ #
+ # Make a hanging tag: tab to hang, increment indent_left by hang,
+ # and reset indent_hang to -hang
+ def makehangingtag(self, hang):
+ self.tabto(hang)
+ self.indent_left = self.indent_left + hang
+ self.indent_hang = -hang
+ #
+ # Decide where the line breaks will be given some screen width
+ def layout(self, linewidth):
+ self.width = linewidth
+ height = 0
+ self.lines = lines = []
+ avail1 = self.width - self.indent_left - self.indent_right
+ avail = avail1 - self.indent_hang
+ words = self.words
+ i = 0
+ n = len(words)
+ lastfont = None
+ while i < n:
+ firstfont = lastfont
+ charcount = 0
+ width = 0
+ stretch = 0
+ ascent = 0
+ descent = 0
+ lsp = 0
+ j = i
+ while i < n:
+ word = words[i]
+ if type(word) == Int:
+ if word > 0 and width >= avail:
+ break
+ i = i+1
+ continue
+ fo, te, wi, sp, st, as, de = word
+ if width + wi > avail and width > 0 and wi > 0:
+ break
+ if fo <> None:
+ lastfont = fo
+ if width == 0:
+ firstfont = fo
+ charcount = charcount + len(te) + (sp > 0)
+ width = width + wi + sp
+ lsp = sp
+ stretch = stretch + st
+ lst = st
+ ascent = max(ascent, as)
+ descent = max(descent, de)
+ i = i+1
+ while i > j and type(words[i-1]) == Int and \
+ words[i-1] > 0: i = i-1
+ width = width - lsp
+ if i < n:
+ stretch = stretch - lst
+ else:
+ stretch = 0
+ tuple = i-j, firstfont, charcount, width, stretch, \
+ ascent, descent
+ lines.append(tuple)
+ height = height + ascent + descent
+ avail = avail1
+ self.height = height
+ #
+ # Call a function for all words in a line
+ def visit(self, wordfunc, anchorfunc):
+ avail1 = self.width - self.indent_left - self.indent_right
+ avail = avail1 - self.indent_hang
+ v = self.top
+ i = 0
+ for tuple in self.lines:
+ wordcount, firstfont, charcount, width, stretch, \
+ ascent, descent = tuple
+ h = self.left + self.indent_left
+ if i == 0: h = h + self.indent_hang
+ extra = 0
+ if self.just == 'r': h = h + avail - width
+ elif self.just == 'c': h = h + (avail - width) / 2
+ elif self.just == 'lr' and stretch > 0:
+ extra = avail - width
+ v2 = v + ascent + descent
+ for j in range(i, i+wordcount):
+ word = self.words[j]
+ if type(word) == Int:
+ ok = anchorfunc(self, tuple, word, \
+ h, v)
+ if ok <> None: return ok
+ continue
+ fo, te, wi, sp, st, as, de = word
+ if extra > 0 and stretch > 0:
+ ex = extra * st / stretch
+ extra = extra - ex
+ stretch = stretch - st
+ else:
+ ex = 0
+ h2 = h + wi + sp + ex
+ ok = wordfunc(self, tuple, word, h, v, \
+ h2, v2, (j==i), (j==i+wordcount-1))
+ if ok <> None: return ok
+ h = h2
+ v = v2
+ i = i + wordcount
+ avail = avail1
+ #
+ # Render a paragraph in "drawing object" d, using the rectangle
+ # given by (left, top, right) with an unspecified bottom.
+ # Return the computed bottom of the text.
+ def render(self, d, left, top, right):
+ if self.width <> right-left:
+ self.layout(right-left)
+ self.left = left
+ self.top = top
+ self.right = right
+ self.bottom = self.top + self.height
+ self.anchorid = 0
+ try:
+ self.d = d
+ self.visit(self.__class__._renderword, \
+ self.__class__._renderanchor)
+ finally:
+ self.d = None
+ return self.bottom
+ #
+ def _renderword(self, tuple, word, h, v, h2, v2, isfirst, islast):
+ if word[0] <> None: self.d.setfont(word[0])
+ baseline = v + tuple[5]
+ self.d.text((h, baseline - word[5]), word[1])
+ if self.anchorid > 0:
+ self.d.line((h, baseline+2), (h2, baseline+2))
+ #
+ def _renderanchor(self, tuple, word, h, v):
+ self.anchorid = word
+ #
+ # Return which anchor(s) was hit by the mouse
+ def hitcheck(self, mouseh, mousev):
+ self.mouseh = mouseh
+ self.mousev = mousev
+ self.anchorid = 0
+ self.hits = []
+ self.visit(self.__class__._hitcheckword, \
+ self.__class__._hitcheckanchor)
+ return self.hits
+ #
+ def _hitcheckword(self, tuple, word, h, v, h2, v2, isfirst, islast):
+ if self.anchorid > 0 and h <= self.mouseh <= h2 and \
+ v <= self.mousev <= v2:
+ self.hits.append(self.anchorid)
+ #
+ def _hitcheckanchor(self, tuple, word, h, v):
+ self.anchorid = word
+ #
+ # Return whether the given anchor id is present
+ def hasanchor(self, id):
+ return id in self.words or -id in self.words
+ #
+ # Extract the raw text from the word list, substituting one space
+ # for non-empty inter-word space, and terminating with '\n'
+ def extract(self):
+ text = ''
+ for w in self.words:
+ if type(w) <> Int:
+ word = w[1]
+ if w[3]: word = word + ' '
+ text = text + word
+ return text + '\n'
+ #
+ # Return which character position was hit by the mouse, as
+ # an offset in the entire text as returned by extract().
+ # Return None if the mouse was not in this paragraph
+ def whereis(self, d, mouseh, mousev):
+ if mousev < self.top or mousev > self.bottom:
+ return None
+ self.mouseh = mouseh
+ self.mousev = mousev
+ self.lastfont = None
+ self.charcount = 0
+ try:
+ self.d = d
+ return self.visit(self.__class__._whereisword, \
+ self.__class__._whereisanchor)
+ finally:
+ self.d = None
+ #
+ def _whereisword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
+ fo, te, wi, sp, st, as, de = word
+ if fo <> None: self.lastfont = fo
+ h = h1
+ if isfirst: h1 = 0
+ if islast: h2 = 999999
+ if not (v1 <= self.mousev <= v2 and h1 <= self.mouseh <= h2):
+ self.charcount = self.charcount + len(te) + (sp > 0)
+ return
+ if self.lastfont <> None:
+ self.d.setfont(self.lastfont)
+ cc = 0
+ for c in te:
+ cw = self.d.textwidth(c)
+ if self.mouseh <= h + cw/2:
+ return self.charcount + cc
+ cc = cc+1
+ h = h+cw
+ self.charcount = self.charcount + cc
+ if self.mouseh <= (h+h2) / 2:
+ return self.charcount
+ else:
+ return self.charcount + 1
+ #
+ def _whereisanchor(self, tuple, word, h, v):
+ pass
+ #
+ # Return screen position corresponding to position in paragraph.
+ # Return tuple (h, vtop, vbaseline, vbottom).
+ # This is more or less the inverse of whereis()
+ def screenpos(self, d, pos):
+ if pos < 0:
+ ascent, descent = self.lines[0][5:7]
+ return self.left, self.top, self.top + ascent, \
+ self.top + ascent + descent
+ self.pos = pos
+ self.lastfont = None
+ try:
+ self.d = d
+ ok = self.visit(self.__class__._screenposword, \
+ self.__class__._screenposanchor)
+ finally:
+ self.d = None
+ if ok == None:
+ ascent, descent = self.lines[-1][5:7]
+ ok = self.right, self.bottom - ascent - descent, \
+ self.bottom - descent, self.bottom
+ return ok
+ #
+ def _screenposword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
+ fo, te, wi, sp, st, as, de = word
+ if fo <> None: self.lastfont = fo
+ cc = len(te) + (sp > 0)
+ if self.pos > cc:
+ self.pos = self.pos - cc
+ return
+ if self.pos < cc:
+ self.d.setfont(self.lastfont)
+ h = h1 + self.d.textwidth(te[:self.pos])
+ else:
+ h = h2
+ ascent, descent = tuple[5:7]
+ return h, v1, v1+ascent, v2
+ #
+ def _screenposanchor(self, tuple, word, h, v):
+ pass
+ #
+ # Invert the stretch of text between pos1 and pos2.
+ # If pos1 is None, the beginning is implied;
+ # if pos2 is None, the end is implied.
+ # Undoes its own effect when called again with the same arguments
+ def invert(self, d, pos1, pos2):
+ if pos1 == None:
+ pos1 = self.left, self.top, self.top, self.top
+ else:
+ pos1 = self.screenpos(d, pos1)
+ if pos2 == None:
+ pos2 = self.right, self.bottom,self.bottom,self.bottom
+ else:
+ pos2 = self.screenpos(d, pos2)
+ h1, top1, baseline1, bottom1 = pos1
+ h2, top2, baseline2, bottom2 = pos2
+ if bottom1 <= top2:
+ d.invert((h1, top1), (self.right, bottom1))
+ h1 = self.left
+ if bottom1 < top2:
+ d.invert((h1, bottom1), (self.right, top2))
+ top1, bottom1 = top2, bottom2
+ d.invert((h1, top1), (h2, bottom2))
+
+
+# Test class Para
+# XXX This was last used on the Mac, hence the weird fonts...
+def test():
+ import stdwin
+ from stdwinevents import *
+ words = 'The', 'quick', 'brown', 'fox', 'jumps', 'over', \
+ 'the', 'lazy', 'dog.'
+ paralist = []
+ for just in 'l', 'r', 'lr', 'c':
+ p = Para()
+ p.just = just
+ p.addword(stdwin, ('New York', 'p', 12), words[0], 1, 1)
+ for word in words[1:-1]:
+ p.addword(stdwin, None, word, 1, 1)
+ p.addword(stdwin, None, words[-1], 2, 4)
+ p.addword(stdwin, ('New York', 'b', 18), 'Bye!', 0, 0)
+ p.addword(stdwin, ('New York', 'p', 10), 'Bye!', 0, 0)
+ paralist.append(p)
+ window = stdwin.open('Para.test()')
+ start = stop = selpara = None
+ while 1:
+ etype, win, detail = stdwin.getevent()
+ if etype == WE_CLOSE:
+ break
+ if etype == WE_SIZE:
+ window.change((0, 0), (1000, 1000))
+ if etype == WE_DRAW:
+ width, height = window.getwinsize()
+ d = None
+ try:
+ d = window.begindrawing()
+ d.cliprect(detail)
+ d.erase(detail)
+ v = 0
+ for p in paralist:
+ v = p.render(d, 0, v, width)
+ if p == selpara and \
+ start <> None and stop <> None:
+ p.invert(d, start, stop)
+ finally:
+ if d: d.close()
+ if etype == WE_MOUSE_DOWN:
+ if selpara and start <> None and stop <> None:
+ d = window.begindrawing()
+ selpara.invert(d, start, stop)
+ d.close()
+ start = stop = selpara = None
+ mouseh, mousev = detail[0]
+ for p in paralist:
+ start = p.whereis(stdwin, mouseh, mousev)
+ if start <> None:
+ selpara = p
+ break
+ if etype == WE_MOUSE_UP and start <> None and selpara:
+ mouseh, mousev = detail[0]
+ stop = selpara.whereis(stdwin, mouseh, mousev)
+ if stop == None: start = selpara = None
+ else:
+ if start > stop:
+ start, stop = stop, start
+ d = window.begindrawing()
+ selpara.invert(d, start, stop)
+ d.close()
+ window.close()
diff --git a/Lib/fmt.py b/Lib/fmt.py
new file mode 100644
index 0000000..c096306
--- /dev/null
+++ b/Lib/fmt.py
@@ -0,0 +1,621 @@
+# Text formatting abstractions
+
+
+import string
+import Para
+
+
+# A formatter back-end object has one method that is called by the formatter:
+# addpara(p), where p is a paragraph object. For example:
+
+
+# Formatter back-end to do nothing at all with the paragraphs
+class NullBackEnd:
+ #
+ def __init__(self):
+ pass
+ #
+ def addpara(self, p):
+ pass
+ #
+ def bgn_anchor(self, id):
+ pass
+ #
+ def end_anchor(self, id):
+ pass
+
+
+# Formatter back-end to collect the paragraphs in a list
+class SavingBackEnd(NullBackEnd):
+ #
+ def __init__(self):
+ self.paralist = []
+ #
+ def addpara(self, p):
+ self.paralist.append(p)
+ #
+ def hitcheck(self, h, v):
+ hits = []
+ for p in self.paralist:
+ if p.top <= v <= p.bottom:
+ for id in p.hitcheck(h, v):
+ if id not in hits:
+ hits.append(id)
+ return hits
+ #
+ def extract(self):
+ text = ''
+ for p in self.paralist:
+ text = text + (p.extract())
+ return text
+ #
+ def extractpart(self, long1, long2):
+ if long1 > long2: long1, long2 = long2, long1
+ para1, pos1 = long1
+ para2, pos2 = long2
+ text = ''
+ while para1 < para2:
+ ptext = self.paralist[para1].extract()
+ text = text + ptext[pos1:]
+ pos1 = 0
+ para1 = para1 + 1
+ ptext = self.paralist[para2].extract()
+ return text + ptext[pos1:pos2]
+ #
+ def whereis(self, d, h, v):
+ total = 0
+ for i in range(len(self.paralist)):
+ p = self.paralist[i]
+ result = p.whereis(d, h, v)
+ if result <> None:
+ return i, result
+ return None
+ #
+ def roundtowords(self, long1, long2):
+ i, offset = long1
+ text = self.paralist[i].extract()
+ while offset > 0 and text[offset-1] <> ' ': offset = offset-1
+ long1 = i, offset
+ #
+ i, offset = long2
+ text = self.paralist[i].extract()
+ n = len(text)
+ while offset < n-1 and text[offset] <> ' ': offset = offset+1
+ long2 = i, offset
+ #
+ return long1, long2
+ #
+ def roundtoparagraphs(self, long1, long2):
+ long1 = long1[0], 0
+ long2 = long2[0], len(self.paralist[long2[0]].extract())
+ return long1, long2
+
+
+# Formatter back-end to send the text directly to the drawing object
+class WritingBackEnd(NullBackEnd):
+ #
+ def __init__(self, d, width):
+ self.d = d
+ self.width = width
+ self.lineno = 0
+ #
+ def addpara(self, p):
+ self.lineno = p.render(self.d, 0, self.lineno, self.width)
+
+
+# A formatter receives a stream of formatting instructions and assembles
+# these into a stream of paragraphs on to a back-end. The assembly is
+# parametrized by a text measurement object, which must match the output
+# operations of the back-end. The back-end is responsible for splitting
+# paragraphs up in lines of a given maximum width. (This is done because
+# in a windowing environment, when the window size changes, there is no
+# need to redo the assembly into paragraphs, but the splitting into lines
+# must be done taking the new window size into account.)
+
+
+# Formatter base class. Initialize it with a text measurement object,
+# which is used for text measurements, and a back-end object,
+# which receives the completed paragraphs. The formatting methods are:
+# setfont(font)
+# setleftindent(nspaces)
+# setjust(type) where type is 'l', 'c', 'r', or 'lr'
+# flush()
+# vspace(nlines)
+# needvspace(nlines)
+# addword(word, nspaces)
+class BaseFormatter:
+ #
+ def __init__(self, d, b):
+ # Drawing object used for text measurements
+ self.d = d
+ #
+ # BackEnd object receiving completed paragraphs
+ self.b = b
+ #
+ # Parameters of the formatting model
+ self.leftindent = 0
+ self.just = 'l'
+ self.font = None
+ self.blanklines = 0
+ #
+ # Parameters derived from the current font
+ self.space = d.textwidth(' ')
+ self.line = d.lineheight()
+ self.ascent = d.baseline()
+ self.descent = self.line - self.ascent
+ #
+ # Parameter derived from the default font
+ self.n_space = self.space
+ #
+ # Current paragraph being built
+ self.para = None
+ self.nospace = 1
+ #
+ # Font to set on the next word
+ self.nextfont = None
+ #
+ def newpara(self):
+ return Para.Para()
+ #
+ def setfont(self, font):
+ if font == None: return
+ self.font = self.nextfont = font
+ d = self.d
+ d.setfont(font)
+ self.space = d.textwidth(' ')
+ self.line = d.lineheight()
+ self.ascent = d.baseline()
+ self.descent = self.line - self.ascent
+ #
+ def setleftindent(self, nspaces):
+ self.leftindent = int(self.n_space * nspaces)
+ if self.para:
+ hang = self.leftindent - self.para.indent_left
+ if hang > 0 and self.para.getlength() <= hang:
+ self.para.makehangingtag(hang)
+ self.nospace = 1
+ else:
+ self.flush()
+ #
+ def setrightindent(self, nspaces):
+ self.rightindent = int(self.n_space * nspaces)
+ if self.para:
+ self.para.indent_right = self.rightindent
+ self.flush()
+ #
+ def setjust(self, just):
+ self.just = just
+ if self.para:
+ self.para.just = self.just
+ #
+ def flush(self):
+ if self.para:
+ self.b.addpara(self.para)
+ self.para = None
+ if self.font <> None:
+ self.d.setfont(self.font)
+ self.nospace = 1
+ #
+ def vspace(self, nlines):
+ self.flush()
+ if nlines > 0:
+ self.para = self.newpara()
+ tuple = None, '', 0, 0, 0, int(nlines*self.line), 0
+ self.para.words.append(tuple)
+ self.flush()
+ self.blanklines = self.blanklines + nlines
+ #
+ def needvspace(self, nlines):
+ self.flush() # Just to be sure
+ if nlines > self.blanklines:
+ self.vspace(nlines - self.blanklines)
+ #
+ def addword(self, text, space):
+ if self.nospace and not text:
+ return
+ self.nospace = 0
+ self.blanklines = 0
+ if not self.para:
+ self.para = self.newpara()
+ self.para.indent_left = self.leftindent
+ self.para.just = self.just
+ self.nextfont = self.font
+ space = int(space * self.space)
+ self.para.words.append(self.nextfont, text, \
+ self.d.textwidth(text), space, space, \
+ self.ascent, self.descent)
+ self.nextfont = None
+ #
+ def bgn_anchor(self, id):
+ if not self.para:
+ self.nospace = 0
+ self.addword('', 0)
+ self.para.bgn_anchor(id)
+ #
+ def end_anchor(self, id):
+ if not self.para:
+ self.nospace = 0
+ self.addword('', 0)
+ self.para.end_anchor(id)
+
+
+# Measuring object for measuring text as viewed on a tty
+class NullMeasurer:
+ #
+ def __init__(self):
+ pass
+ #
+ def setfont(self, font):
+ pass
+ #
+ def textwidth(self, text):
+ return len(text)
+ #
+ def lineheight(self):
+ return 1
+ #
+ def baseline(self):
+ return 0
+
+
+# Drawing object for writing plain ASCII text to a file
+class FileWriter:
+ #
+ def __init__(self, fp):
+ self.fp = fp
+ self.lineno, self.colno = 0, 0
+ #
+ def setfont(self, font):
+ pass
+ #
+ def text(self, (h, v), str):
+ if not str: return
+ if '\n' in str:
+ raise ValueError, 'can\'t write \\n'
+ while self.lineno < v:
+ self.fp.write('\n')
+ self.colno, self.lineno = 0, self.lineno + 1
+ while self.lineno > v:
+ # XXX This should never happen...
+ self.fp.write('\033[A') # ANSI up arrow
+ self.lineno = self.lineno - 1
+ if self.colno < h:
+ self.fp.write(' ' * (h - self.colno))
+ elif self.colno > h:
+ self.fp.write('\b' * (self.colno - h))
+ self.colno = h
+ self.fp.write(str)
+ self.colno = h + len(str)
+
+
+# Formatting class to do nothing at all with the data
+class NullFormatter(BaseFormatter):
+ #
+ def __init__(self):
+ d = NullMeasurer()
+ b = NullBackEnd()
+ BaseFormatter.__init__(self, d, b)
+
+
+# Formatting class to write directly to a file
+class WritingFormatter(BaseFormatter):
+ #
+ def __init__(self, fp, width):
+ dm = NullMeasurer()
+ dw = FileWriter(fp)
+ b = WritingBackEnd(dw, width)
+ BaseFormatter.__init__(self, dm, b)
+ self.blanklines = 1
+ #
+ # Suppress multiple blank lines
+ def needvspace(self, nlines):
+ BaseFormatter.needvspace(self, min(1, nlines))
+
+
+# A "FunnyFormatter" writes ASCII text with a twist: *bold words*,
+# _italic text_ and _underlined words_, and `quoted text'.
+# It assumes that the fonts are 'r', 'i', 'b', 'u', 'q': (roman,
+# italic, bold, underline, quote).
+# Moreover, if the font is in upper case, the text is converted to
+# UPPER CASE.
+class FunnyFormatter(WritingFormatter):
+ #
+ def flush(self):
+ if self.para: finalize(self.para)
+ WritingFormatter.flush(self)
+
+
+# Surrounds *bold words* and _italic text_ in a paragraph with
+# appropriate markers, fixing the size (assuming these characters'
+# width is 1).
+openchar = \
+ {'b':'*', 'i':'_', 'u':'_', 'q':'`', 'B':'*', 'I':'_', 'U':'_', 'Q':'`'}
+closechar = \
+ {'b':'*', 'i':'_', 'u':'_', 'q':'\'', 'B':'*', 'I':'_', 'U':'_', 'Q':'\''}
+def finalize(para):
+ oldfont = curfont = 'r'
+ para.words.append('r', '', 0, 0, 0, 0) # temporary, deleted at end
+ for i in range(len(para.words)):
+ fo, te, wi = para.words[i][:3]
+ if fo <> None: curfont = fo
+ if curfont <> oldfont:
+ if closechar.has_key(oldfont):
+ c = closechar[oldfont]
+ j = i-1
+ while j > 0 and para.words[j][1] == '': j = j-1
+ fo1, te1, wi1 = para.words[j][:3]
+ te1 = te1 + c
+ wi1 = wi1 + len(c)
+ para.words[j] = (fo1, te1, wi1) + \
+ para.words[j][3:]
+ if openchar.has_key(curfont) and te:
+ c = openchar[curfont]
+ te = c + te
+ wi = len(c) + wi
+ para.words[i] = (fo, te, wi) + \
+ para.words[i][3:]
+ if te: oldfont = curfont
+ else: oldfont = 'r'
+ if curfont in string.uppercase:
+ te = string.upper(te)
+ para.words[i] = (fo, te, wi) + para.words[i][3:]
+ del para.words[-1]
+
+
+# Formatter back-end to draw the text in a window.
+# This has an option to draw while the paragraphs are being added,
+# to minimize the delay before the user sees anything.
+# This manages the entire "document" of the window.
+class StdwinBackEnd(SavingBackEnd):
+ #
+ def __init__(self, window, drawnow):
+ self.window = window
+ self.drawnow = drawnow
+ self.width = window.getwinsize()[0]
+ self.selection = None
+ self.height = 0
+ window.setorigin(0, 0)
+ window.setdocsize(0, 0)
+ self.d = window.begindrawing()
+ SavingBackEnd.__init__(self)
+ #
+ def finish(self):
+ self.d.close()
+ self.d = None
+ self.window.setdocsize(0, self.height)
+ #
+ def addpara(self, p):
+ self.paralist.append(p)
+ if self.drawnow:
+ self.height = \
+ p.render(self.d, 0, self.height, self.width)
+ else:
+ p.layout(self.width)
+ p.left = 0
+ p.top = self.height
+ p.right = self.width
+ p.bottom = self.height + p.height
+ self.height = p.bottom
+ #
+ def resize(self):
+ self.window.change((0, 0), (self.width, self.height))
+ self.width = self.window.getwinsize()[0]
+ self.height = 0
+ for p in self.paralist:
+ p.layout(self.width)
+ p.left = 0
+ p.top = self.height
+ p.right = self.width
+ p.bottom = self.height + p.height
+ self.height = p.bottom
+ self.window.change((0, 0), (self.width, self.height))
+ self.window.setdocsize(0, self.height)
+ #
+ def redraw(self, area):
+ d = self.window.begindrawing()
+ (left, top), (right, bottom) = area
+ d.erase(area)
+ d.cliprect(area)
+ for p in self.paralist:
+ if top < p.bottom and p.top < bottom:
+ v = p.render(d, p.left, p.top, p.right)
+ if self.selection:
+ self.invert(d, self.selection)
+ d.close()
+ #
+ def setselection(self, new):
+ if new:
+ long1, long2 = new
+ pos1 = long1[:3]
+ pos2 = long2[:3]
+ new = pos1, pos2
+ if new <> self.selection:
+ d = self.window.begindrawing()
+ if self.selection:
+ self.invert(d, self.selection)
+ if new:
+ self.invert(d, new)
+ d.close()
+ self.selection = new
+ #
+ def getselection(self):
+ return self.selection
+ #
+ def extractselection(self):
+ if self.selection:
+ a, b = self.selection
+ return self.extractpart(a, b)
+ else:
+ return None
+ #
+ def invert(self, d, region):
+ long1, long2 = region
+ if long1 > long2: long1, long2 = long2, long1
+ para1, pos1 = long1
+ para2, pos2 = long2
+ while para1 < para2:
+ self.paralist[para1].invert(d, pos1, None)
+ pos1 = None
+ para1 = para1 + 1
+ self.paralist[para2].invert(d, pos1, pos2)
+ #
+ def search(self, prog):
+ import regex, string
+ if type(prog) == type(''):
+ prog = regex.compile(string.lower(prog))
+ if self.selection:
+ iold = self.selection[0][0]
+ else:
+ iold = -1
+ hit = None
+ for i in range(len(self.paralist)):
+ if i == iold or i < iold and hit:
+ continue
+ p = self.paralist[i]
+ text = string.lower(p.extract())
+ if prog.search(text) >= 0:
+ a, b = prog.regs[0]
+ long1 = i, a
+ long2 = i, b
+ hit = long1, long2
+ if i > iold:
+ break
+ if hit:
+ self.setselection(hit)
+ i = hit[0][0]
+ p = self.paralist[i]
+ self.window.show((p.left, p.top), (p.right, p.bottom))
+ return 1
+ else:
+ return 0
+ #
+ def showanchor(self, id):
+ for i in range(len(self.paralist)):
+ p = self.paralist[i]
+ if p.hasanchor(id):
+ long1 = i, 0
+ long2 = i, len(p.extract())
+ hit = long1, long2
+ self.setselection(hit)
+ self.window.show( \
+ (p.left, p.top), (p.right, p.bottom))
+ break
+
+
+# GL extensions
+
+# Cache of font handles, indexed by font key.  A font key is a string
+# of the form 'name size'; an empty key or a missing size is completed
+# by setfont() below.
+class GLFontCache:
+ #
+ def __init__(self):
+ self.reset()
+ self.setfont('')
+ #
+ # Forget the current font and all cached handles
+ def reset(self):
+ self.fontkey = None
+ self.fonthandle = None
+ self.fontinfo = None
+ self.fontcache = {}
+ #
+ def close(self):
+ self.reset()
+ #
+ # Make the font with the given key the current font, loading
+ # and scaling it unless its handle is in the cache already
+ def setfont(self, fontkey):
+ if fontkey == '':
+ fontkey = 'Times-Roman 12'
+ elif ' ' not in fontkey:
+ fontkey = fontkey + ' 12'
+ if fontkey == self.fontkey:
+ return
+ if self.fontcache.has_key(fontkey):
+ handle = self.fontcache[fontkey]
+ else:
+ import string
+ i = string.index(fontkey, ' ')
+ name, sizestr = fontkey[:i], fontkey[i:]
+ # NOTE(review): eval() runs whatever follows the font name;
+ # if font keys can come from a document, a numeric
+ # conversion would be safer -- confirm where keys originate
+ size = eval(sizestr)
+ # The unscaled font is cached under size 1
+ key1 = name + ' 1'
+ key = name + ' ' + `size`
+ # NB key may differ from fontkey!
+ if self.fontcache.has_key(key):
+ handle = self.fontcache[key]
+ else:
+ if self.fontcache.has_key(key1):
+ handle = self.fontcache[key1]
+ else:
+ import fm
+ handle = fm.findfont(name)
+ self.fontcache[key1] = handle
+ handle = handle.scalefont(size)
+ self.fontcache[fontkey] = \
+ self.fontcache[key] = handle
+ self.fontkey = fontkey
+ # Only switch fonts when the handle really changes
+ if self.fonthandle <> handle:
+ self.fonthandle = handle
+ self.fontinfo = handle.getfontinfo()
+ handle.setfont()
+
+
+class GLMeasurer(GLFontCache):
+ #
+ def textwidth(self, text):
+ return self.fonthandle.getstrwidth(text)
+ #
+ def baseline(self):
+ return self.fontinfo[6] - self.fontinfo[3]
+ #
+ def lineheight(self):
+ return self.fontinfo[6]
+
+
+class GLWriter(GLFontCache):
+ #
+ # NOTES:
+ # (1) Use gl.ortho2 to use X pixel coordinates!
+ #
+ def text(self, (h, v), text):
+ import gl, fm
+ gl.cmov2i(h, v + self.fontinfo[6] - self.fontinfo[3])
+ fm.prstr(text)
+ #
+ def setfont(self, fontkey):
+ oldhandle = self.fonthandle
+ GLFontCache.setfont(fontkey)
+ if self.fonthandle <> oldhandle:
+ handle.setfont()
+
+
+# Object that can both measure and draw text, sharing one font cache.
+# NOTE(review): attribute lookup searches GLMeasurer and its base
+# GLFontCache before GLWriter, so setfont presumably resolves to
+# GLFontCache.setfont and GLWriter.setfont is never used here -- confirm
+class GLMeasurerWriter(GLMeasurer, GLWriter):
+ pass
+
+
+class GLBackEnd(SavingBackEnd):
+ #
+ def __init__(self, wid):
+ import gl
+ gl.winset(wid)
+ self.wid = wid
+ self.width = gl.getsize()[1]
+ self.height = 0
+ self.d = GLMeasurerWriter()
+ SavingBackEnd.__init__(self)
+ #
+ def finish(self):
+ pass
+ #
+ def addpara(self, p):
+ self.paralist.append(p)
+ self.height = p.render(self.d, 0, self.height, self.width)
+ #
+ def redraw(self):
+ import gl
+ gl.winset(self.wid)
+ width = gl.getsize()[1]
+ if width <> self.width:
+ setdocsize = 1
+ self.width = width
+ for p in self.paralist:
+ p.top = p.bottom = None
+ d = self.d
+ v = 0
+ for p in self.paralist:
+ v = p.render(d, 0, v, width)
diff --git a/Lib/htmllib.py b/Lib/htmllib.py
new file mode 100644
index 0000000..8b3e62b
--- /dev/null
+++ b/Lib/htmllib.py
@@ -0,0 +1,635 @@
+# A parser for HTML documents
+
+
+# HTML: HyperText Markup Language; an SGML-like syntax used by WWW to
+# describe hypertext documents
+#
+# SGML: Standard Generalized Markup Language
+#
+# WWW: World-Wide Web; a distributed hypertext system developed at CERN
+#
+# CERN: European Particle Physics Laboratory in Geneva, Switzerland
+
+
+# This file is only concerned with parsing and formatting HTML
+# documents, not with the other (hypertext and networking) aspects of
+# the WWW project. (It does support highlighting of anchors.)
+
+
+import os
+import sys
+import regex
+import string
+import sgmllib
+
+
+class HTMLParser(sgmllib.SGMLParser):
+
+ # Copy base class entities and add some
+ # (a fresh dictionary is filled, so the base class table is not changed)
+ entitydefs = {}
+ for key in sgmllib.SGMLParser.entitydefs.keys():
+ entitydefs[key] = sgmllib.SGMLParser.entitydefs[key]
+ entitydefs['bullet'] = '*'
+
+ # Provided -- handlers for tags introducing literal text
+ # Each start handler switches the parser mode (setliteral or
+ # setnomoretags) and then calls the literal_bgn hook; each end
+ # handler calls the literal_end hook.
+
+ def start_listing(self, attrs):
+ self.setliteral('listing')
+ self.literal_bgn('listing', attrs)
+
+ def end_listing(self):
+ self.literal_end('listing')
+
+ def start_xmp(self, attrs):
+ self.setliteral('xmp')
+ self.literal_bgn('xmp', attrs)
+
+ def end_xmp(self):
+ self.literal_end('xmp')
+
+ # <plaintext> has no end handler: setnomoretags() is called instead
+ def do_plaintext(self, attrs):
+ self.setnomoretags()
+ self.literal_bgn('plaintext', attrs)
+
+ # To be overridden -- begin/end literal mode
+ # tag is 'listing', 'xmp' or 'plaintext'; attrs is the tag's attribute list
+ def literal_bgn(self, tag, attrs): pass
+ def literal_end(self, tag): pass
+
+
+# Next level of sophistication -- collect anchors, title, nextid and isindex
+class CollectingParser(HTMLParser):
+    #
+    def __init__(self):
+        HTMLParser.__init__(self)
+        self.savetext = None    # Text being collected, or None
+        self.nextid = ''
+        self.isindex = 0
+        self.title = ''
+        self.inanchor = 0       # Set by start_a(), cleared by end_a()
+        self.anchors = []       # href of each recorded anchor
+        self.anchornames = []   # name of each recorded anchor
+        self.anchortypes = []   # lowercased type of each recorded anchor
+    #
+    # Record an anchor that has an href or a name.  Afterwards
+    # self.inanchor is the 1-based index of the anchor in the three
+    # lists, negated when the anchor has no href; it stays 0 when the
+    # anchor has neither href nor name.
+    def start_a(self, attrs):
+        self.inanchor = 0
+        href = ''
+        name = ''
+        type = ''
+        for attrname, value in attrs:
+            if attrname == 'href':
+                href = value
+            # Attribute names carry no '=' (compare 'href' above), so
+            # the original tests against 'name=' and 'type=' never matched
+            if attrname == 'name':
+                name = value
+            if attrname == 'type':
+                type = string.lower(value)
+        if not (href or name):
+            return
+        self.anchors.append(href)
+        self.anchornames.append(name)
+        self.anchortypes.append(type)
+        self.inanchor = len(self.anchors)
+        if not href:
+            self.inanchor = -self.inanchor
+    #
+    # Emit '[n]' after an anchor with an href, then leave the anchor
+    def end_a(self):
+        if self.inanchor > 0:
+            # Don't show anchors pointing into the current document
+            if self.anchors[self.inanchor-1][:1] <> '#':
+                self.handle_data('[' + `self.inanchor` + ']')
+        self.inanchor = 0
+    #
+    def start_header(self, attrs): pass
+    def end_header(self): pass
+    #
+    # (head is the same as header)
+    def start_head(self, attrs): pass
+    def end_head(self): pass
+    #
+    def start_body(self, attrs): pass
+    def end_body(self): pass
+    #
+    # Remember the attribute list of <nextid> as is
+    def do_nextid(self, attrs):
+        self.nextid = attrs
+    #
+    def do_isindex(self, attrs):
+        self.isindex = 1
+    #
+    # Between <title> and </title>, handle_data() collects the text
+    def start_title(self, attrs):
+        self.savetext = ''
+    #
+    def end_title(self):
+        if self.savetext <> None:
+            self.title = self.savetext
+            self.savetext = None
+    #
+    # Text is kept only while savetext is active; otherwise it is dropped
+    def handle_data(self, text):
+        if self.savetext is not None:
+            self.savetext = self.savetext + text
+
+
+# Formatting parser -- takes a formatter and a style sheet as arguments
+
+# XXX The use of style sheets should change: for each tag and end tag
+# there should be a style definition, and a style definition should
+# encompass many more parameters: font, justification, indentation,
+# vspace before, vspace after, hanging tag...
+
+# Patterns used by FormattingParser.handle_data(): a (possibly empty)
+# run of non-whitespace, and a (possibly empty) run of whitespace
+wordprog = regex.compile('[^ \t\n]*')
+spaceprog = regex.compile('[ \t\n]*')
+
+class FormattingParser(CollectingParser):
+
+    # formatter receives the setfont/addword/flush/... calls made below;
+    # stylesheet supplies the font sets and indents (see NullStylesheet)
+    def __init__(self, formatter, stylesheet):
+        CollectingParser.__init__(self)
+        self.fmt = formatter
+        self.stl = stylesheet
+        self.savetext = None
+        self.compact = 0        # Set inside compact lists and <address>
+        self.nofill = 0         # Nesting depth of <pre>
+        self.resetfont()
+        self.setindent(self.stl.stdindent)
+
+    # Empty both stacks and go back to the standard roman font
+    def resetfont(self):
+        self.fontstack = []
+        self.stylestack = []
+        self.fontset = self.stl.stdfontset
+        self.style = ROMAN
+        self.passfont()
+
+    # Tell the formatter which font the current fontset/style selects
+    def passfont(self):
+        font = self.fontset[self.style]
+        self.fmt.setfont(font)
+
+    # Switch style, remembering the old one.  The style index is
+    # clamped so that short font sets fall back to their last entry.
+    def pushstyle(self, style):
+        self.stylestack.append(self.style)
+        self.style = min(style, len(self.fontset)-1)
+        self.passfont()
+
+    def popstyle(self):
+        self.style = self.stylestack[-1]
+        del self.stylestack[-1]
+        self.passfont()
+
+    # Switch font set and style together
+    def pushfontset(self, fontset, style):
+        self.fontstack.append(self.fontset)
+        self.fontset = fontset
+        self.pushstyle(style)
+
+    def popfontset(self):
+        self.fontset = self.fontstack[-1]
+        del self.fontstack[-1]
+        self.popstyle()
+
+    # Thin wrappers around the formatter
+
+    def flush(self):
+        self.fmt.flush()
+
+    def setindent(self, n):
+        self.fmt.setleftindent(n)
+
+    def needvspace(self, n):
+        self.fmt.needvspace(n)
+
+    def close(self):
+        HTMLParser.close(self)
+        self.fmt.flush()
+
+    # Literal text: every line but the last is added and flushed as a
+    # line of its own; the last (unterminated) piece is added without
+    # flushing.  Tabs are expanded in all lines except the first.
+    def handle_literal(self, text):
+        lines = string.splitfields(text, '\n')
+        for i in range(1, len(lines)):
+            lines[i] = string.expandtabs(lines[i], 8)
+        for line in lines[:-1]:
+            self.fmt.addword(line, 0)
+            self.fmt.flush()
+            self.fmt.nospace = 0
+        for line in lines[-1:]:
+            self.fmt.addword(line, 0)
+
+    # Ordinary text: split into words and pass each to the formatter
+    # together with the amount of white space that follows it.
+    def handle_data(self, text):
+        if self.savetext is not None:
+            self.savetext = self.savetext + text
+            return
+        # self.literal is not set in this class; presumably it is
+        # maintained by the sgmllib base class -- confirm
+        if self.literal:
+            self.handle_literal(text)
+            return
+        i = 0
+        n = len(text)
+        while i < n:
+            j = i + wordprog.match(text, i)
+            word = text[i:j]
+            i = j + spaceprog.match(text, j)
+            self.fmt.addword(word, i-j)
+            if self.nofill and '\n' in text[j:i]:
+                # Inside <pre>: break the line at the first newline
+                # and resume scanning just after it
+                self.fmt.flush()
+                self.fmt.nospace = 0
+                i = j+1
+                while text[i-1] <> '\n': i = i+1
+
+    # Hooks called by HTMLParser for <listing>, <xmp> and <plaintext>
+
+    def literal_bgn(self, tag, attrs):
+        if tag == 'plaintext':
+            self.flush()
+        else:
+            self.needvspace(1)
+        self.pushfontset(self.stl.stdfontset, FIXED)
+        self.setindent(self.stl.literalindent)
+
+    def literal_end(self, tag):
+        self.needvspace(1)
+        self.popfontset()
+        self.setindent(self.stl.stdindent)
+
+    def start_title(self, attrs):
+        self.flush()
+        self.savetext = ''
+    # NB end_title is unchanged
+
+    # Paragraph break: only a line break in compact mode
+    def do_p(self, attrs):
+        if self.compact:
+            self.flush()
+        else:
+            self.needvspace(1)
+
+    # Headers: each pushes a font set in BOLD and pops it at the end
+
+    def start_h1(self, attrs):
+        self.needvspace(2)
+        self.setindent(self.stl.h1indent)
+        self.pushfontset(self.stl.h1fontset, BOLD)
+        self.fmt.setjust('c')
+
+    def end_h1(self):
+        self.popfontset()
+        self.needvspace(2)
+        self.setindent(self.stl.stdindent)
+        self.fmt.setjust('l')
+
+    def start_h2(self, attrs):
+        self.needvspace(1)
+        self.setindent(self.stl.h2indent)
+        self.pushfontset(self.stl.h2fontset, BOLD)
+
+    def end_h2(self):
+        self.popfontset()
+        self.needvspace(1)
+        self.setindent(self.stl.stdindent)
+
+    def start_h3(self, attrs):
+        self.needvspace(1)
+        self.setindent(self.stl.stdindent)
+        self.pushfontset(self.stl.h3fontset, BOLD)
+
+    def end_h3(self):
+        self.popfontset()
+        self.needvspace(1)
+        self.setindent(self.stl.stdindent)
+
+    def start_h4(self, attrs):
+        self.needvspace(1)
+        self.setindent(self.stl.stdindent)
+        self.pushfontset(self.stl.stdfontset, BOLD)
+
+    def end_h4(self):
+        self.popfontset()
+        self.needvspace(1)
+        self.setindent(self.stl.stdindent)
+
+    # h5, h6 and h7 are formatted like h4
+
+    start_h5 = start_h4
+    end_h5 = end_h4
+
+    start_h6 = start_h5
+    end_h6 = end_h5
+
+    start_h7 = start_h6
+    end_h7 = end_h6
+
+    # Lists.  A 'compact' attribute selects compact mode with indent 0;
+    # the else clause of the for loop runs when no such attribute is found.
+    def start_ul(self, attrs):
+        self.needvspace(1)
+        for attrname, value in attrs:
+            if attrname == 'compact':
+                self.compact = 1
+                self.setindent(0)
+                break
+        else:
+            self.setindent(self.stl.ulindent)
+
+    start_dir = start_menu = start_ol = start_ul
+
+    do_li = do_p
+
+    def end_ul(self):
+        self.compact = 0
+        self.needvspace(1)
+        self.setindent(self.stl.stdindent)
+
+    end_dir = end_menu = end_ol = end_ul
+
+    # Definition lists
+
+    def start_dl(self, attrs):
+        for attrname, value in attrs:
+            if attrname == 'compact':
+                self.compact = 1
+        self.needvspace(1)
+
+    def end_dl(self):
+        self.compact = 0
+        self.needvspace(1)
+        self.setindent(self.stl.stdindent)
+
+    def do_dt(self, attrs):
+        if self.compact:
+            self.flush()
+        else:
+            self.needvspace(1)
+        self.setindent(self.stl.stdindent)
+
+    def do_dd(self, attrs):
+        self.fmt.addword('', 1)
+        self.setindent(self.stl.ddindent)
+
+    # Addresses are set compact and right-justified
+
+    def start_address(self, attrs):
+        self.compact = 1
+        self.needvspace(1)
+        self.fmt.setjust('r')
+
+    def end_address(self):
+        self.compact = 0
+        self.needvspace(1)
+        self.setindent(self.stl.stdindent)
+        self.fmt.setjust('l')
+
+    # Preformatted text: fixed font, newlines kept (see handle_data)
+
+    def start_pre(self, attrs):
+        self.needvspace(1)
+        self.nofill = self.nofill + 1
+        self.pushstyle(FIXED)
+
+    def end_pre(self):
+        self.popstyle()
+        self.nofill = self.nofill - 1
+        self.needvspace(1)
+
+    start_typewriter = start_pre
+    end_typewriter = end_pre
+
+    # Images are replaced by a placeholder word
+    def do_img(self, attrs):
+        self.fmt.addword('(image)', 0)
+
+    # Physical styles
+
+    def start_tt(self, attrs): self.pushstyle(FIXED)
+    def end_tt(self): self.popstyle()
+
+    def start_b(self, attrs): self.pushstyle(BOLD)
+    def end_b(self): self.popstyle()
+
+    def start_i(self, attrs): self.pushstyle(ITALIC)
+    def end_i(self): self.popstyle()
+
+    def start_u(self, attrs): self.pushstyle(ITALIC) # Underline???
+    def end_u(self): self.popstyle()
+
+    def start_r(self, attrs): self.pushstyle(ROMAN) # Not official
+    def end_r(self): self.popstyle()
+
+    # Logical styles
+
+    start_em = start_i
+    end_em = end_i
+
+    start_strong = start_b
+    end_strong = end_b
+
+    start_code = start_tt
+    end_code = end_tt
+
+    start_samp = start_tt
+    end_samp = end_tt
+
+    start_kbd = start_tt
+    end_kbd = end_tt
+
+    start_file = start_tt # unofficial
+    end_file = end_tt
+
+    start_var = start_i
+    end_var = end_i
+
+    start_dfn = start_i
+    end_dfn = end_i
+
+    start_cite = start_i
+    end_cite = end_i
+
+    # The end tag must pop the style that the start tag pushed
+    # (the original bound end_hp1 to start_i, pushing a second style)
+    start_hp1 = start_i
+    end_hp1 = end_i
+
+    start_hp2 = start_b
+    end_hp2 = end_b
+
+    # Tags without a handler are reported on standard output
+
+    def unknown_starttag(self, tag, attrs):
+        print '*** unknown <' + tag + '>'
+
+    def unknown_endtag(self, tag):
+        print '*** unknown </' + tag + '>'
+
+
+# An extension of the formatting parser which formats anchors differently.
+class AnchoringParser(FormattingParser):
+
+    # Let the inherited start_a() record the anchor, then tell the
+    # formatter where it begins (unless no anchor was recorded).
+    def start_a(self, attrs):
+        FormattingParser.start_a(self, attrs)
+        anchor = self.inanchor
+        if not anchor:
+            return
+        self.fmt.bgn_anchor(anchor)
+
+    # Tell the formatter where the anchor ends instead of emitting
+    # the '[n]' marker of the inherited end_a().
+    def end_a(self):
+        anchor = self.inanchor
+        if anchor:
+            self.fmt.end_anchor(anchor)
+            self.inanchor = 0
+
+
+# Style sheet -- this is never instantiated, but the attributes
+# of the class object itself are used to specify fonts to be used
+# for various paragraph styles.
+# A font set is a non-empty list of fonts, in the order:
+# [roman, italic, bold, fixed].
+# When a style is not available the nearest lower style is used
+
+# Style numbers; each is used as an index into a font set
+ROMAN = 0
+ITALIC = 1
+BOLD = 2
+FIXED = 3
+
+class NullStylesheet:
+ # Fonts -- none
+ # (one-element font sets: every style falls back to entry 0)
+ stdfontset = [None]
+ h1fontset = [None]
+ h2fontset = [None]
+ h3fontset = [None]
+ # Indents
+ # (passed by FormattingParser to the formatter's setleftindent())
+ stdindent = 2
+ ddindent = 25
+ ulindent = 4
+ h1indent = 0
+ h2indent = 0
+ literalindent = 0
+
+
+class X11Stylesheet(NullStylesheet):
+ # Font sets given as X11 font name patterns, in the order
+ # roman, italic (oblique), bold, fixed (courier).
+ # The sets differ only in the 100/180/140/120 field -- presumably
+ # the point size in tenths of a point; confirm.
+ stdfontset = [ \
+ '-*-helvetica-medium-r-normal-*-*-100-100-*-*-*-*-*', \
+ '-*-helvetica-medium-o-normal-*-*-100-100-*-*-*-*-*', \
+ '-*-helvetica-bold-r-normal-*-*-100-100-*-*-*-*-*', \
+ '-*-courier-medium-r-normal-*-*-100-100-*-*-*-*-*', \
+ ]
+ # The header font sets have no fixed font; a request for FIXED
+ # gets the last (bold) entry, see FormattingParser.pushstyle()
+ h1fontset = [ \
+ '-*-helvetica-medium-r-normal-*-*-180-100-*-*-*-*-*', \
+ '-*-helvetica-medium-o-normal-*-*-180-100-*-*-*-*-*', \
+ '-*-helvetica-bold-r-normal-*-*-180-100-*-*-*-*-*', \
+ ]
+ h2fontset = [ \
+ '-*-helvetica-medium-r-normal-*-*-140-100-*-*-*-*-*', \
+ '-*-helvetica-medium-o-normal-*-*-140-100-*-*-*-*-*', \
+ '-*-helvetica-bold-r-normal-*-*-140-100-*-*-*-*-*', \
+ ]
+ h3fontset = [ \
+ '-*-helvetica-medium-r-normal-*-*-120-100-*-*-*-*-*', \
+ '-*-helvetica-medium-o-normal-*-*-120-100-*-*-*-*-*', \
+ '-*-helvetica-bold-r-normal-*-*-120-100-*-*-*-*-*', \
+ ]
+ # Overrides the inherited ddindent of 25
+ ddindent = 40
+
+
+class MacStylesheet(NullStylesheet):
+    # Font sets given as (name, style letter, size) tuples, in the
+    # order roman, italic, bold, fixed
+    stdfontset = [ \
+        ('Geneva', 'p', 10), \
+        ('Geneva', 'i', 10), \
+        ('Geneva', 'b', 10), \
+        ('Monaco', 'p', 10), \
+        ]
+    h1fontset = [ \
+        ('Geneva', 'p', 18), \
+        ('Geneva', 'i', 18), \
+        ('Geneva', 'b', 18), \
+        ('Monaco', 'p', 18), \
+        ]
+    # The original named this set h3fontset as well, so it was
+    # overwritten by the next one and h2 headers got [None] from
+    # NullStylesheet.  Size 14 matches h2 in the other style sheets.
+    h2fontset = [ \
+        ('Geneva', 'p', 14), \
+        ('Geneva', 'i', 14), \
+        ('Geneva', 'b', 14), \
+        ('Monaco', 'p', 14), \
+        ]
+    h3fontset = [ \
+        ('Geneva', 'p', 12), \
+        ('Geneva', 'i', 12), \
+        ('Geneva', 'b', 12), \
+        ('Monaco', 'p', 12), \
+        ]
+
+
+# Pick the style sheet for stdwin according to the platform
+if os.name == 'mac':
+ StdwinStylesheet = MacStylesheet
+else:
+ StdwinStylesheet = X11Stylesheet
+
+
+class GLStylesheet(NullStylesheet):
+ # Font sets given as 'name size' strings, in the order
+ # roman, italic, bold, fixed
+ stdfontset = [ \
+ 'Helvetica 10', \
+ 'Helvetica-Italic 10', \
+ 'Helvetica-Bold 10', \
+ 'Courier 10', \
+ ]
+ h1fontset = [ \
+ 'Helvetica 18', \
+ 'Helvetica-Italic 18', \
+ 'Helvetica-Bold 18', \
+ 'Courier 18', \
+ ]
+ h2fontset = [ \
+ 'Helvetica 14', \
+ 'Helvetica-Italic 14', \
+ 'Helvetica-Bold 14', \
+ 'Courier 14', \
+ ]
+ h3fontset = [ \
+ 'Helvetica 12', \
+ 'Helvetica-Italic 12', \
+ 'Helvetica-Bold 12', \
+ 'Courier 12', \
+ ]
+
+
+# Test program -- produces no output but times how long it takes
+# to send a document to a null formatter, exclusive of I/O
+
+def test():
+ # Format the file named by sys.argv[1] (default 'test.html') and
+ # print the elapsed time; reading the file is not timed.
+ # NOTE(review): the comment preceding this function speaks of a null
+ # formatter and no output, but a fmt.WritingFormatter on sys.stdout
+ # is created here -- confirm which is intended.
+ import fmt
+ import time
+ if sys.argv[1:]: file = sys.argv[1]
+ else: file = 'test.html'
+ data = open(file, 'r').read()
+ t0 = time.time()
+ fmtr = fmt.WritingFormatter(sys.stdout, 79)
+ p = FormattingParser(fmtr, NullStylesheet)
+ p.feed(data)
+ p.close()
+ t1 = time.time()
+ print
+ print '*** Formatting time:', round(t1-t0, 3), 'seconds.'
+
+
+# Test program using stdwin
+
+def testStdwin():
+ # Show the file named by sys.argv[1] (default 'test.html') in a
+ # stdwin window until the window is closed.
+ import stdwin, fmt
+ from stdwinevents import *
+ if sys.argv[1:]: file = sys.argv[1]
+ else: file = 'test.html'
+ data = open(file, 'r').read()
+ window = stdwin.open('testStdwin')
+ b = None
+ while 1:
+ etype, ewin, edetail = stdwin.getevent()
+ if etype == WE_CLOSE:
+ break
+ if etype == WE_SIZE:
+ window.setdocsize(0, 0)
+ window.setorigin(0, 0)
+ window.change((0, 0), (10000, 30000)) # XXX
+ if etype == WE_DRAW:
+ # The document is parsed and formatted on the first draw
+ # event only; later draw events just redraw the back-end
+ if not b:
+ b = fmt.StdwinBackEnd(window, 1)
+ f = fmt.BaseFormatter(b.d, b)
+ # NOTE(review): MacStylesheet is used on every platform here,
+ # although StdwinStylesheet is defined in this file -- confirm
+ p = FormattingParser(f, \
+ MacStylesheet)
+ p.feed(data)
+ p.close()
+ b.finish()
+ else:
+ b.redraw(edetail)
+ window.close()
+
+
+# Test program using GL
+
+def testGL():
+ # Show the file named by sys.argv[1] (default 'test.html') in a
+ # 600x600 GL window, then sleep for 5 seconds and return.
+ import gl, GL, fmt
+ if sys.argv[1:]: file = sys.argv[1]
+ else: file = 'test.html'
+ data = open(file, 'r').read()
+ W, H = 600, 600
+ gl.foreground()
+ gl.prefsize(W, H)
+ wid = gl.winopen('testGL')
+ # Bottom and top are passed swapped (H, 0) -- presumably to make
+ # the vertical coordinate grow downward; confirm
+ gl.ortho2(0, W, H, 0)
+ gl.color(GL.WHITE)
+ gl.clear()
+ gl.color(GL.BLACK)
+ b = fmt.GLBackEnd(wid)
+ f = fmt.BaseFormatter(b.d, b)
+ p = FormattingParser(f, GLStylesheet)
+ p.feed(data)
+ p.close()
+ b.finish()
+ #
+ import time
+ time.sleep(5)
+
+
+# When run as a script, run the plain test (not testStdwin or testGL)
+if __name__ == '__main__':
+ test()
diff --git a/Lib/lib-old/Para.py b/Lib/lib-old/Para.py
new file mode 100644
index 0000000..6a7057d
--- /dev/null
+++ b/Lib/lib-old/Para.py
@@ -0,0 +1,408 @@
+# Text formatting abstractions
+
+
+# Oft-used type object
+# (Para tells anchor markers, which are integers, from word tuples
+# by comparing type(word) with this)
+Int = type(0)
+
+
+# Represent a paragraph. This is a list of words with associated
+# font and size information, plus indents and justification for the
+# entire paragraph.
+# Once the words have been added to a paragraph, it can be laid out
+# for different line widths. Once laid out, it can be rendered at
+# different screen locations. Once rendered, it can be queried
+# for mouse hits, and parts of the text can be highlighted
+class Para:
+ #
+ def __init__(self):
+ self.words = [] # The words
+ self.just = 'l' # Justification: 'l', 'r', 'lr' or 'c'
+ self.indent_left = self.indent_right = self.indent_hang = 0
+ # Final lay-out parameters, may change
+ self.left = self.top = self.right = self.bottom = \
+ self.width = self.height = self.lines = None
+ #
+ # Add a word, computing size information for it.
+ # Words may also be added manually by appending to self.words
+ # Each word should be a 7-tuple:
+ # (font, text, width, space, stretch, ascent, descent)
+ # The space and stretch arguments are multiplied by the width of
+ # a space in the current font before they are stored.
+ def addword(self, d, font, text, space, stretch):
+ if font <> None:
+ d.setfont(font)
+ width = d.textwidth(text)
+ ascent = d.baseline()
+ descent = d.lineheight() - ascent
+ spw = d.textwidth(' ')
+ space = space * spw
+ stretch = stretch * spw
+ tuple = (font, text, width, space, stretch, ascent, descent)
+ self.words.append(tuple)
+ #
+ # Hooks to begin and end anchors -- insert numbers in the word list!
+ # (the anchor id marks the beginning, 0 marks the end)
+ def bgn_anchor(self, id):
+ self.words.append(id)
+ #
+ def end_anchor(self, id):
+ self.words.append(0)
+ #
+ # Return the total length (width) of the text added so far, in pixels
+ def getlength(self):
+ total = 0
+ for word in self.words:
+ if type(word) <> Int:
+ total = total + word[2] + word[3]
+ return total
+ #
+ # Tab to a given position (relative to the current left indent):
+ # remove all stretch, add fixed space up to the new indent.
+ # If the current position is already beyond the tab stop,
+ # don't add any new space (but still remove the stretch)
+ def tabto(self, tab):
+ total = 0
+ as, de = 1, 0
+ for i in range(len(self.words)):
+ word = self.words[i]
+ if type(word) == Int: continue
+ fo, te, wi, sp, st, as, de = word
+ self.words[i] = fo, te, wi, sp, 0, as, de
+ total = total + wi + sp
+ if total < tab:
+ # The filler is an empty word that takes the ascent and descent
+ # of the last real word (or 1, 0 when there is none)
+ self.words.append(None, '', 0, tab-total, 0, as, de)
+ #
+ # Make a hanging tag: tab to hang, increment indent_left by hang,
+ # and reset indent_hang to -hang
+ def makehangingtag(self, hang):
+ self.tabto(hang)
+ self.indent_left = self.indent_left + hang
+ self.indent_hang = -hang
+ #
+ # Decide where the line breaks will be given some screen width
+ # Sets self.width, self.height and self.lines; each entry of
+ # self.lines is a 7-tuple:
+ # (wordcount, firstfont, charcount, width, stretch, ascent, descent)
+ # where wordcount counts entries of self.words, anchors included.
+ def layout(self, linewidth):
+ self.width = linewidth
+ height = 0
+ self.lines = lines = []
+ avail1 = self.width - self.indent_left - self.indent_right
+ # indent_hang applies to the first line only (see end of loop)
+ avail = avail1 - self.indent_hang
+ words = self.words
+ i = 0
+ n = len(words)
+ lastfont = None
+ while i < n:
+ firstfont = lastfont
+ charcount = 0
+ width = 0
+ stretch = 0
+ ascent = 0
+ descent = 0
+ lsp = 0
+ j = i
+ while i < n:
+ word = words[i]
+ if type(word) == Int:
+ if word > 0 and width >= avail:
+ break
+ i = i+1
+ continue
+ fo, te, wi, sp, st, as, de = word
+ if width + wi > avail and width > 0 and wi > 0:
+ break
+ if fo <> None:
+ lastfont = fo
+ if width == 0:
+ firstfont = fo
+ charcount = charcount + len(te) + (sp > 0)
+ width = width + wi + sp
+ lsp = sp
+ stretch = stretch + st
+ lst = st
+ ascent = max(ascent, as)
+ descent = max(descent, de)
+ i = i+1
+ # Anchor begin markers at the end of the line go to the next line
+ while i > j and type(words[i-1]) == Int and \
+ words[i-1] > 0: i = i-1
+ # The space after the last word of a line does not count
+ width = width - lsp
+ if i < n:
+ stretch = stretch - lst
+ else:
+ # The last line of the paragraph is never stretched
+ stretch = 0
+ tuple = i-j, firstfont, charcount, width, stretch, \
+ ascent, descent
+ lines.append(tuple)
+ height = height + ascent + descent
+ avail = avail1
+ self.height = height
+ #
+ # Call a function for all words in a line
+ # wordfunc is called as
+ # wordfunc(self, linetuple, word, h, v, h2, v2, isfirst, islast)
+ # and anchorfunc as anchorfunc(self, linetuple, anchorid, h, v).
+ # The first result other than None ends the visit and is returned.
+ def visit(self, wordfunc, anchorfunc):
+ avail1 = self.width - self.indent_left - self.indent_right
+ avail = avail1 - self.indent_hang
+ v = self.top
+ i = 0
+ for tuple in self.lines:
+ wordcount, firstfont, charcount, width, stretch, \
+ ascent, descent = tuple
+ h = self.left + self.indent_left
+ if i == 0: h = h + self.indent_hang
+ extra = 0
+ if self.just == 'r': h = h + avail - width
+ elif self.just == 'c': h = h + (avail - width) / 2
+ elif self.just == 'lr' and stretch > 0:
+ extra = avail - width
+ v2 = v + ascent + descent
+ for j in range(i, i+wordcount):
+ word = self.words[j]
+ if type(word) == Int:
+ ok = anchorfunc(self, tuple, word, \
+ h, v)
+ if ok <> None: return ok
+ continue
+ fo, te, wi, sp, st, as, de = word
+ # Hand out the extra space in proportion to each word's stretch
+ if extra > 0 and stretch > 0:
+ ex = extra * st / stretch
+ extra = extra - ex
+ stretch = stretch - st
+ else:
+ ex = 0
+ h2 = h + wi + sp + ex
+ ok = wordfunc(self, tuple, word, h, v, \
+ h2, v2, (j==i), (j==i+wordcount-1))
+ if ok <> None: return ok
+ h = h2
+ v = v2
+ i = i + wordcount
+ avail = avail1
+ #
+ # Render a paragraph in "drawing object" d, using the rectangle
+ # given by (left, top, right) with an unspecified bottom.
+ # Return the computed bottom of the text.
+ def render(self, d, left, top, right):
+ # Lay out again only when the width has changed
+ if self.width <> right-left:
+ self.layout(right-left)
+ self.left = left
+ self.top = top
+ self.right = right
+ self.bottom = self.top + self.height
+ self.anchorid = 0
+ try:
+ self.d = d
+ self.visit(self.__class__._renderword, \
+ self.__class__._renderanchor)
+ finally:
+ self.d = None
+ return self.bottom
+ #
+ # Draw one word; words inside an anchor with a positive id are
+ # underlined 2 units below the baseline
+ def _renderword(self, tuple, word, h, v, h2, v2, isfirst, islast):
+ if word[0] <> None: self.d.setfont(word[0])
+ baseline = v + tuple[5]
+ self.d.text((h, baseline - word[5]), word[1])
+ if self.anchorid > 0:
+ self.d.line((h, baseline+2), (h2, baseline+2))
+ #
+ def _renderanchor(self, tuple, word, h, v):
+ self.anchorid = word
+ #
+ # Return which anchor(s) was hit by the mouse
+ def hitcheck(self, mouseh, mousev):
+ self.mouseh = mouseh
+ self.mousev = mousev
+ self.anchorid = 0
+ self.hits = []
+ self.visit(self.__class__._hitcheckword, \
+ self.__class__._hitcheckanchor)
+ return self.hits
+ #
+ def _hitcheckword(self, tuple, word, h, v, h2, v2, isfirst, islast):
+ if self.anchorid > 0 and h <= self.mouseh <= h2 and \
+ v <= self.mousev <= v2:
+ self.hits.append(self.anchorid)
+ #
+ def _hitcheckanchor(self, tuple, word, h, v):
+ self.anchorid = word
+ #
+ # Return whether the given anchor id is present
+ # (either as id or as -id)
+ def hasanchor(self, id):
+ return id in self.words or -id in self.words
+ #
+ # Extract the raw text from the word list, substituting one space
+ # for non-empty inter-word space, and terminating with '\n'
+ def extract(self):
+ text = ''
+ for w in self.words:
+ if type(w) <> Int:
+ word = w[1]
+ if w[3]: word = word + ' '
+ text = text + word
+ return text + '\n'
+ #
+ # Return which character position was hit by the mouse, as
+ # an offset in the entire text as returned by extract().
+ # Return None if the mouse was not in this paragraph
+ def whereis(self, d, mouseh, mousev):
+ if mousev < self.top or mousev > self.bottom:
+ return None
+ self.mouseh = mouseh
+ self.mousev = mousev
+ self.lastfont = None
+ self.charcount = 0
+ try:
+ self.d = d
+ return self.visit(self.__class__._whereisword, \
+ self.__class__._whereisanchor)
+ finally:
+ self.d = None
+ #
+ def _whereisword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
+ fo, te, wi, sp, st, as, de = word
+ if fo <> None: self.lastfont = fo
+ h = h1
+ # The first and last word of a line also catch the mouse when it
+ # is to the left or to the right of the text
+ if isfirst: h1 = 0
+ if islast: h2 = 999999
+ if not (v1 <= self.mousev <= v2 and h1 <= self.mouseh <= h2):
+ self.charcount = self.charcount + len(te) + (sp > 0)
+ return
+ if self.lastfont <> None:
+ self.d.setfont(self.lastfont)
+ # Find the character whose middle lies right of the mouse
+ cc = 0
+ for c in te:
+ cw = self.d.textwidth(c)
+ if self.mouseh <= h + cw/2:
+ return self.charcount + cc
+ cc = cc+1
+ h = h+cw
+ self.charcount = self.charcount + cc
+ if self.mouseh <= (h+h2) / 2:
+ return self.charcount
+ else:
+ return self.charcount + 1
+ #
+ def _whereisanchor(self, tuple, word, h, v):
+ pass
+ #
+ # Return screen position corresponding to position in paragraph.
+ # Return tuple (h, vtop, vbaseline, vbottom).
+ # This is more or less the inverse of whereis()
+ def screenpos(self, d, pos):
+ if pos < 0:
+ ascent, descent = self.lines[0][5:7]
+ return self.left, self.top, self.top + ascent, \
+ self.top + ascent + descent
+ self.pos = pos
+ self.lastfont = None
+ try:
+ self.d = d
+ ok = self.visit(self.__class__._screenposword, \
+ self.__class__._screenposanchor)
+ finally:
+ self.d = None
+ # Position past the last word: use the end of the last line
+ if ok == None:
+ ascent, descent = self.lines[-1][5:7]
+ ok = self.right, self.bottom - ascent - descent, \
+ self.bottom - descent, self.bottom
+ return ok
+ #
+ def _screenposword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
+ fo, te, wi, sp, st, as, de = word
+ if fo <> None: self.lastfont = fo
+ cc = len(te) + (sp > 0)
+ if self.pos > cc:
+ self.pos = self.pos - cc
+ return
+ if self.pos < cc:
+ # NOTE(review): unlike _whereisword, lastfont is not checked
+ # for None before it is passed to setfont -- confirm
+ self.d.setfont(self.lastfont)
+ h = h1 + self.d.textwidth(te[:self.pos])
+ else:
+ h = h2
+ ascent, descent = tuple[5:7]
+ return h, v1, v1+ascent, v2
+ #
+ def _screenposanchor(self, tuple, word, h, v):
+ pass
+ #
+ # Invert the stretch of text between pos1 and pos2.
+ # If pos1 is None, the beginning is implied;
+ # if pos2 is None, the end is implied.
+ # Undoes its own effect when called again with the same arguments
+ def invert(self, d, pos1, pos2):
+ if pos1 == None:
+ pos1 = self.left, self.top, self.top, self.top
+ else:
+ pos1 = self.screenpos(d, pos1)
+ if pos2 == None:
+ pos2 = self.right, self.bottom,self.bottom,self.bottom
+ else:
+ pos2 = self.screenpos(d, pos2)
+ h1, top1, baseline1, bottom1 = pos1
+ h2, top2, baseline2, bottom2 = pos2
+ # When the positions are on different lines: invert the rest of
+ # the first line, then any whole lines in between, and finally
+ # the beginning of the last line
+ if bottom1 <= top2:
+ d.invert((h1, top1), (self.right, bottom1))
+ h1 = self.left
+ if bottom1 < top2:
+ d.invert((h1, bottom1), (self.right, top2))
+ top1, bottom1 = top2, bottom2
+ d.invert((h1, top1), (h2, bottom2))
+
+
+# Test class Para
+# XXX This was last used on the Mac, hence the weird fonts...
+def test():
+ # Interactive test: show the same sentence in four paragraphs, one
+ # for each justification, in a stdwin window; pressing and
+ # releasing the mouse button within one paragraph inverts the
+ # text in between.
+ import stdwin
+ from stdwinevents import *
+ words = 'The', 'quick', 'brown', 'fox', 'jumps', 'over', \
+ 'the', 'lazy', 'dog.'
+ paralist = []
+ for just in 'l', 'r', 'lr', 'c':
+ p = Para()
+ p.just = just
+ p.addword(stdwin, ('New York', 'p', 12), words[0], 1, 1)
+ for word in words[1:-1]:
+ p.addword(stdwin, None, word, 1, 1)
+ p.addword(stdwin, None, words[-1], 2, 4)
+ p.addword(stdwin, ('New York', 'b', 18), 'Bye!', 0, 0)
+ p.addword(stdwin, ('New York', 'p', 10), 'Bye!', 0, 0)
+ paralist.append(p)
+ window = stdwin.open('Para.test()')
+ # Current selection: paragraph and begin/end character positions
+ start = stop = selpara = None
+ while 1:
+ etype, win, detail = stdwin.getevent()
+ if etype == WE_CLOSE:
+ break
+ if etype == WE_SIZE:
+ window.change((0, 0), (1000, 1000))
+ if etype == WE_DRAW:
+ width, height = window.getwinsize()
+ d = None
+ try:
+ d = window.begindrawing()
+ d.cliprect(detail)
+ d.erase(detail)
+ v = 0
+ for p in paralist:
+ v = p.render(d, 0, v, width)
+ if p == selpara and \
+ start <> None and stop <> None:
+ p.invert(d, start, stop)
+ finally:
+ if d: d.close()
+ if etype == WE_MOUSE_DOWN:
+ # Undo the old selection (invert is its own inverse), then
+ # look for the paragraph under the mouse
+ if selpara and start <> None and stop <> None:
+ d = window.begindrawing()
+ selpara.invert(d, start, stop)
+ d.close()
+ start = stop = selpara = None
+ mouseh, mousev = detail[0]
+ for p in paralist:
+ start = p.whereis(stdwin, mouseh, mousev)
+ if start <> None:
+ selpara = p
+ break
+ if etype == WE_MOUSE_UP and start <> None and selpara:
+ mouseh, mousev = detail[0]
+ stop = selpara.whereis(stdwin, mouseh, mousev)
+ if stop == None: start = selpara = None
+ else:
+ if start > stop:
+ start, stop = stop, start
+ d = window.begindrawing()
+ selpara.invert(d, start, stop)
+ d.close()
+ window.close()
diff --git a/Lib/lib-old/fmt.py b/Lib/lib-old/fmt.py
new file mode 100644
index 0000000..c096306
--- /dev/null
+++ b/Lib/lib-old/fmt.py
@@ -0,0 +1,621 @@
+# Text formatting abstractions
+
+
+import string
+import Para
+
+
+# A formatter back-end object has one method that is called by the formatter:
+# addpara(p), where p is a paragraph object. For example:
+
+
+# Formatter back-end to do nothing at all with the paragraphs
+class NullBackEnd:
+ #
+ def __init__(self):
+ pass
+ #
+ def addpara(self, p):
+ pass
+ #
+ def bgn_anchor(self, id):
+ pass
+ #
+ def end_anchor(self, id):
+ pass
+
+
+# Formatter back-end to collect the paragraphs in a list
+class SavingBackEnd(NullBackEnd):
+ #
+ def __init__(self):
+ self.paralist = []
+ #
+ def addpara(self, p):
+ self.paralist.append(p)
+ #
+ def hitcheck(self, h, v):
+ hits = []
+ for p in self.paralist:
+ if p.top <= v <= p.bottom:
+ for id in p.hitcheck(h, v):
+ if id not in hits:
+ hits.append(id)
+ return hits
+ #
+ def extract(self):
+ text = ''
+ for p in self.paralist:
+ text = text + (p.extract())
+ return text
+ #
+ def extractpart(self, long1, long2):
+ if long1 > long2: long1, long2 = long2, long1
+ para1, pos1 = long1
+ para2, pos2 = long2
+ text = ''
+ while para1 < para2:
+ ptext = self.paralist[para1].extract()
+ text = text + ptext[pos1:]
+ pos1 = 0
+ para1 = para1 + 1
+ ptext = self.paralist[para2].extract()
+ return text + ptext[pos1:pos2]
+ #
+ def whereis(self, d, h, v):
+ total = 0
+ for i in range(len(self.paralist)):
+ p = self.paralist[i]
+ result = p.whereis(d, h, v)
+ if result <> None:
+ return i, result
+ return None
+ #
+ def roundtowords(self, long1, long2):
+ i, offset = long1
+ text = self.paralist[i].extract()
+ while offset > 0 and text[offset-1] <> ' ': offset = offset-1
+ long1 = i, offset
+ #
+ i, offset = long2
+ text = self.paralist[i].extract()
+ n = len(text)
+ while offset < n-1 and text[offset] <> ' ': offset = offset+1
+ long2 = i, offset
+ #
+ return long1, long2
+ #
+ def roundtoparagraphs(self, long1, long2):
+ long1 = long1[0], 0
+ long2 = long2[0], len(self.paralist[long2[0]].extract())
+ return long1, long2
+
+
+# Formatter back-end to send the text directly to the drawing object
+class WritingBackEnd(NullBackEnd):
+ #
+ def __init__(self, d, width):
+ self.d = d
+ self.width = width
+ self.lineno = 0
+ #
+ def addpara(self, p):
+ self.lineno = p.render(self.d, 0, self.lineno, self.width)
+
+
+# A formatter receives a stream of formatting instructions and assembles
+# these into a stream of paragraphs on to a back-end. The assembly is
+# parametrized by a text measurement object, which must match the output
+# operations of the back-end. The back-end is responsible for splitting
+# paragraphs up in lines of a given maximum width. (This is done because
+# in a windowing environment, when the window size changes, there is no
+# need to redo the assembly into paragraphs, but the splitting into lines
+# must be done taking the new window size into account.)
+
+
+# Formatter base class. Initialize it with a text measurement object,
+# which is used for text measurements, and a back-end object,
+# which receives the completed paragraphs. The formatting methods are:
+# setfont(font)
+# setleftindent(nspaces)
+# setjust(type) where type is 'l', 'c', 'r', or 'lr'
+# flush()
+# vspace(nlines)
+# needvspace(nlines)
+# addword(word, nspaces)
+class BaseFormatter:
+ #
+ def __init__(self, d, b):
+ # Drawing object used for text measurements
+ self.d = d
+ #
+ # BackEnd object receiving completed paragraphs
+ self.b = b
+ #
+ # Parameters of the formatting model
+ self.leftindent = 0
+ self.just = 'l'
+ self.font = None
+ self.blanklines = 0
+ #
+ # Parameters derived from the current font
+ self.space = d.textwidth(' ')
+ self.line = d.lineheight()
+ self.ascent = d.baseline()
+ self.descent = self.line - self.ascent
+ #
+ # Parameter derived from the default font
+ self.n_space = self.space
+ #
+ # Current paragraph being built
+ self.para = None
+ self.nospace = 1
+ #
+ # Font to set on the next word
+ self.nextfont = None
+ #
+ def newpara(self):
+ return Para.Para()
+ #
+ def setfont(self, font):
+ if font == None: return
+ self.font = self.nextfont = font
+ d = self.d
+ d.setfont(font)
+ self.space = d.textwidth(' ')
+ self.line = d.lineheight()
+ self.ascent = d.baseline()
+ self.descent = self.line - self.ascent
+ #
+ def setleftindent(self, nspaces):
+ self.leftindent = int(self.n_space * nspaces)
+ if self.para:
+ hang = self.leftindent - self.para.indent_left
+ if hang > 0 and self.para.getlength() <= hang:
+ self.para.makehangingtag(hang)
+ self.nospace = 1
+ else:
+ self.flush()
+ #
+ def setrightindent(self, nspaces):
+ self.rightindent = int(self.n_space * nspaces)
+ if self.para:
+ self.para.indent_right = self.rightindent
+ self.flush()
+ #
+ def setjust(self, just):
+ self.just = just
+ if self.para:
+ self.para.just = self.just
+ #
+ def flush(self):
+ if self.para:
+ self.b.addpara(self.para)
+ self.para = None
+ if self.font <> None:
+ self.d.setfont(self.font)
+ self.nospace = 1
+ #
+ def vspace(self, nlines):
+ self.flush()
+ if nlines > 0:
+ self.para = self.newpara()
+ tuple = None, '', 0, 0, 0, int(nlines*self.line), 0
+ self.para.words.append(tuple)
+ self.flush()
+ self.blanklines = self.blanklines + nlines
+ #
+ def needvspace(self, nlines):
+ self.flush() # Just to be sure
+ if nlines > self.blanklines:
+ self.vspace(nlines - self.blanklines)
+ #
+ def addword(self, text, space):
+ if self.nospace and not text:
+ return
+ self.nospace = 0
+ self.blanklines = 0
+ if not self.para:
+ self.para = self.newpara()
+ self.para.indent_left = self.leftindent
+ self.para.just = self.just
+ self.nextfont = self.font
+ space = int(space * self.space)
+ self.para.words.append(self.nextfont, text, \
+ self.d.textwidth(text), space, space, \
+ self.ascent, self.descent)
+ self.nextfont = None
+ #
+ def bgn_anchor(self, id):
+ if not self.para:
+ self.nospace = 0
+ self.addword('', 0)
+ self.para.bgn_anchor(id)
+ #
+ def end_anchor(self, id):
+ if not self.para:
+ self.nospace = 0
+ self.addword('', 0)
+ self.para.end_anchor(id)
+
+
+# Measuring object for measuring text as viewed on a tty
+# Every character is one unit wide and every line one unit high;
+# font changes are ignored.
+class NullMeasurer:
+ #
+ def __init__(self):
+ pass
+ #
+ # Fonts make no difference to this measurer
+ def setfont(self, font):
+ pass
+ #
+ # Width of a string is simply its length in characters
+ def textwidth(self, text):
+ return len(text)
+ #
+ # Each line is exactly one unit high
+ def lineheight(self):
+ return 1
+ #
+ # The baseline is reported as 0, so callers that compute
+ # descent as lineheight() - baseline() get a descent of 1
+ def baseline(self):
+ return 0
+
+
+# Drawing object for writing plain ASCII text to a file
+class FileWriter:
+ #
+ def __init__(self, fp):
+ self.fp = fp
+ self.lineno, self.colno = 0, 0
+ #
+ def setfont(self, font):
+ pass
+ #
+ def text(self, (h, v), str):
+ if not str: return
+ if '\n' in str:
+ raise ValueError, 'can\'t write \\n'
+ while self.lineno < v:
+ self.fp.write('\n')
+ self.colno, self.lineno = 0, self.lineno + 1
+ while self.lineno > v:
+ # XXX This should never happen...
+ self.fp.write('\033[A') # ANSI up arrow
+ self.lineno = self.lineno - 1
+ if self.colno < h:
+ self.fp.write(' ' * (h - self.colno))
+ elif self.colno > h:
+ self.fp.write('\b' * (self.colno - h))
+ self.colno = h
+ self.fp.write(str)
+ self.colno = h + len(str)
+
+
+# Formatting class to do nothing at all with the data
+class NullFormatter(BaseFormatter):
+ #
+ def __init__(self):
+ d = NullMeasurer()
+ b = NullBackEnd()
+ BaseFormatter.__init__(self, d, b)
+
+
+# Formatting class to write directly to a file
+class WritingFormatter(BaseFormatter):
+ #
+ def __init__(self, fp, width):
+ dm = NullMeasurer()
+ dw = FileWriter(fp)
+ b = WritingBackEnd(dw, width)
+ BaseFormatter.__init__(self, dm, b)
+ self.blanklines = 1
+ #
+ # Suppress multiple blank lines
+ def needvspace(self, nlines):
+ BaseFormatter.needvspace(self, min(1, nlines))
+
+
+# A "FunnyFormatter" writes ASCII text with a twist: *bold words*,
+# _italic text_ and _underlined words_, and `quoted text'.
+# It assumes that the fonts are 'r', 'i', 'b', 'u', 'q': (roman,
+# italic, bold, underline, quote).
+# Moreover, if the font is in upper case, the text is converted to
+# UPPER CASE.
+class FunnyFormatter(WritingFormatter):
+ #
+ def flush(self):
+ if self.para: finalize(self.para)
+ WritingFormatter.flush(self)
+
+
+# Surrounds *bold words* and _italic text_ in a paragraph with
+# appropriate markers, fixing the size (assuming these characters'
+# width is 1).
+openchar = \
+ {'b':'*', 'i':'_', 'u':'_', 'q':'`', 'B':'*', 'I':'_', 'U':'_', 'Q':'`'}
+closechar = \
+ {'b':'*', 'i':'_', 'u':'_', 'q':'\'', 'B':'*', 'I':'_', 'U':'_', 'Q':'\''}
+# Rewrite para.words in place.  Words are scanned in order while
+# tracking the font in effect (a word whose font is None keeps the
+# current one; the starting font is 'r').  When the font differs from
+# the remembered one, closechar[old font] is appended to the nearest
+# earlier word with non-empty text and openchar[new font] is put in
+# front of the current word if it has text; the stored widths grow by
+# the marker length.  Text in a font found in string.uppercase is
+# converted to upper case.
+def finalize(para):
+ oldfont = curfont = 'r'
+ # Sentinel in font 'r' so that a font still open at the end of the
+ # paragraph gets its closing marker.
+ # NOTE(review): the sentinel has 6 fields while class Para documents
+ # words as 7-tuples; it is removed again below -- confirm harmless.
+ para.words.append('r', '', 0, 0, 0, 0) # temporary, deleted at end
+ for i in range(len(para.words)):
+ fo, te, wi = para.words[i][:3]
+ if fo <> None: curfont = fo
+ if curfont <> oldfont:
+ if closechar.has_key(oldfont):
+ c = closechar[oldfont]
+ # Walk back over words with empty text (stopping at index 0)
+ j = i-1
+ while j > 0 and para.words[j][1] == '': j = j-1
+ fo1, te1, wi1 = para.words[j][:3]
+ te1 = te1 + c
+ wi1 = wi1 + len(c)
+ para.words[j] = (fo1, te1, wi1) + \
+ para.words[j][3:]
+ # The opening marker is only emitted on a word that has text
+ if openchar.has_key(curfont) and te:
+ c = openchar[curfont]
+ te = c + te
+ wi = len(c) + wi
+ para.words[i] = (fo, te, wi) + \
+ para.words[i][3:]
+ # An empty word does not start the new font yet: remember 'r'
+ # so the change is seen again at a later word
+ if te: oldfont = curfont
+ else: oldfont = 'r'
+ if curfont in string.uppercase:
+ te = string.upper(te)
+ para.words[i] = (fo, te, wi) + para.words[i][3:]
+ del para.words[-1]
+
+
+# Formatter back-end to draw the text in a window.
+# This has an option to draw while the paragraphs are being added,
+# to minimize the delay before the user sees anything.
+# This manages the entire "document" of the window.
+class StdwinBackEnd(SavingBackEnd):
+ #
+ def __init__(self, window, drawnow):
+ self.window = window
+ self.drawnow = drawnow
+ self.width = window.getwinsize()[0]
+ self.selection = None
+ self.height = 0
+ window.setorigin(0, 0)
+ window.setdocsize(0, 0)
+ self.d = window.begindrawing()
+ SavingBackEnd.__init__(self)
+ #
+ def finish(self):
+ self.d.close()
+ self.d = None
+ self.window.setdocsize(0, self.height)
+ #
+ def addpara(self, p):
+ self.paralist.append(p)
+ if self.drawnow:
+ self.height = \
+ p.render(self.d, 0, self.height, self.width)
+ else:
+ p.layout(self.width)
+ p.left = 0
+ p.top = self.height
+ p.right = self.width
+ p.bottom = self.height + p.height
+ self.height = p.bottom
+ #
+ def resize(self):
+ self.window.change((0, 0), (self.width, self.height))
+ self.width = self.window.getwinsize()[0]
+ self.height = 0
+ for p in self.paralist:
+ p.layout(self.width)
+ p.left = 0
+ p.top = self.height
+ p.right = self.width
+ p.bottom = self.height + p.height
+ self.height = p.bottom
+ self.window.change((0, 0), (self.width, self.height))
+ self.window.setdocsize(0, self.height)
+ #
+ def redraw(self, area):
+ d = self.window.begindrawing()
+ (left, top), (right, bottom) = area
+ d.erase(area)
+ d.cliprect(area)
+ for p in self.paralist:
+ if top < p.bottom and p.top < bottom:
+ v = p.render(d, p.left, p.top, p.right)
+ if self.selection:
+ self.invert(d, self.selection)
+ d.close()
+ #
+ def setselection(self, new):
+ if new:
+ long1, long2 = new
+ pos1 = long1[:3]
+ pos2 = long2[:3]
+ new = pos1, pos2
+ if new <> self.selection:
+ d = self.window.begindrawing()
+ if self.selection:
+ self.invert(d, self.selection)
+ if new:
+ self.invert(d, new)
+ d.close()
+ self.selection = new
+ #
+ def getselection(self):
+ return self.selection
+ #
+ def extractselection(self):
+ if self.selection:
+ a, b = self.selection
+ return self.extractpart(a, b)
+ else:
+ return None
+ #
+ def invert(self, d, region):
+ long1, long2 = region
+ if long1 > long2: long1, long2 = long2, long1
+ para1, pos1 = long1
+ para2, pos2 = long2
+ while para1 < para2:
+ self.paralist[para1].invert(d, pos1, None)
+ pos1 = None
+ para1 = para1 + 1
+ self.paralist[para2].invert(d, pos1, pos2)
+ #
+ def search(self, prog):
+ import regex, string
+ if type(prog) == type(''):
+ prog = regex.compile(string.lower(prog))
+ if self.selection:
+ iold = self.selection[0][0]
+ else:
+ iold = -1
+ hit = None
+ for i in range(len(self.paralist)):
+ if i == iold or i < iold and hit:
+ continue
+ p = self.paralist[i]
+ text = string.lower(p.extract())
+ if prog.search(text) >= 0:
+ a, b = prog.regs[0]
+ long1 = i, a
+ long2 = i, b
+ hit = long1, long2
+ if i > iold:
+ break
+ if hit:
+ self.setselection(hit)
+ i = hit[0][0]
+ p = self.paralist[i]
+ self.window.show((p.left, p.top), (p.right, p.bottom))
+ return 1
+ else:
+ return 0
+ #
+ def showanchor(self, id):
+ for i in range(len(self.paralist)):
+ p = self.paralist[i]
+ if p.hasanchor(id):
+ long1 = i, 0
+ long2 = i, len(p.extract())
+ hit = long1, long2
+ self.setselection(hit)
+ self.window.show( \
+ (p.left, p.top), (p.right, p.bottom))
+ break
+
+
+# GL extensions
+
+class GLFontCache:
+ #
+ def __init__(self):
+ self.reset()
+ self.setfont('')
+ #
+ def reset(self):
+ self.fontkey = None
+ self.fonthandle = None
+ self.fontinfo = None
+ self.fontcache = {}
+ #
+ def close(self):
+ self.reset()
+ #
+ # Make the font named by fontkey the current one.  A key has the
+ # form 'name size'; the empty key stands for 'Times-Roman 12' and a
+ # key without a space gets ' 12' appended.  Nothing happens when the
+ # key equals the one already selected.  Handles are cached in
+ # self.fontcache under the key as given, under the rebuilt
+ # 'name size' key, and (for the handle returned by fm.findfont)
+ # under 'name 1'.
+ def setfont(self, fontkey):
+ if fontkey == '':
+ fontkey = 'Times-Roman 12'
+ elif ' ' not in fontkey:
+ fontkey = fontkey + ' 12'
+ if fontkey == self.fontkey:
+ return
+ if self.fontcache.has_key(fontkey):
+ handle = self.fontcache[fontkey]
+ else:
+ import string
+ # Split at the first space: name before it, size text after it
+ i = string.index(fontkey, ' ')
+ name, sizestr = fontkey[:i], fontkey[i:]
+ # NOTE(review): eval() executes whatever follows the first space
+ # of the font key; this is only safe while font keys come from
+ # trusted code -- consider a numeric conversion instead.
+ size = eval(sizestr)
+ key1 = name + ' 1'
+ key = name + ' ' + `size`
+ # NB key may differ from fontkey!
+ if self.fontcache.has_key(key):
+ handle = self.fontcache[key]
+ else:
+ if self.fontcache.has_key(key1):
+ handle = self.fontcache[key1]
+ else:
+ import fm
+ handle = fm.findfont(name)
+ self.fontcache[key1] = handle
+ handle = handle.scalefont(size)
+ self.fontcache[fontkey] = \
+ self.fontcache[key] = handle
+ self.fontkey = fontkey
+ # Only touch the font state when the handle really changed
+ if self.fonthandle <> handle:
+ self.fonthandle = handle
+ self.fontinfo = handle.getfontinfo()
+ handle.setfont()
+
+
+class GLMeasurer(GLFontCache):
+ #
+ def textwidth(self, text):
+ return self.fonthandle.getstrwidth(text)
+ #
+ def baseline(self):
+ return self.fontinfo[6] - self.fontinfo[3]
+ #
+ def lineheight(self):
+ return self.fontinfo[6]
+
+
+class GLWriter(GLFontCache):
+ #
+ # NOTES:
+ # (1) Use gl.ortho2 to use X pixel coordinates!
+ #
+ def text(self, (h, v), text):
+ import gl, fm
+ gl.cmov2i(h, v + self.fontinfo[6] - self.fontinfo[3])
+ fm.prstr(text)
+ #
+ def setfont(self, fontkey):
+ oldhandle = self.fonthandle
+ GLFontCache.setfont(fontkey)
+ if self.fonthandle <> oldhandle:
+ handle.setfont()
+
+
+class GLMeasurerWriter(GLMeasurer, GLWriter):
+ pass
+
+
+class GLBackEnd(SavingBackEnd):
+ #
+ def __init__(self, wid):
+ import gl
+ gl.winset(wid)
+ self.wid = wid
+ self.width = gl.getsize()[1]
+ self.height = 0
+ self.d = GLMeasurerWriter()
+ SavingBackEnd.__init__(self)
+ #
+ def finish(self):
+ pass
+ #
+ def addpara(self, p):
+ self.paralist.append(p)
+ self.height = p.render(self.d, 0, self.height, self.width)
+ #
+ def redraw(self):
+ import gl
+ gl.winset(self.wid)
+ width = gl.getsize()[1]
+ if width <> self.width:
+ setdocsize = 1
+ self.width = width
+ for p in self.paralist:
+ p.top = p.bottom = None
+ d = self.d
+ v = 0
+ for p in self.paralist:
+ v = p.render(d, 0, v, width)
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
new file mode 100644
index 0000000..af75e0d
--- /dev/null
+++ b/Lib/sgmllib.py
@@ -0,0 +1,321 @@
+# A parser for SGML, using the derived class as static DTD.
+
+# XXX This only supports those SGML features used by HTML.
+
+# XXX There should be a way to distinguish between PCDATA (parsed
+# character data -- the normal case), RCDATA (replaceable character
+# data -- only char and entity references and end tags are special)
+# and CDATA (character data -- only end tags are special).
+
+
+import regex
+import string
+
+
+# Regular expressions used for parsing
+
+incomplete = regex.compile( \
+ '<!-?\|</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*\|</?\|' + \
+ '&#[a-zA-Z0-9]*\|&[a-zA-Z][a-zA-Z0-9]*\|&')
+entityref = regex.compile('&[a-zA-Z][a-zA-Z0-9]*[;.]')
+charref = regex.compile('&#[a-zA-Z0-9]+;')
+starttagopen = regex.compile('<[a-zA-Z]')
+endtag = regex.compile('</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*>')
+commentopen = regex.compile('<!--')
+
+
+# SGML parser base class -- find tags and call handler functions.
+# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
+# The dtd is defined by deriving a class which defines methods
+# with special names to handle tags: start_foo and end_foo to handle
+# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
+# (Tags are converted to lower case for this purpose.) The data
+# between tags is passed to the parser by calling self.handle_data()
+# with some data as argument (the data may be split up in arbitrary
+# chunks). Entity references are passed by calling
+# self.handle_entityref() with the entity reference as argument.
+
+class SGMLParser:
+
+ # Interface -- initialize and reset this instance
+ def __init__(self):
+ self.reset()
+
+ # Interface -- reset this instance. Loses all unprocessed data
+ def reset(self):
+ self.rawdata = ''
+ self.stack = []
+ self.nomoretags = 0
+ self.literal = 0
+
+ # For derived classes only -- enter literal mode (CDATA) till EOF
+ def setnomoretags(self):
+ self.nomoretags = self.literal = 1
+
+ # For derived classes only -- enter literal mode (CDATA)
+ def setliteral(self, *args):
+ self.literal = 1
+
+ # Interface -- feed some data to the parser. Call this as
+ # often as you want, with as little or as much text as you
+ # want (may include '\n'). (The text is appended to the buffer and
+ # parsed as far as possible right away; close() handles the rest.)
+ def feed(self, data):
+ self.rawdata = self.rawdata + data
+ self.goahead(0)
+
+ # Interface -- handle the remaining data
+ def close(self):
+ self.goahead(1)
+
+ # Internal -- handle data as far as reasonable. May leave state
+ # and data to be processed by a subsequent call. If 'end' is
+ # true, force handling all data as if followed by EOF marker.
+ def goahead(self, end):
+ rawdata = self.rawdata
+ i = 0
+ n = len(rawdata)
+ while i < n:
+ if self.nomoretags:
+ self.handle_data(rawdata[i:n])
+ i = n
+ break
+ j = incomplete.search(rawdata, i)
+ if j < 0: j = n
+ if i < j: self.handle_data(rawdata[i:j])
+ i = j
+ if i == n: break
+ if rawdata[i] == '<':
+ if starttagopen.match(rawdata, i) >= 0:
+ if self.literal:
+ self.handle_data(rawdata[i])
+ i = i+1
+ continue
+ k = self.parse_starttag(i)
+ if k < 0: break
+ i = i + k
+ continue
+ k = endtag.match(rawdata, i)
+ if k >= 0:
+ j = i+k
+ self.parse_endtag(rawdata[i:j])
+ i = j
+ self.literal = 0
+ continue
+ if commentopen.match(rawdata, i) >= 0:
+ if self.literal:
+ self.handle_data(rawdata[i])
+ i = i+1
+ continue
+ k = self.parse_comment(i)
+ if k < 0: break
+ i = i+k
+ continue
+ elif rawdata[i] == '&':
+ k = charref.match(rawdata, i)
+ if k >= 0:
+ j = i+k
+ self.handle_charref(rawdata[i+2:j-1])
+ i = j
+ continue
+ k = entityref.match(rawdata, i)
+ if k >= 0:
+ j = i+k
+ self.handle_entityref(rawdata[i+1:j-1])
+ i = j
+ continue
+ else:
+ raise RuntimeError, 'neither < nor & ??'
+ # We get here only if incomplete matches but
+ # nothing else
+ k = incomplete.match(rawdata, i)
+ if k < 0: raise RuntimeError, 'no incomplete match ??'
+ j = i+k
+ if j == n: break # Really incomplete
+ self.handle_data(rawdata[i:j])
+ i = j
+ # end while
+ if end and i < n:
+ self.handle_data(rawdata[i:n])
+ i = n
+ self.rawdata = rawdata[i:]
+ # XXX if end: check for empty stack
+
+ # Internal -- parse comment, return length or -1 if not terminated
+ def parse_comment(self, i):
+ rawdata = self.rawdata
+ if rawdata[i:i+4] <> '<!--':
+ raise RuntimeError, 'unexpected call to handle_comment'
+ try:
+ j = string.index(rawdata, '--', i+4)
+ except string.index_error:
+ return -1
+ self.handle_comment(rawdata[i+4: j])
+ j = j+2
+ n = len(rawdata)
+ while j < n and rawdata[j] in ' \t\n': j = j+1
+ if j == n: return -1 # Wait for final '>'
+ if rawdata[j] == '>':
+ j = j+1
+ else:
+ print '*** comment not terminated with >'
+ print repr(rawdata[j-5:j]), '*!*', repr(rawdata[j:j+5])
+ return j-i
+
+ # Internal -- handle starttag, return length or -1 if not terminated
+ def parse_starttag(self, i):
+ rawdata = self.rawdata
+ try:
+ j = string.index(rawdata, '>', i)
+ except string.index_error:
+ return -1
+ # Now parse the data between i+1 and j into a tag and attrs
+ attrs = []
+ tagfind = regex.compile('[a-zA-Z][a-zA-Z0-9]*')
+ attrfind = regex.compile( \
+ '[ \t\n]+\([a-zA-Z][a-zA-Z0-9]*\)' + \
+ '\([ \t\n]*=[ \t\n]*' + \
+ '\(\'[^\']*\';\|"[^"]*"\|[-a-zA-Z0-9./:+*%?!()_#]+\)\)?')
+ k = tagfind.match(rawdata, i+1)
+ if k < 0:
+ raise RuntimeError, 'unexpected call to parse_starttag'
+ k = i+1+k
+ tag = string.lower(rawdata[i+1:k])
+ while k < j:
+ l = attrfind.match(rawdata, k)
+ if l < 0: break
+ regs = attrfind.regs
+ a1, b1 = regs[1]
+ a2, b2 = regs[2]
+ a3, b3 = regs[3]
+ attrname = rawdata[a1:b1]
+ if '=' in rawdata[k:k+l]:
+ attrvalue = rawdata[a3:b3]
+ if attrvalue[:1] == '\'' == attrvalue[-1:] or \
+ attrvalue[:1] == '"' == attrvalue[-1:]:
+ attrvalue = attrvalue[1:-1]
+ else:
+ attrvalue = ''
+ attrs.append(string.lower(attrname), attrvalue)
+ k = k + l
+ j = j+1
+ try:
+ method = getattr(self, 'start_' + tag)
+ except AttributeError:
+ try:
+ method = getattr(self, 'do_' + tag)
+ except AttributeError:
+ self.unknown_starttag(tag, attrs)
+ return j-i
+ method(attrs)
+ return j-i
+ self.stack.append(tag)
+ method(attrs)
+ return j-i
+
+ # Internal -- parse endtag
+ def parse_endtag(self, data):
+ if data[:2] <> '</' or data[-1:] <> '>':
+ raise RuntimeError, 'unexpected call to parse_endtag'
+ tag = string.lower(string.strip(data[2:-1]))
+ try:
+ method = getattr(self, 'end_' + tag)
+ except AttributeError:
+ self.unknown_endtag(tag)
+ return
+ if self.stack and self.stack[-1] == tag:
+ del self.stack[-1]
+ else:
+ print '*** Unbalanced </' + tag + '>'
+ print '*** Stack:', self.stack
+ found = None
+ for i in range(len(self.stack)):
+ if self.stack[i] == tag: found = i
+ if found <> None:
+ del self.stack[found:]
+ method()
+
+ # Example -- handle character reference, no need to override
+ def handle_charref(self, name):
+ try:
+ n = string.atoi(name)
+ except string.atoi_error:
+ self.unknown_charref(name)
+ return
+ if not 0 <= n <= 255:
+ self.unknown_charref(name)
+ return
+ self.handle_data(chr(n))
+
+ # Definition of entities -- derived classes may override
+ entitydefs = \
+ {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
+
+ # Example -- handle entity reference, no need to override
+ def handle_entityref(self, name):
+ table = self.__class__.entitydefs
+ name = string.lower(name)
+ if table.has_key(name):
+ self.handle_data(table[name])
+ else:
+ self.unknown_entityref(name)
+ return
+
+ # Example -- handle data, should be overridden
+ def handle_data(self, data):
+ pass
+
+ # Example -- handle comment, could be overridden
+ def handle_comment(self, data):
+ pass
+
+ # To be overridden -- handlers for unknown objects
+ def unknown_starttag(self, tag, attrs): pass
+ def unknown_endtag(self, tag): pass
+ def unknown_charref(self, ref): pass
+ def unknown_entityref(self, ref): pass
+
+
+class TestSGML(SGMLParser):
+
+ def handle_data(self, data):
+ r = repr(data)
+ if len(r) > 72:
+ r = r[:35] + '...' + r[-35:]
+ print 'data:', r
+
+ def handle_comment(self, data):
+ r = repr(data)
+ if len(r) > 68:
+ r = r[:32] + '...' + r[-32:]
+ print 'comment:', r
+
+ def unknown_starttag(self, tag, attrs):
+ print 'start tag: <' + tag,
+ for name, value in attrs:
+ print name + '=' + '"' + value + '"',
+ print '>'
+
+ def unknown_endtag(self, tag):
+ print 'end tag: </' + tag + '>'
+
+ def unknown_entityref(self, ref):
+ print '*** unknown entity ref: &' + ref + ';'
+
+ def unknown_charref(self, ref):
+ print '*** unknown char ref: &#' + ref + ';'
+
+
+def test():
+ file = 'test.html'
+ f = open(file, 'r')
+ x = TestSGML()
+ while 1:
+ line = f.readline()
+ if not line:
+ x.close()
+ break
+ x.feed(line)
+
+
+#test()