# Source path: root/Doc/tools/sgmlconv/esistools.py
# Blob: b89476ca087c63a3b27ad67fd175d908a37fff19 (plain)
"""Miscellaneous utility functions useful for dealing with ESIS streams."""
__version__ = '$Revision$'

import re
import string
import sys
import xml.dom.core
import xml.dom.esis_builder


# Matches a non-empty run of characters that contains no backslash.
_data_rx = re.compile(r"[^\\][^\\]*")

def decode(s):
    r"""Return *s* with its backslash escapes expanded.

    A backslash followed by ``n`` becomes a newline and a doubled
    backslash becomes a single backslash; every other character is
    copied through unchanged.

    Raises ValueError if a backslash is followed by any other character,
    or if a backslash is the last character of *s*.  The message carries
    the repr of the unprocessed remainder of the string.
    """
    pieces = []
    pos = 0
    end = len(s)
    # Scan with an index instead of repeatedly re-slicing the input, so the
    # work stays linear in len(s).
    while pos < end:
        m = _data_rx.match(s, pos)
        if m:
            # A run of ordinary (non-backslash) characters.
            pieces.append(m.group())
            pos = m.end()
            continue
        # s[pos] is a backslash.  Slicing (rather than indexing) yields ''
        # for a trailing lone backslash, so that case falls through to the
        # ValueError below instead of raising IndexError.
        escaped = s[pos + 1:pos + 2]
        if escaped == "\\":
            pieces.append("\\")
        elif escaped == "n":
            pieces.append("\n")
        else:
            raise ValueError("can't handle " + repr(s[pos:]))
        pos = pos + 2
    return "".join(pieces)


# Per-character translation table used by encode(): each of the 256
# single-byte characters maps to itself, except newline and backslash,
# which map to their two-character escaped forms.
_charmap = {}
for c in map(chr, range(256)):
    _charmap[c] = c
_charmap["\n"] = r"\n"
_charmap["\\"] = r"\\"
del c

def encode(s):
    r"""Return *s* with newlines and backslashes escaped.

    Each newline is replaced by the two characters ``\n`` and each
    backslash by ``\\``.  Every other character, including any that is
    not present in the translation table, is copied through unchanged.
    """
    # Fall back to the character itself so that input outside the
    # 256-entry table does not put None into the join.
    return ''.join([_charmap.get(ch, ch) for ch in s])


class ExtendedEsisBuilder(xml.dom.esis_builder.EsisBuilder):
    """EsisBuilder variant that also records which element types were
    flagged as empty in the stream.

    feed() handles these event codes (the first character of each line):

        (   start an element, using the attributes collected so far
        )   end the current element
        A   store an attribute for the next element to start
        -   character data
        e   mark the next element to start as empty
        &   entity reference
        C   stop processing

    Any other code is reported on stderr and otherwise ignored.
    """

    def __init__(self, *args, **kw):
        """Initialise the empty-element bookkeeping, then the base class.

        All arguments are forwarded unchanged to EsisBuilder.__init__();
        buildFragment() is called afterwards.
        """
        # Names of element types seen with a preceding 'e' event.
        self.__empties = {}
        # Set by an 'e' event; consumed by the next '(' event.
        self.__is_empty = 0
        xml.dom.esis_builder.EsisBuilder.__init__(self, *args, **kw)
        self.buildFragment()

    def feed(self, data):
        """Process *data*, a string of newline-separated event lines.

        Lines are handled in order.  Processing stops at the first empty
        line (normally the empty string that follows the final newline)
        or at a 'C' event.
        """
        for line in data.split('\n'):
            if not line:
                break
            event = line[0]
            text = line[1:]
            if event == '(':
                element = self.document.createElement(text, self.attr_store)
                # The collected attributes belong to this element only.
                self.attr_store = {}
                self.push(element)
                if self.__is_empty:
                    self.__empties[text] = text
                    self.__is_empty = 0
            elif event == ')':
                self.pop()
            elif event == 'A':
                # The line is split into at most three space-separated
                # fields; the first is the name and the third the value.
                fields = text.split(' ', 2)
                if len(fields) < 3:
                    # No value field (e.g. "name IMPLIED" in nsgmls
                    # output): there is nothing to store, so skip the
                    # line rather than fail with IndexError.
                    continue
                self.attr_store[fields[0]] = decode(fields[2])
            elif event == '-':
                node = self.document.createText(decode(text))
                self.push(node)
            elif event == 'C':
                return
            elif event == 'e':
                self.__is_empty = 1
            elif event == '&':
                eref = self.document.createEntityReference(text)
                self.push(eref)
            else:
                sys.stderr.write('Unknown event: %s\n' % line)

    def get_empties(self):
        """Return a list of the element names recorded as empty."""
        # list() so callers get a real list on every Python version.
        return list(self.__empties.keys())