Tools/c-analyzer/table-file.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154


KINDS = [
    'section-major',
    'section-minor',
    'section-group',
    'row',
]


def iter_clean_lines(lines):
    lines = iter(lines)
    for rawline in lines:
        line = rawline.strip()
        if line.startswith('#') and not rawline.startswith('##'):
            continue
        yield line, rawline


def parse_table_lines(lines):
    lines = iter_clean_lines(lines)

    group = None
    prev = ''
    for line, rawline in lines:
        if line.startswith('## '):
            assert not rawline.startswith(' '), (line, rawline)
            if group:
                assert prev, (line, rawline)
                kind, after, _ = group
                assert kind and kind != 'section-group', (group, line, rawline)
                assert after is not None, (group, line, rawline)
            else:
                assert not prev, (prev, line, rawline)
                kind, after = group = ('section-group', None)
            title = line[3:].lstrip()
            assert title, (line, rawline)
            if after is not None:
                try:
                    line, rawline = next(lines)
                except StopIteration:
                    line = None
                if line != after:
                    raise NotImplementedError((group, line, rawline))
            yield kind, title
            group = None
        elif group:
            raise NotImplementedError((group, line, rawline))
        elif line.startswith('##---'):
            assert line.rstrip('-') == '##', (line, rawline)
            group = ('section-minor', '', line)
        elif line.startswith('#####'):
            assert not line.strip('#'), (line, rawline)
            group = ('section-major', '', line)
        elif line:
            yield 'row', line
        prev = line


def iter_sections(lines):
    header = None
    section = []
    for kind, value in parse_table_lines(lines):
        if kind == 'row':
            if not section:
                if header is None:
                    header = value
                    continue
                raise NotImplementedError(repr(value))
            yield tuple(section), value
        else:
            if header is None:
                header = False
            start = KINDS.index(kind)
            section[start:] = [value]


def collect_sections(lines):
    sections = {}
    for section, row in iter_sections(lines):
        if section not in sections:
            sections[section] = [row]
        else:
            sections[section].append(row)
    return sections


def collate_sections(lines):
    collated = {}
    for section, rows in collect_sections(lines).items():
        parent = collated
        current = ()
        for name in section:
            current += (name,)
            try:
                child, secrows, totalrows = parent[name]
            except KeyError:
                child = {}
                secrows = []
                totalrows = []
                parent[name] = (child, secrows, totalrows)
            parent = child
            if current == section:
                secrows.extend(rows)
            totalrows.extend(rows)
    return collated


#############################
# the commands

def cmd_count_by_section(lines):
    div = ' ' + '-' * 50
    total = 0
    def render_tree(root, depth=0):
        nonlocal total
        indent = '    ' * depth
        for name, data in root.items():
            subroot, rows, totalrows = data
            sectotal = f'({len(totalrows)})' if totalrows != rows else ''
            count = len(rows) if rows else ''
            if depth == 0:
                yield div
            yield f'{sectotal:>7} {count:>4}  {indent}{name}'
            yield from render_tree(subroot, depth+1)
            total += len(rows)
    sections = collate_sections(lines)
    yield from render_tree(sections)
    yield div
    yield f'(total: {total})'


#############################
# the script

def parse_args(argv=None, prog=None):
    import argparse
    parser = argparse.ArgumentParser(prog=prog)
    parser.add_argument('filename')

    args = parser.parse_args(argv)
    ns = vars(args)

    return ns


def main(filename):
    with open(filename) as infile:
        for line in cmd_count_by_section(infile):
            print(line)


if __name__ == '__main__':
    kwargs = parse_args()
    main(**kwargs)