1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
|
# Kinds of parsed items.  The order matters: iter_sections() uses a
# section kind's index in this list as the position at which the
# current section path gets truncated and replaced.
KINDS = [
    'section-major',  # index 0
    'section-minor',  # index 1
    'section-group',  # index 2
    'row',            # a regular (non-section) line
]
def iter_clean_lines(lines):
    """Yield (stripped, raw) pairs for each line that is not a comment.

    A line is dropped when its stripped text starts with '#', unless
    the raw line itself starts with '##' in the first column.  Blank
    lines are passed through (as an empty stripped string).

    Args:
        lines: an iterable of text lines.

    Yields:
        (line, rawline) tuples, where "line" is rawline.strip().
    """
    for rawline in lines:
        line = rawline.strip()
        # The '##' check is made against the raw line, so an indented
        # '## ...' line is skipped along with the plain '#' comments.
        if line.startswith('#') and not rawline.startswith('##'):
            continue
        yield line, rawline
def parse_table_lines(lines):
    """Parse the lines of a table file into (kind, value) pairs.

    Comment lines are first filtered out with iter_clean_lines().
    The remaining lines are interpreted as follows:

    * a line starting with '#####' (and made only of '#') announces
      a 'section-major' title on the next line
    * a line starting with '##---' ('##' followed only by '-')
      announces a 'section-minor' title on the next line
    * a '## <title>' line right after one of those dividers yields
      (<divider kind>, <title>); the line after the title must
      then be blank
    * a '## <title>' line with no pending divider yields
      ('section-group', <title>); the previous line must be blank
    * any other non-empty line yields ('row', <stripped line>)

    Args:
        lines: an iterable of text lines.

    Yields:
        (kind, value) tuples.

    Raises:
        NotImplementedError: if a divider is not immediately followed
            by a title line, or a divider's title is not followed by
            the expected (blank) line.
        AssertionError: on other layout violations (checked via assert).
    """
    lines = iter_clean_lines(lines)

    # The pending divider, if any: (kind, line expected after the
    # title, the divider line itself).
    group = None
    prev = ''
    for line, rawline in lines:
        if line.startswith('## '):
            assert not rawline.startswith(' '), (line, rawline)
            if group:
                # The title belongs to the divider on the previous line.
                assert prev, (line, rawline)
                kind, after, _ = group
                assert kind and kind != 'section-group', (group, line, rawline)
                assert after is not None, (group, line, rawline)
            else:
                # A bare title must follow a blank line (or start the file).
                assert not prev, (prev, line, rawline)
                kind, after = group = ('section-group', None)
            title = line[3:].lstrip()
            assert title, (line, rawline)
            if after is not None:
                # Consume the line that must follow the title.
                try:
                    line, rawline = next(lines)
                except StopIteration:
                    line = None
                if line != after:
                    raise NotImplementedError((group, line, rawline))
            yield kind, title
            group = None
        elif group:
            # A divider must be immediately followed by its title.
            raise NotImplementedError((group, line, rawline))
        elif line.startswith('##---'):
            assert line.rstrip('-') == '##', (line, rawline)
            group = ('section-minor', '', line)
        elif line.startswith('#####'):
            assert not line.strip('#'), (line, rawline)
            group = ('section-major', '', line)
        elif line:
            yield 'row', line
        prev = line
def iter_sections(lines):
    """Yield (section, row) for each row found under a section.

    "section" is a tuple of section titles, outermost first, that
    was in effect when the row was encountered.

    A single row found before any section title is taken to be the
    table header and is not yielded.

    Args:
        lines: an iterable of text lines (see parse_table_lines()).

    Yields:
        (section, row) tuples.

    Raises:
        NotImplementedError: if any other row shows up before the
            first section title.
    """
    # None: nothing seen yet; False: a section came first (no header);
    # otherwise the header row.
    header = None
    section = []
    for kind, value in parse_table_lines(lines):
        if kind == 'row':
            if not section:
                if header is None:
                    header = value
                    continue
                raise NotImplementedError(repr(value))
            yield tuple(section), value
        else:
            if header is None:
                header = False
            # Drop everything from this kind's position onward and
            # put the new title there instead.
            start = KINDS.index(kind)
            section[start:] = [value]
def collect_sections(lines):
    """Group the table's rows by section.

    Args:
        lines: an iterable of text lines (see iter_sections()).

    Returns:
        A dict mapping each section (a tuple of titles) to the list
        of its rows, in the order they were encountered.
    """
    sections = {}
    for section, row in iter_sections(lines):
        sections.setdefault(section, []).append(row)
    return sections
def collate_sections(lines):
    """Build a tree of sections, with rows attached at every level.

    Args:
        lines: an iterable of text lines (see collect_sections()).

    Returns:
        A dict mapping each top-level section title to a tuple
        (children, secrows, totalrows), where "children" is a dict
        of the same shape for the subsections, "secrows" holds the
        rows that sit directly in that section, and "totalrows" holds
        the rows of that section and of everything below it.
    """
    collated = {}
    for section, rows in collect_sections(lines).items():
        parent = collated
        last = len(section) - 1
        for depth, name in enumerate(section):
            try:
                child, secrows, totalrows = parent[name]
            except KeyError:
                child = {}
                secrows = []
                totalrows = []
                parent[name] = (child, secrows, totalrows)
            parent = child
            # Only the final title in the path owns the rows directly;
            # every section along the path counts them in its total.
            if depth == last:
                secrows.extend(rows)
            totalrows.extend(rows)
    return collated
#############################
# the commands
def cmd_count_by_section(lines):
    """Yield the lines of a report of row counts per section.

    Each section gets one line made of its cumulative row count in
    parentheses (left blank when the cumulative rows are the same as
    the section's own rows), its own row count (left blank when it
    has none), and its title indented by nesting depth.  A divider
    precedes every top-level section, and the report ends with a
    divider and a '(total: N)' line.

    Args:
        lines: an iterable of text lines (see collate_sections()).

    Yields:
        The report, one str per output line.
    """
    div = ' ' + '-' * 50
    total = 0

    def render_tree(root, depth=0):
        nonlocal total
        indent = ' ' * depth
        for name, (subroot, rows, totalrows) in root.items():
            sectotal = f'({len(totalrows)})' if totalrows != rows else ''
            count = len(rows) if rows else ''
            if depth == 0:
                yield div
            yield f'{sectotal:>7} {count:>4} {indent}{name}'
            yield from render_tree(subroot, depth+1)
            # Summing only each section's own rows counts every row once.
            total += len(rows)

    sections = collate_sections(lines)
    yield from render_tree(sections)
    yield div
    yield f'(total: {total})'
#############################
# the script
def parse_args(argv=None, prog=None):
    """Parse the command line.

    Args:
        argv: the arguments to parse; None is passed through to
            argparse as-is.
        prog: the program name handed to argparse.ArgumentParser.

    Returns:
        A dict of the parsed arguments (currently just "filename"),
        suitable for passing to main() as keyword arguments.
    """
    import argparse
    parser = argparse.ArgumentParser(prog=prog)
    parser.add_argument('filename')

    args = parser.parse_args(argv)
    ns = vars(args)

    return ns
def main(filename):
    """Print the per-section row counts for the given table file.

    Args:
        filename: the path of the table file to read.
    """
    with open(filename) as infile:
        for line in cmd_count_by_section(infile):
            print(line)
# Script entry point: the parsed arguments become main()'s keyword args.
if __name__ == '__main__':
    kwargs = parse_args()
    main(**kwargs)
|