1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
#! /usr/bin/env python3
"""Show file statistics by extension."""
import os
import sys
class Stats:
def __init__(self):
self.stats = {}
def statargs(self, args):
for arg in args:
if os.path.isdir(arg):
self.statdir(arg)
elif os.path.isfile(arg):
self.statfile(arg)
else:
sys.stderr.write("Can't find %s\n" % arg)
self.addstats("<???>", "unknown", 1)
def statdir(self, dir):
self.addstats("<dir>", "dirs", 1)
try:
names = os.listdir(dir)
except OSError as err:
sys.stderr.write("Can't list %s: %s\n" % (dir, err))
self.addstats("<dir>", "unlistable", 1)
return
for name in sorted(names):
if name.startswith(".#"):
continue # Skip CVS temp files
if name.endswith("~"):
continue # Skip Emacs backup files
full = os.path.join(dir, name)
if os.path.islink(full):
self.addstats("<lnk>", "links", 1)
elif os.path.isdir(full):
self.statdir(full)
else:
self.statfile(full)
def statfile(self, filename):
head, ext = os.path.splitext(filename)
head, base = os.path.split(filename)
if ext == base:
ext = "" # E.g. .cvsignore is deemed not to have an extension
ext = os.path.normcase(ext)
if not ext:
ext = "<none>"
self.addstats(ext, "files", 1)
try:
with open(filename, "rb") as f:
data = f.read()
except IOError as err:
sys.stderr.write("Can't open %s: %s\n" % (filename, err))
self.addstats(ext, "unopenable", 1)
return
self.addstats(ext, "bytes", len(data))
if b'\0' in data:
self.addstats(ext, "binary", 1)
return
if not data:
self.addstats(ext, "empty", 1)
# self.addstats(ext, "chars", len(data))
lines = str(data, "latin-1").splitlines()
self.addstats(ext, "lines", len(lines))
del lines
words = data.split()
self.addstats(ext, "words", len(words))
def addstats(self, ext, key, n):
d = self.stats.setdefault(ext, {})
d[key] = d.get(key, 0) + n
def report(self):
exts = sorted(self.stats)
# Get the column keys
columns = {}
for ext in exts:
columns.update(self.stats[ext])
cols = sorted(columns)
colwidth = {}
colwidth["ext"] = max([len(ext) for ext in exts])
minwidth = 6
self.stats["TOTAL"] = {}
for col in cols:
total = 0
cw = max(minwidth, len(col))
for ext in exts:
value = self.stats[ext].get(col)
if value is None:
w = 0
else:
w = len("%d" % value)
total += value
cw = max(cw, w)
cw = max(cw, len(str(total)))
colwidth[col] = cw
self.stats["TOTAL"][col] = total
exts.append("TOTAL")
for ext in exts:
self.stats[ext]["ext"] = ext
cols.insert(0, "ext")
def printheader():
for col in cols:
print("%*s" % (colwidth[col], col), end=' ')
print()
printheader()
for ext in exts:
for col in cols:
value = self.stats[ext].get(col, "")
print("%*s" % (colwidth[col], value), end=' ')
print()
printheader() # Another header at the bottom
def main():
args = sys.argv[1:]
if not args:
args = [os.curdir]
s = Stats()
s.statargs(args)
s.report()
if __name__ == "__main__":
main()
|