1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
#! /usr/bin/env python
"""Show file statistics by extension."""
import os
import sys
class Stats:
def __init__(self):
self.stats = {}
def statargs(self, args):
for arg in args:
if os.path.isdir(arg):
self.statdir(arg)
elif os.path.isfile(arg):
self.statfile(arg)
else:
sys.stderr.write("Can't find %s\n" % file)
self.addstats("<???>", "unknown", 1)
def statdir(self, dir):
self.addstats("<dir>", "dirs", 1)
try:
names = os.listdir(dir)
except os.error, err:
sys.stderr.write("Can't list %s: %s\n" % (file, err))
self.addstats(ext, "unlistable", 1)
return
names.sort()
for name in names:
if name.startswith(".#"):
continue # Skip CVS temp files
if name.endswith("~"):
continue# Skip Emacs backup files
full = os.path.join(dir, name)
if os.path.islink(full):
self.addstats("<lnk>", "links", 1)
elif os.path.isdir(full):
self.statdir(full)
else:
self.statfile(full)
def statfile(self, file):
head, ext = os.path.splitext(file)
head, base = os.path.split(file)
if ext == base:
ext = "" # E.g. .cvsignore is deemed not to have an extension
ext = os.path.normcase(ext)
if not ext:
ext = "<none>"
self.addstats(ext, "files", 1)
try:
f = open(file, "rb")
except IOError, err:
sys.stderr.write("Can't open %s: %s\n" % (file, err))
self.addstats(ext, "unopenable", 1)
return
data = f.read()
f.close()
self.addstats(ext, "bytes", len(data))
if '\0' in data:
self.addstats(ext, "binary", 1)
return
if not data:
self.addstats(ext, "empty", 1)
#self.addstats(ext, "chars", len(data))
lines = data.splitlines()
self.addstats(ext, "lines", len(lines))
del lines
words = data.split()
self.addstats(ext, "words", len(words))
def addstats(self, ext, key, n):
d = self.stats.setdefault(ext, {})
d[key] = d.get(key, 0) + n
def report(self):
exts = self.stats.keys()
exts.sort()
# Get the column keys
columns = {}
for ext in exts:
columns.update(self.stats[ext])
cols = columns.keys()
cols.sort()
colwidth = {}
colwidth["ext"] = max([len(ext) for ext in exts])
minwidth = 6
self.stats["TOTAL"] = {}
for col in cols:
total = 0
cw = max(minwidth, len(col))
for ext in exts:
value = self.stats[ext].get(col)
if value is None:
w = 0
else:
w = len("%d" % value)
total += value
cw = max(cw, w)
cw = max(cw, len(str(total)))
colwidth[col] = cw
self.stats["TOTAL"][col] = total
exts.append("TOTAL")
for ext in exts:
self.stats[ext]["ext"] = ext
cols.insert(0, "ext")
def printheader():
for col in cols:
print "%*s" % (colwidth[col], col),
print
printheader()
for ext in exts:
for col in cols:
value = self.stats[ext].get(col, "")
print "%*s" % (colwidth[col], value),
print
printheader() # Another header at the bottom
def main():
args = sys.argv[1:]
if not args:
args = [os.curdir]
s = Stats()
s.statargs(args)
s.report()
if __name__ == "__main__":
main()
|