summaryrefslogtreecommitdiffstats
path: root/Tools/scripts/byext.py
blob: b79ff37e8cf036eb320ed997171bd990e6f064da (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#! /usr/bin/env python3

"""Show file statistics by extension."""

import os
import sys


class Stats:
    """Collect per-extension file statistics and print them as a table.

    ``self.stats`` maps an extension label (e.g. ``".py"``, ``"<none>"``,
    ``"<dir>"``, ``"<lnk>"``, ``"<???>"``) to a dict of counter name -> int.
    """

    def __init__(self):
        """Start with no statistics collected."""
        self.stats = {}

    def statargs(self, args):
        """Collect statistics for every path in *args*.

        Directories are walked recursively, regular files are examined
        directly, and anything else is reported on stderr and counted
        under the ``"<???>"`` label.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Count directory *dir* and recurse into its entries.

        Entries are visited in sorted order.  Symbolic links are counted
        under ``"<lnk>"`` and never followed.  A directory that cannot be
        listed is reported on stderr and counted as ``"unlistable"``.
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = os.listdir(dir)
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in sorted(names):
            if name.startswith(".#"):
                continue  # Skip CVS temp files
            if name.endswith("~"):
                continue  # Skip Emacs backup files
            full = os.path.join(dir, name)
            # Test for links first so a link to a directory is not followed.
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Count *filename* under its (case-normalized) extension.

        Always bumps ``"files"``.  If the file can be read, its size is
        added to ``"bytes"``; a file containing a NUL byte is counted as
        ``"binary"`` and gets no line/word counts.  Otherwise ``"lines"``
        and ``"words"`` are accumulated, and an empty file is additionally
        counted as ``"empty"``.  Unreadable files are reported on stderr
        and counted as ``"unopenable"``.
        """
        ext = os.path.splitext(filename)[1]
        base = os.path.basename(filename)
        if ext == base:
            ext = ""  # E.g. .cvsignore is deemed not to have an extension
        ext = os.path.normcase(ext)
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        try:
            with open(filename, "rb") as f:
                data = f.read()
        except OSError as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        self.addstats(ext, "bytes", len(data))
        if b'\0' in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        # latin-1 maps every byte to a character, so decoding cannot fail.
        self.addstats(ext, "lines", len(str(data, "latin-1").splitlines()))
        self.addstats(ext, "words", len(data.split()))

    def addstats(self, ext, key, n):
        """Add *n* to counter *key* of extension *ext*, creating it at 0."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the collected statistics as a right-aligned table.

        One row per extension (sorted), followed by a ``TOTAL`` row, with
        the column header printed both above and below the rows.  Counters
        an extension does not have are left blank.

        The method only reads ``self.stats``; it can be called repeatedly
        and the collected data stays usable afterwards.
        """
        total_label = "TOTAL"
        minwidth = 6
        exts = sorted(self.stats)
        # The columns are the union of all counter names seen anywhere.
        cols = sorted({key for ext in exts for key in self.stats[ext]})

        # The label column must fit the TOTAL row too; this also keeps
        # max() from seeing an empty sequence when nothing was collected.
        colwidth = {"ext": max(len(label) for label in exts + [total_label])}
        totals = {}
        for col in cols:
            values = [self.stats[ext][col]
                      for ext in exts if col in self.stats[ext]]
            total = sum(values)
            totals[col] = total
            widths = [len("%d" % value) for value in values]
            colwidth[col] = max([minwidth, len(col), len(str(total))] + widths)

        def printrow(label, values):
            print("%*s" % (colwidth["ext"], label), end=' ')
            for col in cols:
                print("%*s" % (colwidth[col], values.get(col, "")), end=' ')
            print()

        header = {col: col for col in cols}
        printrow("ext", header)
        for ext in exts:
            printrow(ext, self.stats[ext])
        printrow(total_label, totals)
        printrow("ext", header)  # Another header at the bottom


def main():
    """Gather statistics for the command-line paths and print the report.

    When no paths are given on the command line, the current directory
    is examined instead.
    """
    targets = sys.argv[1:] or [os.curdir]
    collector = Stats()
    collector.statargs(targets)
    collector.report()


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()