Tools/scripts/fixcid.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316

#! /usr/bin/env python3

# Perform massive identifier substitution on C source files.
# This actually tokenizes the files (to some extent) so it can
# avoid making substitutions inside strings or comments.
# Inside strings, substitutions are never made; inside comments,
# it is a user option (off by default).
#
# The substitutions are read from one or more files whose lines,
# when not empty, after stripping comments starting with #,
# must contain exactly two words separated by whitespace: the
# old identifier and its replacement.
#
# The option -r reverses the sense of the substitutions (this may be
# useful to undo a particular substitution).
#
# If the old identifier is prefixed with a '*' (with no intervening
# whitespace), then it will not be substituted inside comments.
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a C file (ends in .h or .c).  The special filename '-' means
# operate in filter mode: read stdin, write stdout.
#
# Symbolic links are always ignored (except as explicit directory
# arguments).
#
# The original files are kept as back-up with a "~" suffix.
#
# Changes made are reported to stdout in a diff-like format.
#
# NB: by changing only the function fixline() you can turn this
# into a program for different changes to C source files; by
# changing the function wanted() you can make a different selection of
# files.

import sys
import re
import os
from stat import *
import getopt

err = sys.stderr.write
dbg = err
rep = sys.stdout.write

def usage():
    progname = sys.argv[0]
    err('Usage: ' + progname +
              ' [-c] [-r] [-s file] ... file-or-directory ...\n')
    err('\n')
    err('-c           : substitute inside comments\n')
    err('-r           : reverse direction for following -s options\n')
    err('-s substfile : add a file of substitutions\n')
    err('\n')
    err('Each non-empty non-comment line in a substitution file must\n')
    err('contain exactly two words: an identifier and its replacement.\n')
    err('Comments start with a # character and end at end of line.\n')
    err('If an identifier is preceded with a *, it is not substituted\n')
    err('inside a comment even when -c is specified.\n')

def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'crs:')
    except getopt.error as msg:
        err('Options error: ' + str(msg) + '\n')
        usage()
        sys.exit(2)
    bad = 0
    if not args: # No arguments
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-c':
            setdocomments()
        if opt == '-r':
            setreverse()
        if opt == '-s':
            addsubst(arg)
    for arg in args:
        if os.path.isdir(arg):
            if recursedown(arg): bad = 1
        elif os.path.islink(arg):
            err(arg + ': will not process symbolic links\n')
            bad = 1
        else:
            if fix(arg): bad = 1
    sys.exit(bad)

# Change this regular expression to select a different set of files
Wanted = r'^[a-zA-Z0-9_]+\.[ch]$'
def wanted(name):
    return re.match(Wanted, name)

def recursedown(dirname):
    dbg('recursedown(%r)\n' % (dirname,))
    bad = 0
    try:
        names = os.listdir(dirname)
    except OSError as msg:
        err(dirname + ': cannot list directory: ' + str(msg) + '\n')
        return 1
    names.sort()
    subdirs = []
    for name in names:
        if name in (os.curdir, os.pardir): continue
        fullname = os.path.join(dirname, name)
        if os.path.islink(fullname): pass
        elif os.path.isdir(fullname):
            subdirs.append(fullname)
        elif wanted(name):
            if fix(fullname): bad = 1
    for fullname in subdirs:
        if recursedown(fullname): bad = 1
    return bad

def fix(filename):
##  dbg('fix(%r)\n' % (filename,))
    if filename == '-':
        # Filter mode
        f = sys.stdin
        g = sys.stdout
    else:
        # File replacement mode
        try:
            f = open(filename, 'r')
        except IOError as msg:
            err(filename + ': cannot open: ' + str(msg) + '\n')
            return 1
        head, tail = os.path.split(filename)
        tempname = os.path.join(head, '@' + tail)
        g = None
    # If we find a match, we rewind the file and start over but
    # now copy everything to a temp file.
    lineno = 0
    initfixline()
    while 1:
        line = f.readline()
        if not line: break
        lineno = lineno + 1
        while line[-2:] == '\\\n':
            nextline = f.readline()
            if not nextline: break
            line = line + nextline
            lineno = lineno + 1
        newline = fixline(line)
        if newline != line:
            if g is None:
                try:
                    g = open(tempname, 'w')
                except IOError as msg:
                    f.close()
                    err(tempname+': cannot create: '+
                        str(msg)+'\n')
                    return 1
                f.seek(0)
                lineno = 0
                initfixline()
                rep(filename + ':\n')
                continue # restart from the beginning
            rep(repr(lineno) + '\n')
            rep('< ' + line)
            rep('> ' + newline)
        if g is not None:
            g.write(newline)

    # End of file
    if filename == '-': return 0 # Done in filter mode
    f.close()
    if not g: return 0 # No changes
    g.close()

    # Finishing touch -- move files

    # First copy the file's mode to the temp file
    try:
        statbuf = os.stat(filename)
        os.chmod(tempname, statbuf[ST_MODE] & 0o7777)
    except OSError as msg:
        err(tempname + ': warning: chmod failed (' + str(msg) + ')\n')
    # Then make a backup of the original file as filename~
    try:
        os.rename(filename, filename + '~')
    except OSError as msg:
        err(filename + ': warning: backup failed (' + str(msg) + ')\n')
    # Now move the temp file to the original file
    try:
        os.rename(tempname, filename)
    except OSError as msg:
        err(filename + ': rename failed (' + str(msg) + ')\n')
        return 1
    # Return success
    return 0

# Tokenizing ANSI C (partly)

Identifier = '(struct )?[a-zA-Z_][a-zA-Z0-9_]+'
String = r'"([^\n\\"]|\\.)*"'
Char = r"'([^\n\\']|\\.)*'"
CommentStart = r'/\*'
CommentEnd = r'\*/'

Hexnumber = '0[xX][0-9a-fA-F]*[uUlL]*'
Octnumber = '0[0-7]*[uUlL]*'
Decnumber = '[1-9][0-9]*[uUlL]*'
Intnumber = Hexnumber + '|' + Octnumber + '|' + Decnumber
Exponent = '[eE][-+]?[0-9]+'
Pointfloat = r'([0-9]+\.[0-9]*|\.[0-9]+)(' + Exponent + r')?'
Expfloat = '[0-9]+' + Exponent
Floatnumber = Pointfloat + '|' + Expfloat
Number = Floatnumber + '|' + Intnumber

# Anything else is an operator -- don't list this explicitly because of '/*'

OutsideComment = (Identifier, Number, String, Char, CommentStart)
OutsideCommentPattern = '(' + '|'.join(OutsideComment) + ')'
OutsideCommentProgram = re.compile(OutsideCommentPattern)

InsideComment = (Identifier, Number, CommentEnd)
InsideCommentPattern = '(' + '|'.join(InsideComment) + ')'
InsideCommentProgram = re.compile(InsideCommentPattern)

def initfixline():
    global Program
    Program = OutsideCommentProgram

def fixline(line):
    global Program
##  print('-->', repr(line))
    i = 0
    while i < len(line):
        match = Program.search(line, i)
        if match is None: break
        i = match.start()
        found = match.group(0)
##      if Program is InsideCommentProgram: print(end='... ')
##      else: print(end='    ')
##      print(found)
        if len(found) == 2:
            if found == '/*':
                Program = InsideCommentProgram
            elif found == '*/':
                Program = OutsideCommentProgram
        n = len(found)
        if found in Dict:
            subst = Dict[found]
            if Program is InsideCommentProgram:
                if not Docomments:
                    print('Found in comment:', found)
                    i = i + n
                    continue
                if found in NotInComment:
##                  print(end='Ignored in comment: ')
##                  print(found, '-->', subst)
##                  print('Line:', line, end='')
                    subst = found
##              else:
##                  print(end='Substituting in comment: ')
##                  print(found, '-->', subst)
##                  print('Line:', line, end='')
            line = line[:i] + subst + line[i+n:]
            n = len(subst)
        i = i + n
    return line

Docomments = 0
def setdocomments():
    global Docomments
    Docomments = 1

Reverse = 0
def setreverse():
    global Reverse
    Reverse = (not Reverse)

Dict = {}
NotInComment = {}
def addsubst(substfile):
    try:
        fp = open(substfile, 'r')
    except IOError as msg:
        err(substfile + ': cannot read substfile: ' + str(msg) + '\n')
        sys.exit(1)
    lineno = 0
    while 1:
        line = fp.readline()
        if not line: break
        lineno = lineno + 1
        try:
            i = line.index('#')
        except ValueError:
            i = -1          # Happens to delete trailing \n
        words = line[:i].split()
        if not words: continue
        if len(words) == 3 and words[0] == 'struct':
            words[:2] = [words[0] + ' ' + words[1]]
        elif len(words) != 2:
            err(substfile + '%s:%r: warning: bad line: %r' % (substfile, lineno, line))
            continue
        if Reverse:
            [value, key] = words
        else:
            [key, value] = words
        if value[0] == '*':
            value = value[1:]
        if key[0] == '*':
            key = key[1:]
            NotInComment[key] = value
        if key in Dict:
            err('%s:%r: warning: overriding: %r %r\n' % (substfile, lineno, key, value))
            err('%s:%r: warning: previous: %r\n' % (substfile, lineno, Dict[key]))
        Dict[key] = value
    fp.close()

if __name__ == '__main__':
    main()