diff options
author | Jack Jansen <jack.jansen@cwi.nl> | 1999-02-03 12:07:14 (GMT) |
---|---|---|
committer | Jack Jansen <jack.jansen@cwi.nl> | 1999-02-03 12:07:14 (GMT) |
commit | 33a5d7e7512f2118b3840bacc07e72b1936c55fd (patch) | |
tree | e754b0a315239c751071f10b4a88bce3d6773321 | |
parent | 7bcd84ddb2a03e5855a13ac6a53ce26628d0bae7 (diff) | |
download | cpython-33a5d7e7512f2118b3840bacc07e72b1936c55fd.zip cpython-33a5d7e7512f2118b3840bacc07e72b1936c55fd.tar.gz cpython-33a5d7e7512f2118b3840bacc07e72b1936c55fd.tar.bz2 |
Just's script to find non-7-bit-clean characters in a source tree (slightly
modified by Jack to skip files that are binary despite the TEXT type).
-rw-r--r-- | Mac/scripts/findgremlins.py | 53 |
1 files changed, 53 insertions, 0 deletions
"""findgremlins - Search through a folder and subfolders for
text files that have characters with bit 8 set, and print
the filename and a bit of context.

By Just, with a little glue by Jack"""

import re
import os
import string
import sys

# Matches any single character whose code is in the range 0x80-0xFF.
xpat = re.compile(r"[\200-\377]")


def looks_binary(data, sample=256, limit=16):
    """Return true if the beginning of data looks binary rather than text.

    Counts NUL characters and characters with code >= 0x80 among the first
    `sample` characters of `data`; the data is considered binary when more
    than `limit` such characters are found.
    """
    badcount = 0
    for ch in data[:sample]:
        code = ord(ch)
        if code == 0 or code >= 0x80:
            badcount = badcount + 1
    return badcount > limit


def gremlin_contexts(data, margin=15):
    """Return a list with one context snippet per high-bit character in data.

    Each snippet consists of the offending character plus up to `margin`
    characters on either side, with newlines replaced by spaces.  Snippets
    are returned in order of occurrence; the list is empty when data
    contains no character matched by `xpat`.
    """
    contexts = []
    pos = 0
    while 1:
        m = xpat.search(data, pos)
        if m is None:
            break
        i, j = m.span()
        # Clamp the start at 0: a negative start index would be interpreted
        # relative to the end of data and select the wrong (usually empty)
        # slice for gremlins near the beginning of the file.
        start = max(0, i - margin)
        contexts.append(data[start:j + margin].replace('\n', ' '))
        pos = j
    return contexts


def walk(top, recurse=1):
    """Report high-bit characters in `top`, or in everything below it.

    If `top` is a directory, each entry in it is walked in turn (only when
    `recurse` is true; nested calls always use the default).  Otherwise the
    file is examined only if its type, as reported by macfs, is 'TEXT' and
    its name does not end in ".hqx".  Files that look binary despite the
    TEXT type are reported as such and skipped; for other files the
    filename is printed once, followed by one context line per offending
    character.
    """
    if os.path.isdir(top):
        if recurse:
            for name in os.listdir(top):
                walk(os.path.join(top, name))
        return

    # Imported here rather than at module level so that the scanning
    # helpers above stay importable where macfs is not available.
    import macfs
    _creator, tp = macfs.FSSpec(top).GetCreatorType()
    if tp != 'TEXT' or top[-4:] == ".hqx":
        return

    f = open(top)
    try:
        data = f.read()
    finally:
        # Close explicitly instead of relying on garbage collection.
        f.close()

    if looks_binary(data):
        print(repr(top) + ' appears to be a binary file')
        return

    contexts = gremlin_contexts(data)
    if contexts:
        print(repr(top))
        for context in contexts:
            print("  " + context)


def main():
    """Obtain a folder via macfs.GetDirectory() and walk it if that call reports ok."""
    import macfs
    fss, ok = macfs.GetDirectory()
    if ok:
        walk(fss.as_pathname())


if __name__ == '__main__':
    main()
    sys.exit(1)  # So we see the output