summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJack Jansen <jack.jansen@cwi.nl>1999-02-03 12:07:14 (GMT)
committerJack Jansen <jack.jansen@cwi.nl>1999-02-03 12:07:14 (GMT)
commit33a5d7e7512f2118b3840bacc07e72b1936c55fd (patch)
treee754b0a315239c751071f10b4a88bce3d6773321
parent7bcd84ddb2a03e5855a13ac6a53ce26628d0bae7 (diff)
downloadcpython-33a5d7e7512f2118b3840bacc07e72b1936c55fd.zip
cpython-33a5d7e7512f2118b3840bacc07e72b1936c55fd.tar.gz
cpython-33a5d7e7512f2118b3840bacc07e72b1936c55fd.tar.bz2
Just's script to find non-7-bit-clean characters in a source tree (slightly
modified by Jack to skip files that are binary despite the TEXT type).
-rw-r--r--Mac/scripts/findgremlins.py53
1 files changed, 53 insertions, 0 deletions
diff --git a/Mac/scripts/findgremlins.py b/Mac/scripts/findgremlins.py
new file mode 100644
index 0000000..fe40e64
--- /dev/null
+++ b/Mac/scripts/findgremlins.py
@@ -0,0 +1,53 @@
+"""findgremlins - Search through a folder and subfolders for
+text files that have characters with bit 8 set, and print
+the filename and a bit of context.
+
+By Just, with a little glue by Jack"""
+
+import macfs
+import re
+import os
+import string
+import sys
+
+# Matches any single character whose code is in the octal range 200-377,
+# i.e. a character with the high (8th) bit set.
+xpat = re.compile(r"[\200-\377]")
+
+def walk(top, recurse=1):
+    """Report characters with bit 8 set in the TEXT files at or below top.
+
+    top is a pathname.  If it is a directory and recurse is true, every
+    entry in it is walked in turn; if recurse is false the directory is
+    skipped.  If it is a file whose Mac file type is 'TEXT' (and whose
+    name does not end in ".hqx"), the filename is printed once, followed
+    by one line of surrounding context for each offending character.
+    Files whose first 256 characters contain more than 16 NUL or 8-bit
+    characters are reported as binary and not searched.
+    """
+    if os.path.isdir(top):
+        if recurse:
+            for name in os.listdir(top):
+                walk(os.path.join(top, name))
+        return
+    cr, tp = macfs.FSSpec(top).GetCreatorType()
+    if tp <> 'TEXT' or top[-4:] == ".hqx":
+        return
+    # Close the file explicitly instead of relying on garbage collection.
+    fp = open(top)
+    try:
+        data = fp.read()
+    finally:
+        fp.close()
+    # Heuristic: some files carry the TEXT type but are really binary.
+    badcount = 0
+    for ch in data[:256]:
+        if ord(ch) == 0 or ord(ch) >= 0200:
+            badcount = badcount + 1
+    if badcount > 16:
+        print `top`, 'appears to be a binary file'
+        return
+    pos = 0
+    gotone = 0
+    while 1:
+        m = xpat.search(data, pos)
+        if m is None:
+            break
+        if not gotone:
+            # Print the filename only once, before its first hit.
+            print `top`
+            gotone = 1
+        [(i, j)] = m.regs
+        # Clamp the start of the context window at 0: a negative start
+        # would be taken relative to the end of the string and give the
+        # wrong (usually empty) context for hits in the first 15 characters.
+        print " ", string.replace(data[max(0, i-15):j+15], '\n', ' ')
+        pos = j
+
+def main():
+    """Walk the folder returned by macfs.GetDirectory().
+
+    Does nothing when the ok flag returned alongside the folder is false.
+    """
+    folder, ok = macfs.GetDirectory()
+    if not ok:
+        return
+    walk(folder.as_pathname())
+
+if __name__ == "__main__":
+    # Run the scan, then exit with status 1 so we see the output.
+    main()
+    sys.exit(1)
+