summaryrefslogtreecommitdiffstats
path: root/Demo/comparisons/regextest.py
blob: b27d741d7e0a344928a157fbb47c7101aa07f48c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#! /usr/bin/env python

# 1)  Regular Expressions Test
#
#     Read a file of (extended per egrep) regular expressions (one per line),
#     and apply those to all files whose names are listed on the command line.
#     Basically, an 'egrep -f' simulator.  Test it with 20 "vt100" patterns
#     against five /etc/termcap files.  Tests using more elaborate patterns
#     would also be interesting.  Your code should not break if given hundreds
#     of regular expressions or binary files to scan.

# This implementation:
# - combines all patterns into a single one using ( ... | ... | ... )
# - reads patterns from stdin, scans files given as command line arguments
# - produces output in the format <file>:<lineno>:<line>
# - is only about 2.5 times as slow as egrep (though I couldn't run
#   Tom's test -- this system, a vanilla SGI, only has /etc/terminfo)

import string
import sys
import re

def main():
    """Scan the files named on the command line for the patterns on stdin.

    Every line read from standard input is taken as one regular
    expression.  The expressions are joined into a single alternation
    ``(p1|p2|...)`` that is compiled once.  Each file named in
    ``sys.argv[1:]`` is then read line by line, and every line in which
    the combined pattern matches anywhere is written to standard output
    as ``<file>:<lineno>:<line>`` (line numbers start at 1).

    A file that cannot be opened is reported on standard output as
    ``<file>: <error>`` and skipped; the remaining files are still scanned.
    """
    pats = map(chomp, sys.stdin.readlines())
    # NOTE: with no patterns on stdin this is '()', which matches every line.
    bigpat = '(' + '|'.join(pats) + ')'
    prog = re.compile(bigpat)
    write = sys.stdout.write

    for filename in sys.argv[1:]:
        try:
            fp = open(filename, 'r')
        except IOError as msg:
            write("%s: %s\n" % (filename, msg))
            continue
        # Close the file as soon as it has been scanned, even if reading
        # or matching raises, so that a long argument list does not
        # accumulate open file handles.
        with fp:
            for lineno, line in enumerate(fp, 1):
                if prog.search(line):
                    # The last line of a file may lack its newline; add one
                    # so the next record always starts on a fresh line.
                    if not line.endswith('\n'):
                        line = line + '\n'
                    write("%s:%s:%s" % (filename, lineno, line))

def chomp(s):
    """Return *s* with all trailing newline characters removed.

    Only '\\n' characters at the very end of the string are dropped;
    other whitespace and newlines elsewhere in *s* are left alone.
    """
    newline = '\n'
    trimmed = s.rstrip(newline)
    return trimmed

# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()