Demo/comparisons/regextest.py
#! /usr/bin/env python # 1) Regular Expressions Test # # Read a file of (extended per egrep) regular expressions (one per line), # and apply those to all files whose names are listed on the command line. # Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns # against a five /etc/termcap files. Tests using more elaborate patters # would also be interesting. Your code should not break if given hundreds # of regular expressions or binary files to scan. # This implementation: # - combines all patterns into a single one using ( ... | ... | ... ) # - reads patterns from stdin, scans files given as command line arguments # - produces output in the format <file>:<lineno>:<line> # - is only about 2.5 times as slow as egrep (though I couldn't run # Tom's test -- this system, a vanilla SGI, only has /etc/terminfo) import string import sys import re def main(): pats = map(chomp, sys.stdin.readlines()) bigpat = '(' + '|'.join(pats) + ')' prog = re.compile(bigpat) for file in sys.argv[1:]: try: fp = open(file, 'r') except IOError, msg: print "%s: %s" % (file, msg) continue lineno = 0 while 1: line = fp.readline() if not line: break lineno = lineno + 1 if prog.search(line): print "%s:%s:%s" % (file, lineno, line), def chomp(s): return s.rstrip('\n') if __name__ == '__main__': main() |