| #! /usr/bin/env python | |
| # 1) Regular Expressions Test | |
| # | |
| # Read a file of (extended per egrep) regular expressions (one per line), | |
| # and apply those to all files whose names are listed on the command line. | |
| # Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns | |
| # against five /etc/termcap files. Tests using more elaborate patterns | |
| # would also be interesting. Your code should not break if given hundreds | |
| # of regular expressions or binary files to scan. | |
| # This implementation: | |
| # - combines all patterns into a single one using ( ... | ... | ... ) | |
| # - reads patterns from stdin, scans files given as command line arguments | |
| # - produces output in the format <file>:<lineno>:<line> | |
| # - is only about 2.5 times as slow as egrep (though I couldn't run | |
| # Tom's test -- this system, a vanilla SGI, only has /etc/terminfo) | |
| import string | |
| import sys | |
| import re | |
def main():
    """Print every line of the named files that matches any stdin pattern.

    Patterns are read from standard input, one per line, and combined
    into a single alternation ``(p1|p2|...)`` so that each input line is
    searched only once.  The files to scan are taken from
    ``sys.argv[1:]``.  Every matching line is written to standard output
    as ``<file>:<lineno>:<line>``.  A file that cannot be opened is
    reported on standard output as ``<file>: <error>`` and skipped.
    """
    pats = [pat.rstrip('\n') for pat in sys.stdin.readlines()]
    if pats:
        prog = re.compile('(' + '|'.join(pats) + ')')
    else:
        # With no patterns the alternation would be "()", which matches
        # every line.  An empty pattern list must select nothing, so use
        # an empty negative lookahead, which can never match.
        prog = re.compile('(?!)')

    for filename in sys.argv[1:]:
        try:
            fp = open(filename, 'r')
        except IOError as msg:
            print("%s: %s" % (filename, msg))
            continue
        # The with-block guarantees the file is closed, even when a read
        # fails part-way through; scanning hundreds of files must not
        # leak descriptors.
        with fp:
            for lineno, line in enumerate(fp, 1):
                if not prog.search(line):
                    continue
                if not line.endswith('\n'):
                    # A final line without a newline would otherwise run
                    # into the next match on the same output line.
                    line += '\n'
                sys.stdout.write("%s:%s:%s" % (filename, lineno, line))
def chomp(s):
    """Return *s* with every trailing newline character removed.

    Only ``'\\n'`` is stripped; other trailing whitespace (spaces, tabs,
    carriage returns) is left in place.
    """
    without_newlines = s.rstrip('\n')
    return without_newlines
# Script entry point: run the scan only when this file is executed
# directly, so importing the module has no side effects.
if __name__ == '__main__':
    main()