$ time ./test3.py multi.txt Linux FreeBSD Solaris Cisco Opennet Bash pronounced Compilation Robbins Gentoo Portability SPARC OpenBSD Hurd OpenSolaris Linux FreeBSD Solaris Cisco Opennet Bash pronounced Compilation Robbins Gentoo Portability SPARC OpenBSD Hurd OpenSolaris Linux FreeBSD Solaris Cisco Opennet Bash pronounced Compilation Robbins Gentoo Portability SPARC OpenBSD Hurd OpenSolaris Linux FreeBSD Solaris Cisco Opennet Bash pronounced Compilation > /dev/null
real 0m11.180s
user 0m21.634s
sys 0m0.493s
$ time ./test.pl multi.txt Linux FreeBSD Solaris Cisco Opennet Bash pronounced Compilation Robbins Gentoo Portability SPARC OpenBSD Hurd OpenSolaris Linux FreeBSD Solaris Cisco Opennet Bash pronounced Compilation Robbins Gentoo Portability SPARC OpenBSD Hurd OpenSolaris Linux FreeBSD Solaris Cisco Opennet Bash pronounced Compilation Robbins Gentoo Portability SPARC OpenBSD Hurd OpenSolaris Linux FreeBSD Solaris Cisco Opennet Bash pronounced Compilation > /dev/null
$ time ./test.pl multi.txt Linux FreeBSD Solaris Cisco Opennet Bash pronounced Compilation Robbins Gentoo Portability SPARC OpenBSD Hurd OpenSolaris Linux FreeBSD Solaris Cisco Opennet Bash pronounced Compilation Robbins Gentoo Portability SPARC OpenBSD Hurd OpenSolaris Linux FreeBSD Solaris Cisco Opennet Bash pronounced Compilation Robbins Gentoo Portability SPARC OpenBSD Hurd OpenSolaris Linux FreeBSD Solaris Cisco Opennet Bash pronounced Compilation > /dev/null
real 0m23.168s
user 0m22.727s
sys 0m0.121s
$ cat test.pl
#!/usr/bin/perl
# Print every line of a file that matches at least one of the given regular
# expressions (a minimal multi-pattern grep).
#
# Usage: test.pl FILE PATTERN [PATTERN ...]
#
# A line is printed once per occurrence in the file, no matter how many of
# the patterns it matches.
use strict;
use warnings;

my $path = shift @ARGV;
die "usage: $0 FILE PATTERN [PATTERN ...]\n" unless defined $path;

# Three-argument open with a lexical handle: the file name is never parsed
# for a mode or pipe character, and a failed open is reported instead of
# silently producing no output.
open(my $in, '<', $path) or die "$0: cannot open '$path': $!\n";

# Compile each pattern once, up front, rather than once per line.
my @patterns = map { qr{$_} } @ARGV;

# Read one line at a time; iterating over <$in> in list context would load
# the whole file into memory before the first match is attempted.
LINE:
while (my $line = <$in>) {
    for my $pattern (@patterns) {
        if ($line =~ $pattern) {
            print $line;
            next LINE;    # first match is enough; do not print the line twice
        }
    }
}

close($in);
#!/usr/bin/env python
"""Print the lines of a file that match at least one regular expression.

Usage: test3.py FILE PATTERN [PATTERN ...]

The file is read in binary mode and split into chunks, and the chunks are
matched in parallel by a pool of worker processes. Matching lines are
written to standard output unchanged and in file order.
"""
import os,sys,re,multiprocessing


def compile_pattern(pattern):
    """Compile one command-line pattern for searching binary lines.

    The input file is opened in binary mode, so the pattern has to be a
    byte string as well. Text arguments (Python 3) are converted with
    os.fsencode; byte-string arguments (Python 2) are compiled as they are.
    """
    if not isinstance(pattern, bytes):
        pattern = os.fsencode(pattern)
    return re.compile(pattern)


# Built at module level rather than under the __main__ guard, so the
# compiled patterns also exist in the worker processes that run matcher().
expr=[compile_pattern(i) for i in sys.argv[2:]]


def matcher(lines):
    """Return the lines that match at least one pattern in ``expr``.

    lines -- a list of lines as read from the binary input file.

    The result preserves the order of ``lines``; a line that matches
    several patterns still appears only once per occurrence.
    """
    elist=expr
    # any() stops at the first pattern that matches instead of trying all.
    return [line for line in lines if any(exp.search(line) for exp in elist)]


def chunk(ulist, step):
    """Split ``ulist`` into consecutive slices of at most ``step`` items."""
    return [ulist[i:i+step] for i in range(0, len(ulist), step)]


if __name__ == '__main__':
    # Read the file once and make sure the handle is closed afterwards.
    with open(sys.argv[1],'rb') as source:
        data=source.readlines()
    # The chunk size is 1/128 of the line count, but never zero: a zero step
    # would make range() raise for files shorter than 128 lines.
    dlist=chunk(data,max(1,len(data)//128))
    pool = multiprocessing.Pool(processes=8)
    try:
        results=pool.map(matcher, dlist)
    finally:
        pool.close()
        pool.join()
    # Emit the raw bytes of each matching line rather than the repr of a
    # nested list. sys.stdout.buffer is the binary stream on Python 3;
    # Python 2 has no such attribute and sys.stdout accepts bytes directly.
    out=getattr(sys.stdout,'buffer',sys.stdout)
    for matched in results:
        out.writelines(matched)
вообще, тут уже бенчи libpcre.
37M 2009-11-09 02:42 multi.txt