Also count bare IPv4 addresses. Beautified NiXSapmSum.RE_PF.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2009 - 2010 Pascal Volk
# See COPYING for distribution information.
"""Summarize NiX Spam DNSBL lookup based rejects found in mail log files.

The code is written so that it runs unchanged on Python 2 and Python 3.
"""

__author__ = 'Pascal Volk'
__version__ = '0.1.2'
__date__ = '2009-07-03'

import fileinput
import os
import re
import sys


class NiXSapmSum(object):
    """Small log parser class to parse and summarize NiX Spam DNSBL lookup
    based rejects from a mail log file.

    Typical use: call :meth:`setLogFormat`, feed lines to :meth:`parseLog`,
    optionally call :meth:`countByDom`, then read the counters with
    :meth:`getMXs` / :meth:`getDomains`.
    """
    __slots__ = ('_doms', '_mxs', '_repo')

    # Raw strings: the patterns contain backslash escapes (\. and \d).
    RE_FQDN = r'(?:[a-z0-9-]{1,63}\.){1,}[a-z]{2,6}'
    RE_IPv4 = r'(?:[\d]{1,3}\.){3}[\d]{1,3}'
    # Regular expression pattern for mail logs from Postfix.
    # It is compiled with re.VERBOSE, so the line breaks and indentation
    # inside the pattern are ignored.  Group 1 captures the mail host
    # (FQDN or IPv4 address) named in the reject message.
    RE_PF = r'''^[\w\s:-]{17,80}\spostfix\/smtpd\[[\d]{1,5}\]:\sNOQUEUE:
                \sreject:.*blocked\susing\six\.dnsbl\.manitu\.net;
                \sSpam\ssent\sto\sthe\smailhost\s(%s|%s)
                \swas\sdetected\sby\sNiX\sSpam.*$''' % (RE_FQDN, RE_IPv4)

    def __init__(self):
        self._doms = {}    # domain / IPv4 address -> number of rejects
        self._mxs = {}     # mail host -> number of rejects
        self._repo = None  # compiled pattern, set by setLogFormat()

    def setLogFormat(self, format='postfix'):
        """Select the pattern used to recognize reject lines.

        :param format: name of the MTA that wrote the log; only
            ``'postfix'`` is supported
        :raises ValueError: if *format* is not supported
        """
        if format == 'postfix':
            self._repo = re.compile(NiXSapmSum.RE_PF, re.VERBOSE)
        else:
            raise ValueError('MTA/Logformat not supported yet.')

    def parseLog(self, filehandle):
        """Count the NiX Spam rejects per mail host in *filehandle*.

        :param filehandle: any iterable that yields log lines
        """
        for line in filehandle:
            if not isinstance(line, str):
                # fileinput.hook_compressed can hand out bytes for
                # compressed files on Python 3; the pattern needs text.
                line = line.decode('utf-8', 'replace')
            mo = self._repo.match(line)
            if mo:
                mx = mo.group(1)
                self._mxs[mx] = self._mxs.get(mx, 0) + 1

    def countByDom(self):
        """Summarize the per-host counters by domain.

        Hosts that are plain IPv4 addresses are kept as they are; for all
        other hosts the last two labels of the name are used as domain.
        The domain counters are rebuilt from scratch on every call, so
        calling this method repeatedly does not inflate the numbers.
        """
        # \Z anchors the match at the end of the string: a name that merely
        # starts like an address (e.g. 1.2.3.4.example.com) is not an IP.
        ipv4po = re.compile(NiXSapmSum.RE_IPv4 + r'\Z')
        self._doms = {}
        for mx, count in self._mxs.items():
            if ipv4po.match(mx):
                dom = mx
            else:
                dom = '.'.join(mx.split('.')[-2:])
            self._doms[dom] = self._doms.get(dom, 0) + count

    def getDomains(self):
        """Return the dict that maps domains to their reject counts."""
        return self._doms

    def getMXs(self):
        """Return the dict that maps mail hosts to their reject counts."""
        return self._mxs


def getOptionParser():
    """Build and return the command line option parser of this script.

    :rtype: optparse.OptionParser
    """
    from optparse import OptionParser
    description = 'NiX Spam DNSBL lookup based rejects summarizer'
    usage = 'usage: %prog [options] maillog [maillog [...]]'
    version = '%prog ' + __version__
    parser = OptionParser(description=description, usage=usage,
                          version=version)
    parser.add_option('-d', action='store_true', dest='countByDom',
                      default=False, help='summarize all MX by domain')
    parser.add_option('-m', action='store_false', dest='countByDom',
                      help='count per MX host [default]')
    parser.add_option('-o', dest='oFormat', default='table',
                      metavar='FORMAT', choices=('csv', 'table'),
                      help='the output format: table or csv '
                           '[default: %default]')
    parser.add_option('-p', action='store_true', dest='percent',
                      default=False,
                      help='show also percentages in table output '
                           '[default: %default]')
    # Restrict -s to the documented values, like -o and -t do; before,
    # any unknown value was silently treated as 'count'.
    parser.add_option('-s', dest='order', default='name', metavar='SORTBY',
                      choices=('name', 'count'),
                      help='arrange output by: name or count '
                           '[default: %default]')
    parser.add_option('-t', dest='format', default='postfix', metavar='MTA',
                      choices=('postfix',),
                      help='MTA that generated the maillog '
                           '[default: %default]')
    return parser


def check_files(log_files):
    """Checks that all files from *log_files* exist and all of them are
    readable.

    If a file doesn't exist or is not readable, a warning is written to
    stderr and the file is removed from the *log_files* set.

    This function will return *True*, if at least one file has passed the
    checks. Otherwise *False* will be returned and the *log_files* set
    will be empty.

    :param log_files: set of file names
    :type log_files: set
    :rtype: bool
    """
    assert isinstance(log_files, set), 'log_files argument must be a set'
    # Iterate over a copy, because the set is modified inside the loop.
    for lf in log_files.copy():
        if not os.path.isfile(lf):
            sys.stderr.write('Warning: No such file: %r\n' % lf)
            log_files.remove(lf)
        elif not os.access(lf, os.R_OK):
            sys.stderr.write('Warning: Cannot read file: %r\n' % lf)
            log_files.remove(lf)
    return bool(log_files)


def buildTable(output, domains, percent, orderBy):
    """Write *domains* as a text table, followed by a total row.

    :param output: object with a ``write()`` method that receives the table
    :param domains: dict that maps names to their counts
    :param percent: if true, add a column with each row's share of the total
    :param orderBy: ``'name'`` sorts ascending by name; any other value
        sorts descending by count
    """
    if not domains:
        # Nothing to show; max() below would fail on an empty mapping.
        return
    by_count = orderBy != 'name'
    k = 1 if by_count else 0
    doms = sorted(domains.items(), key=lambda item: item[k],
                  reverse=by_count)
    dlen = len(max(domains, key=len)) + 1   # width of the name column
    clen = len(str(max(domains.values())))  # width of the count column
    total = sum(domains.values())
    if percent:
        row_fmt = ' %%%ds %%%dd %%6.2f %%%%\n' % (-dlen, clen)
        for name, count in doms:
            output.write(row_fmt % (name, count, 100. / total * count))
        output.write('%s\n' % ((clen + dlen + 14) * '-'))
        output.write(row_fmt % ('total', total, 100))
    else:
        row_fmt = ' %%%ds %%%dd\n' % (-dlen, clen)
        for row in doms:
            output.write(row_fmt % row)
        output.write('%s\n' % ((clen + dlen + 4) * '-'))
        output.write(row_fmt % ('total', total))


def showResult(nixspamsum, options):
    """Print the collected counters in the requested output format.

    :param nixspamsum: a :class:`NiXSapmSum` object that has parsed the logs
    :param options: parsed options; the attributes ``countByDom``,
        ``oFormat``, ``percent`` and ``order`` are read
    """
    if options.countByDom:
        nixspamsum.countByDom()
        domains = nixspamsum.getDomains()
    else:
        domains = nixspamsum.getMXs()
    if not domains:
        print("No NiX Spam DNSBL rejects found")
        return
    try:
        from cStringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO         # Python 3
    output = StringIO()
    if options.oFormat == 'table':
        # build the table
        buildTable(output, domains, options.percent, options.order)
    elif options.oFormat == 'csv':
        # generate comma separated values, sorted by name
        for d in sorted(domains):
            output.write("'%s',%d\n" % (d, domains[d]))
    else:
        # should never be reached
        print("Oops, error in function showResult() happend")
    # show the result
    print(output.getvalue())


def main():
    """Parse the command line, read the log files and print the summary.

    :return: the exit status: 0 on success, 1 if no log file is readable
    :rtype: int
    """
    parser = getOptionParser()
    opts, args = parser.parse_args()
    if not args:
        parser.error('No log file specified')
    # drop duplicates
    log_files = set(args)
    # remove inexistent/unreadable files
    if not check_files(log_files):
        sys.stderr.write('No readable log files found\n')
        return 1
    nixss = NiXSapmSum()
    nixss.setLogFormat(opts.format)
    fi = fileinput.FileInput(sorted(log_files),
                             openhook=fileinput.hook_compressed)
    try:
        nixss.parseLog(fi)
    finally:
        # close the input even if parsing fails
        fi.close()
    showResult(nixss, opts)
    return 0


if __name__ == '__main__':
    sys.exit(main())