Also count bare IPv4 addresses. Beautified NiXSapmSum.RE_PF.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2009 - 2010 Pascal Volk
# See COPYING for distribution information.
# Module metadata. __version__ is the string reported by the command line
# parser's --version output (see getOptionParser()).
__author__ = 'Pascal Volk'
__version__ = '0.1.2'
__date__ = '2009-07-03'
import os
import re
import fileinput
class NiXSapmSum(object):
    """
    Small log parser class to parse and summarize NiX Spam DNSBL lookup
    based rejects from a mail log file.

    Typical use: select the log format with setLogFormat(), feed one or
    more iterables of log lines to parseLog(), optionally aggregate with
    countByDom(), then read the counters via getMXs() / getDomains().
    """
    __slots__ = ('_doms', '_mxs', '_repo')

    # Raw strings, so the backslashes reach the regex engine untouched.
    RE_FQDN = r'(?:[a-z0-9-]{1,63}\.){1,}[a-z]{2,6}'
    RE_IPv4 = r'(?:[\d]{1,3}\.){3}[\d]{1,3}'
    # Regular expression pattern for mail logs from Postfix.
    # It must be compiled with re.VERBOSE: the line breaks and indentation
    # inside the pattern are only there for readability.  Group 1 captures
    # the reported mail host (a FQDN or a bare IPv4 address).  The dots of
    # the DNSBL zone name are escaped so they only match literal dots.
    RE_PF = r'''^[\w\s:-]{17,80}\spostfix\/smtpd\[[\d]{1,5}\]:\sNOQUEUE:
        \sreject:.*blocked\susing\six\.dnsbl\.manitu\.net;
        \sSpam\ssent\sto\sthe\smailhost\s(%s|%s)
        \swas\sdetected\sby\sNiX\sSpam.*$''' % (RE_FQDN, RE_IPv4)

    def __init__(self):
        """Create an empty summarizer; no log format is selected yet."""
        self._doms = {}     # domain or IPv4 address -> number of rejects
        self._mxs = {}      # mail host (FQDN or IPv4) -> number of rejects
        self._repo = None   # compiled pattern, set by setLogFormat()

    def setLogFormat(self, format='postfix'):
        """Select the MTA log format used by parseLog().

        :param format: name of the MTA; only 'postfix' is supported
        :raise ValueError: (a subclass of Exception) for any other name
        """
        if format == 'postfix':
            self._repo = re.compile(NiXSapmSum.RE_PF, re.VERBOSE)
        else:
            raise ValueError('MTA/Logformat not supported yet.')

    def parseLog(self, filehandle):
        """Count the NiX Spam rejects per mail host found in *filehandle*.

        :param filehandle: any iterable yielding log lines
        Lines that do not match the selected log format are ignored.  The
        counters accumulate over multiple calls.
        """
        if self._repo is None:
            # No format chosen explicitly: fall back to the default one
            # instead of failing with an AttributeError on None.
            self.setLogFormat()
        match = self._repo.match
        for line in filehandle:
            mo = match(line)
            if mo:
                mx = mo.group(1)
                self._mxs[mx] = self._mxs.get(mx, 0) + 1

    def countByDom(self):
        """Aggregate the per mail host counters by domain.

        Bare IPv4 addresses are counted as they are; for host names the
        last two labels are used as the domain.  The result is rebuilt
        from scratch on every call, so repeated calls do not double-count.
        """
        # Anchor at the end: only a string that is an IPv4 address as a
        # whole counts as one, not a host name that merely starts like one
        # (e.g. '1.2.3.4.example.com').
        ipv4po = re.compile(NiXSapmSum.RE_IPv4 + r'\Z')
        doms = {}
        for mx, count in self._mxs.items():
            if ipv4po.match(mx):
                dom = mx
            else:
                dom = '.'.join(mx.split('.')[-2:])
            doms[dom] = doms.get(dom, 0) + count
        # Update in place so a dict handed out earlier by getDomains()
        # stays the same object.
        self._doms.clear()
        self._doms.update(doms)

    def getDomains(self):
        """Return the dict mapping domain/IPv4 address to reject count.

        The dict is filled by countByDom().
        """
        return self._doms

    def getMXs(self):
        """Return the dict mapping mail host to reject count."""
        return self._mxs
def getOptionParser():
    """Build the command line option parser of this script.

    Options defined: -d / -m (count per domain or per MX host), -o (output
    format), -p (show percentages), -s (sort order) and -t (MTA log
    format).

    :return: the configured parser
    :rtype: optparse.OptionParser
    """
    from optparse import OptionParser
    description = 'NiX Spam DNSBL lookup based rejects summarizer'
    usage = 'usage: %prog [options] maillog [maillog [...]]'
    version = '%prog ' + __version__
    parser = OptionParser(description=description, usage=usage,
                          version=version)
    # -d and -m share one destination; -m merely restores the default.
    parser.add_option('-d', action='store_true', dest='countByDom',
                      default=False, help='summarize all MX by domain')
    parser.add_option('-m', action='store_false', dest='countByDom',
                      help='count per MX host [default]')
    parser.add_option('-o', dest='oFormat', default='table',
                      metavar='FORMAT', choices=('csv', 'table'),
                      help='the output format: table or csv '
                           '[default: %default]')
    parser.add_option('-p', action='store_true', dest='percent',
                      default=False,
                      help='show also percentages in table output '
                           '[default: %default]')
    # Restrict -s to the documented values; previously any other value was
    # silently treated like 'count'.
    parser.add_option('-s', dest='order', default='name', metavar='SORTBY',
                      choices=('name', 'count'),
                      help='arrange output by: name or count '
                           '[default: %default]')
    parser.add_option('-t', dest='format', default='postfix', metavar='MTA',
                      choices=('postfix',),
                      help='MTA that generated the maillog '
                           '[default: %default]')
    return parser
def check_files(log_files):
    """Filter *log_files* in place, keeping only readable regular files.

    Every entry that is not an existing file, or that cannot be read, is
    reported with a warning on standard error and removed from the
    *log_files* set.

    Returns *True* when at least one file name is left afterwards.
    Otherwise *False* is returned, and *log_files* is empty.

    :param log_files: set of file names
    :type log_files: set
    :rtype: bool"""
    assert isinstance(log_files, set), 'log_files argument must be a set'
    # Walk over a snapshot, because the live set shrinks while we go.
    for candidate in log_files.copy():
        if not os.path.isfile(candidate):
            complaint = 'Warning: No such file: %r\n'
        elif not os.access(candidate, os.R_OK):
            complaint = 'Warning: Cannot read file: %r\n'
        else:
            # Exists and is readable: keep it.
            continue
        os.sys.stderr.write(complaint % candidate)
        log_files.remove(candidate)
    return bool(log_files)
def buildTable(output, domains, percent, orderBy):
    """Write a plain text table of reject counts to *output*.

    One row is written per entry of *domains*, followed by a separator
    line of dashes and a final 'total' row.

    :param output: object with a ``write()`` method accepting strings
    :param domains: dict mapping a name (domain, host or address) to its
        reject count
    :param percent: if true, add a column with each count's share of the
        total in percent
    :param orderBy: 'name' sorts ascending by name; any other value sorts
        descending by count
    """
    # Index into the (name, count) pairs to sort on.
    k = 0 if orderBy == 'name' else 1
    doms = sorted(domains.items(), key=lambda item: item[k],
                  reverse=bool(k))
    total = sum(domains.values())
    # Column widths must also fit the 'total' row: its label and its
    # number, which can have more digits than any single count.
    dlen = max([len(name) for name in domains] + [len('total')]) + 1
    clen = len(str(total))
    if percent:
        row = ' %%%ds %%%dd %%6.2f %%%%\n' % (-dlen, clen)
        for d, c in doms:
            dfrac = 100. / total * c
            output.write(row % (d, c, dfrac))
        output.write('%s\n' % ((clen + dlen + 14) * '-'))
        output.write(row % ('total', total, 100))
    else:
        row = ' %%%ds %%%dd\n' % (-dlen, clen)
        for item in doms:
            output.write(row % item)
        output.write('%s\n' % ((clen + dlen + 4) * '-'))
        output.write(row % ('total', total))
def showResult(nixspamsum, options):
    """Print the collected reject counters to standard output.

    :param nixspamsum: the summarizer; its ``countByDom()``,
        ``getDomains()`` and ``getMXs()`` methods are used
    :param options: parsed command line options; the attributes
        ``countByDom``, ``oFormat``, ``percent`` and ``order`` are read
    """
    # Writing to the stream directly (instead of the print statement)
    # keeps this function valid on Python 2 and Python 3 alike.
    write = os.sys.stdout.write
    if options.countByDom:
        nixspamsum.countByDom()
        domains = nixspamsum.getDomains()
    else:
        domains = nixspamsum.getMXs()
    if not domains:
        write("No NiX Spam DNSBL rejects found\n")
        return
    try:
        from cStringIO import StringIO
    except ImportError:
        # cStringIO exists on Python 2 only.
        from io import StringIO
    output = StringIO()
    # build the table
    if options.oFormat == 'table':
        buildTable(output, domains, options.percent, options.order)
    # generate comma separated values, always ordered by name
    elif options.oFormat == 'csv':
        for d in sorted(domains):
            output.write("'%s',%d\n" % (d, domains[d]))
    # should never be reached
    else:
        write("Oops, error in function showResult() happened\n")
        # There is no result to show for an unknown format.
        return
    # show the result, followed by an empty line
    write(output.getvalue())
    write('\n')
def main():
    """Run the command line program.

    Parses the command line, drops duplicate and unusable log file names,
    parses the remaining logs (compressed files are handled by
    ``fileinput.hook_compressed``) and prints the summary.

    :return: the process exit status: 0 on success, 1 if none of the given
        log files is readable
    :rtype: int
    """
    parser = getOptionParser()
    opts, args = parser.parse_args()
    if not args:
        # parser.error() prints the message and exits the program.
        parser.error('No log file specified')
    # drop duplicates
    log_files = set(args)
    # remove inexistent/unreadable files
    if not check_files(log_files):
        os.sys.stderr.write('No readable log files found\n')
        return 1
    nixss = NiXSapmSum()
    nixss.setLogFormat(opts.format)
    fi = fileinput.FileInput(log_files, openhook=fileinput.hook_compressed)
    try:
        nixss.parseLog(fi)
    finally:
        # Release the input files even if parsing fails half way through.
        fi.close()
    showResult(nixss, opts)
    return 0
# Script entry point: run main() and hand its return value to the shell as
# the exit status.
if __name__ == '__main__':
    exit_status = main()
    os.sys.exit(exit_status)