nixspamsum
author Pascal Volk <user@localhost.localdomain.org>
Sun, 07 Jun 2009 11:40:02 +0000
changeset 0 2d97e75f16cf
child 1 7d5cee19c20a
permissions -rwxr-xr-x
initial commit: »don't fear the nervous delete finger«

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2009 Pascal Volk

# Module metadata: author, version and date of this script.
__author__ = 'Pascal Volk'
__version__ = '0.1'
__date__ = '2009-06-07'

import os
import re

class NiXSpamPlot(object):
    """Count NiX Spam (ix.dnsbl.manitu.net) rejections found in mail logs.

    Typical use: select the log format with setLogFormat(), feed one or
    more open log files to parseLog(), then read the per-host counters with
    getMXs() or the per-domain counters with countByDom()/getDomains().
    """

    # Regular expression pattern for mail logs from Postfix.  Group 1
    # captures the mail host named in the rejection message.  This is a raw
    # string so that the regex escapes are not parsed as string escapes.
    RE_PF = r'''^[\w\s:-]{17,80}\spostfix\/smtpd\[[\d]{3,5}\]: NOQUEUE: reject:.*blocked using ix.dnsbl.manitu.net; Spam sent to the mailhost ((?:[a-z0-9-]{1,63}\.){1,}[a-z]{2,6}) was detected by NiX Spam.*$'''

    def __init__(self):
        # domain -> number of rejections, filled by countByDom()
        self._doms = {}
        # mail host -> number of rejections, filled by parseLog()
        self._mxs = {}
        # compiled log line pattern, set by setLogFormat()
        self._repo = None

    def setLogFormat(self, format='postfix'):
        """Select the log line pattern for the MTA that wrote the logs.

        format -- name of the MTA; only 'postfix' is supported

        Raises ValueError for any other value.
        """
        if format != 'postfix':
            raise ValueError('MTA/Logformat not supported yet.')
        self._repo = re.compile(NiXSpamPlot.RE_PF)

    def parseLog(self, filehandle):
        """Count the matching rejections in filehandle per mail host.

        filehandle -- any iterable of log lines, e.g. an open file

        Lines that do not match the selected pattern are ignored.  If no
        log format has been selected yet, the setLogFormat() default is
        used instead of failing on the missing pattern.
        """
        if self._repo is None:
            self.setLogFormat()
        match = self._repo.match
        counters = self._mxs
        for line in filehandle:
            mo = match(line)
            if mo:
                mx = mo.group(1)
                counters[mx] = counters.get(mx, 0) + 1

    def countByDom(self):
        """Summarize the per-host counters by domain.

        The domain is taken to be the last two labels of the host name.
        NOTE: hosts below a multi-label suffix (e.g. mx.example.co.uk) are
        therefore grouped under that suffix ('co.uk').

        The summary is rebuilt from scratch on every call, so calling this
        method repeatedly, or again after further parseLog() calls, never
        counts a host twice.
        """
        # Clear in place so that a dict handed out by getDomains() earlier
        # keeps reflecting the current state.
        self._doms.clear()
        for mx, count in self._mxs.items():
            dom = '.'.join(mx.split('.')[-2:])
            self._doms[dom] = self._doms.get(dom, 0) + count

    def getDomains(self):
        """Return the domain -> count dict built by countByDom()."""
        return self._doms

    def getMXs(self):
        """Return the mail host -> count dict built by parseLog()."""
        return self._mxs

    def getTotal(self):
        """Return the total number of rejections counted so far."""
        return sum(self._mxs.values())

def getOptionParser():
    from optparse import OptionParser
    usage  = 'usage: %prog [options] maillog [maillog [...]]'
    parser = OptionParser(usage=usage, description='do something ...')
    parser.add_option('-d', action='store_true', dest='countByDom',
            default=False, help='summarize all MX by domain')
    parser.add_option('-m', action='store_false', dest='countByDom',
            help='count per MX host [default]')
    parser.add_option('-o', dest='oFormat', default='table',metavar='FORMAT',
            help='the output format: table or csv [default: %default]')
    parser.add_option('-p', action='store_true', dest='percent', default=False,
            help='show also percentages in table output [default: %default]')
    parser.add_option('-s', dest='order', default='name', metavar='SORTBY',
            help='arrange output by: name or count [default: %default]')
    parser.add_option('-t', dest='format', default='postfix',metavar='MTA',
            help='MTA that generated the maillog [default: %default]')
    return parser

def openLogFile(fname):
    """Open the log file fname for reading.

    fname -- path of the file to open

    Returns the open file object.  If the file cannot be opened, a warning
    naming the file is written to stderr and None is returned, so that the
    caller can skip this file and carry on with the next one.
    """
    # Local import: do not rely on the undocumented os.sys attribute.
    import sys
    try:
        return open(fname)
    except IOError as e:
        sys.stderr.write('Warning: %s\nskipped file: %s\n' % (e.strerror,
            fname))
        return None

def showResult(nixspamplot, options):
    """Print the collected counters, one "name -> count" line per entry.

    nixspamplot -- object providing countByDom(), getDomains() and getMXs()
    options     -- parsed options; countByDom and order are evaluated

    With options.countByDom set, the counters are summarized per domain
    first; otherwise they are listed per MX host.  The lines are sorted by
    name (ascending) if options.order is 'name'.  Any other value sorts by
    count, highest first.
    """
    if options.countByDom:
        nixspamplot.countByDom()
        counters = nixspamplot.getDomains()
    else:
        counters = nixspamplot.getMXs()
    # NOTE: options.percent is not evaluated here; no percentage output is
    # implemented yet.
    if options.order == 'name':
        rows = sorted(counters.items(), key=lambda row: row[0])
    else:
        rows = sorted(counters.items(), key=lambda row: row[1], reverse=True)
    for row in rows:
        # Parenthesized single argument: works as statement and as function.
        print("%s -> %d" % row)

def main():
    """Parse the command line, process all log files and print the result.

    Exits with a usage error if no log file is given or if the requested
    MTA log format is not supported.  Log files that cannot be opened are
    skipped.
    """
    parser = getOptionParser()
    opts, args = parser.parse_args()
    if not args:
        parser.error('No logfiles specified')
    nsp = NiXSpamPlot()
    try:
        nsp.setLogFormat(opts.format)
    except Exception as e:
        # An unsupported -t value is a usage problem: report it like any
        # other command line error instead of showing a traceback.
        parser.error(str(e))
    for fn in args:
        fh = openLogFile(fn)
        if fh is None:
            continue
        try:
            nsp.parseLog(fh)
        finally:
            # Close the file even if parsing fails.
            fh.close()
    showResult(nsp, opts)

# Run main() only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()