author Pascal Volk <>
Sun, 11 Apr 2010 19:39:48 +0000
changeset 11 a5f5a8f288da
parent 10 07b9fe5c6fcf
child 13 b0c05ce0f44c
permissions -rwxr-xr-x
PEP-8-ified the code. Version 0.1.3

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2009 - 2010 Pascal Volk
# See COPYING for distribution information.

# Module metadata: author, release version and release date.
__author__ = 'Pascal Volk'
__version__ = '0.1.3'
__date__ = '2010-04-11'

import os
import re
import fileinput

class NiXSapmSum(object):
    """Small log parser class to parse and summarize NiX Spam DNSBL lookup
    based rejects from a mail log file.

    Usage: select the log format with :meth:`setLogFormat`, feed log lines
    to :meth:`parseLog`, optionally aggregate with :meth:`countByDom` and
    read the counters back with :meth:`getMXs` / :meth:`getDomains`.
    """
    __slots__ = ('_doms', '_mxs', '_repo')

    # Building blocks for the log patterns.  Raw strings, so that the
    # backslashes reach the ``re`` module untouched.
    RE_FQDN = r'(?:[a-z0-9-]{1,63}\.){1,}[a-z]{2,6}'
    RE_IPv4 = r'(?:[\d]{1,3}\.){3}[\d]{1,3}'
    # Regular expression pattern for mail logs from Postfix.  Must be
    # compiled with re.VERBOSE: the line breaks and the indentation inside
    # the literal are layout only.  Group 1 captures the host (FQDN or
    # IPv4 address) named directly in front of "was detected by NiX Spam".
    # NOTE(review): the middle part of this pattern was not recoverable;
    # the ``.*\s(%s|%s)`` line is a minimal reconstruction -- confirm it
    # against real reject lines and tighten it if necessary.
    RE_PF = r'''^[\w\s:-]{17,80}\spostfix\/smtpd\[[\d]{1,5}\]:\sNOQUEUE:
                .*\s(%s|%s)
                \swas\sdetected\sby\sNiX\sSpam.*$''' % (RE_FQDN, RE_IPv4)

    def __init__(self):
        """Creates an empty summary; no log format is selected yet."""
        self._doms = {}
        self._mxs = {}
        self._repo = None

    def setLogFormat(self, format='postfix'):
        """Selects the pattern used by :meth:`parseLog`.

        :param format: name of the MTA that wrote the log; only
            ``'postfix'`` is supported
        :type format: str
        :raise Exception: if *format* is not supported
        """
        if format == 'postfix':
            self._repo = re.compile(NiXSapmSum.RE_PF, re.VERBOSE)
        else:
            raise Exception('MTA/Logformat not supported yet.')

    def parseLog(self, filehandle):
        """Counts the matching reject lines of *filehandle* per host.

        Lines that do not match the selected pattern are ignored.  If no
        format was selected before, the default of :meth:`setLogFormat`
        is used.

        :param filehandle: any iterable that yields log lines
        """
        if self._repo is None:
            self.setLogFormat()
        match = self._repo.match
        for line in filehandle:
            mo = match(line)
            if mo:
                mx = mo.group(1)
                self._mxs[mx] = self._mxs.get(mx, 0) + 1

    def countByDom(self):
        """Summarizes the per-host counters by domain.

        A host that is an IPv4 address is kept as it is; for a host name
        the last two labels are used as the domain.  The result is built
        from scratch on every call, so repeated calls do not add the same
        hits twice.
        """
        # Anchored at the end as well: a host *name* that merely starts
        # with four numeric labels is not an IPv4 address.
        ipv4po = re.compile(NiXSapmSum.RE_IPv4 + r'\Z')
        doms = {}
        for mx, count in self._mxs.items():
            if ipv4po.match(mx):
                dom = mx
            else:
                dom = '.'.join(mx.split('.')[-2:])
            doms[dom] = doms.get(dom, 0) + count
        self._doms = doms

    def getDomains(self):
        """Returns the dict domain -> count built by :meth:`countByDom`."""
        return self._doms

    def getMXs(self):
        """Returns the dict host -> count filled by :meth:`parseLog`."""
        return self._mxs

def getOptionParser():
    """Builds the command line parser of the script.

    The returned parser knows the options ``-d``/``-m`` (count per domain
    or per MX host), ``-o`` (output format), ``-p`` (percentages in table
    output), ``-s`` (sort order) and ``-t`` (MTA that wrote the log).

    :rtype: optparse.OptionParser
    """
    from optparse import OptionParser
    description = 'NiX Spam DNSBL lookup based rejects summarizer'
    usage = 'usage: %prog [options] maillog [maillog [...]]'
    version = '%prog ' + __version__
    parser = OptionParser(description=description, usage=usage,
                          version=version)
    parser.add_option('-d', action='store_true', dest='countByDom',
            default=False, help='summarize all MX by domain')
    parser.add_option('-m', action='store_false', dest='countByDom',
            help='count per MX host [default]')
    parser.add_option('-o', dest='oFormat', default='table', metavar='FORMAT',
            choices=('csv', 'table'),
            help='the output format: table or csv [default: %default]')
    parser.add_option('-p', action='store_true', dest='percent', default=False,
            help='show also percentages in table output [default: %default]')
    parser.add_option('-s', dest='order', default='name', metavar='SORTBY',
            choices=('count', 'name'),
            help='arrange output by: name or count [default: %default]')
    parser.add_option('-t', dest='format', default='postfix', metavar='MTA',
            help='MTA that generated the maillog [default: %default]')
    return parser

def check_files(log_files):
    """Checks that all files from *log_files* exist and all of them are
    readable.

    If a file doesn't exist or is not readable, a warning is written to
    stderr and the file is removed from the *log_files* set (the set is
    modified in place).

    This function will return *True*, if at least one file has passed the
    checks. Otherwise *False* will be returned. And the *log_files* set
    will be emptied.

    :param log_files: set of file names
    :type log_files: set
    :rtype: bool"""
    import sys
    assert isinstance(log_files, set), 'log_files argument must be a set'
    # Iterate over a copy: the set itself shrinks inside the loop.
    for lf in log_files.copy():
        if not os.path.isfile(lf):
            sys.stderr.write('Warning: No such file: %r\n' % lf)
            log_files.remove(lf)
        elif not os.access(lf, os.R_OK):
            sys.stderr.write('Warning: Cannot read file: %r\n' % lf)
            log_files.remove(lf)
    return bool(log_files)

def buildTable(output, domains, percent, orderBy):
    """Writes a plain text table of *domains* and their hits to *output*.

    One row is written per entry, followed by a line of dashes and a
    ``total`` row.  Nothing is written if *domains* is empty.

    :param output: object with a ``write()`` method
    :param domains: dict name -> count
    :type domains: dict
    :param percent: if true, a third column shows each count as a
        percentage of the total
    :type percent: bool
    :param orderBy: ``'name'`` sorts by name (ascending); any other value
        sorts by count (descending, equal counts by name)
    :type orderBy: str
    """
    if not domains:
        return
    if orderBy == 'name':
        rows = sorted(domains.items())
    else:
        # Highest count first; the name breaks ties so that the output
        # does not depend on the dict's iteration order.
        rows = sorted(domains.items(), key=lambda row: (-row[1], row[0]))
    total = sum(domains.values())
    # Both columns must also be wide enough for the closing 'total' row.
    dlen = max(max(len(name) for name in domains), len('total')) + 1
    clen = len(str(total))
    if percent:
        row_fmt = ' %%%ds  %%%dd  %%6.2f %%%%\n' % (-dlen, clen)
        for name, count in rows:
            output.write(row_fmt % (name, count, 100. / total * count))
        output.write('%s\n' % ((clen + dlen + 14) * '-'))
        output.write(row_fmt % ('total', total, 100))
    else:
        row_fmt = ' %%%ds  %%%dd\n' % (-dlen, clen)
        for row in rows:
            output.write(row_fmt % row)
        output.write('%s\n' % ((clen + dlen + 4) * '-'))
        output.write(row_fmt % ('total', total))

def showResult(nixspamsum, options):
    """Prints the summary collected by *nixspamsum* to stdout.

    :param nixspamsum: object providing ``getDomains()`` and ``getMXs()``
    :param options: parsed command line options; the attributes
        ``countByDom``, ``oFormat``, ``percent`` and ``order`` are read
    """
    if options.countByDom:
        domains = nixspamsum.getDomains()
    else:
        domains = nixspamsum.getMXs()
    if not domains:
        print("No NiX Spam DNSBL rejects found")
        return

    try:
        from cStringIO import StringIO
    except ImportError:
        # Python 3 has no cStringIO module
        from io import StringIO
    output = StringIO()
    # build the table
    if options.oFormat == 'table':
        buildTable(output, domains, options.percent, options.order)
    # generate comma separated values
    elif options.oFormat == 'csv':
        # Sorted by name, so that the row order is reproducible.
        for d in sorted(domains):
            output.write("'%s',%d\n" % (d, domains[d]))
    # should never be reached
    else:
        print("Oops, error in function showResult() happend")
    # show the result
    print(output.getvalue())

def main():
    """Runs the script: parses the command line, reads the given log
    files and prints the summary.

    :return: 0 on success, 1 if none of the given log files is readable
    :rtype: int
    """
    import sys
    parser = getOptionParser()
    opts, args = parser.parse_args()
    if not args:
        parser.error('No log file specified')
    # drop duplicates
    log_files = set(args)
    # remove inexistent/unreadable files
    if not check_files(log_files):
        sys.stderr.write('No readable log files found\n')
        return 1

    nixss = NiXSapmSum()
    nixss.setLogFormat(opts.format)

    # hook_compressed is passed so that compressed logs can be read too
    fi = fileinput.FileInput(log_files, openhook=fileinput.hook_compressed)
    try:
        nixss.parseLog(fi)
    finally:
        fi.close()
    # the per-domain counters exist only after countByDom() has run
    if opts.countByDom:
        nixss.countByDom()
    showResult(nixss, opts)
    return 0

if __name__ == '__main__':