#!/usr/bin/perl # # pflogsumm.pl - Produce summaries of Postfix/VMailer MTA in logfile - # Copyright (C) 1998-99 by James S. Seymour (jseymour@jimsun.LinxNet.com) # (See "License", below.) Version 19990121-01 # # Usage: # pflogsumm.pl -[emq] [-d ] [-c ] [file1 [filen]] # # Options: # -c top to display in each traffic catagory # # -d today means just today # -d yesterday means just "yesterday" # # -e extended (extreme? excessive?) detail - emit detailed # reports. At present, this includes only a per-message # report, sorted by sender domain, then user-in-domain, # then by queue i.d. # # WARNING: the data built to generate this report can # quickly consume very large amounts of memory if a lot # of log entries are processed! # # -m modify (mung?) UUCP-style bang-paths # # This is for use when you have a mix of Internet-style # domain addresses and UUCP-style bang-paths in the log. # Upstream UUCP feeds sometimes mung Internet domain # style address into bang-paths. This option can # sometimes undo the "damage". For example: # "somehost.dom!username@foo" (where "foo" is the next # host upstream and "somehost.dom" was whence the email # originated) will get converted to # "foo!username@somehost.dom". This also affects the # extended detail report (-e), to help ensure that by- # domain-by-name sorting is more accurate. # # -q quiet - don't print headings for empty reports (note: # headings for warning, fatal, and "master" messages will # always be printed.) # # If no file(s) specified, reads from stdin. Output is to stdout. # # Typical usage: # Produce a report of previous day's activities: # pflogsumm.pl -d yesterday /var/log/syslog # A report of prior week's activities (after logs rotated): # pflogsumm.pl /var/log/syslog.1 # What's happened so far today: # pflogsumm.pl -d today /var/log/syslog # # Notes: # # For display purposes: integer values are munged into "kilo" and # "mega" notation as they exceed certain values. I chose the # admittedly arbitrary boundaries of 512k and 512m as the points # at which to do this--my thinking being 512x was the largest # number (of digits) that most folks can comfortably grok # at-a-glance. These are "computer" "k" and "m", not 1000 and # 1,000,000. You can easily change all of this with some # constants near the beginning of the program. # # The "messages-per-day" report is not generated for single-day # reports. For multiple-day reports: "messages-per-hour" numbers # are daily averages (reflected in the report heading). # # It's important that the logs are presented to pflogsumm in # chronological order so that message sizes are available when # needed. # # A note to the software-savvy: there will appear to be a lot of # duplication (code-wise) herein. There is. This is generally # done for the performance benefit of avoiding subroutine call # overhead in loops. # # License: # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You may have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, # USA. # # An on-line copy of the GNU General Public License can be found # http://www.fsf.org/copyleft/gpl.html. # # Thanks To: # Paul D. Robertson # Simon J Mudd use strict; use Getopt::Std; # Variables and constants used throughout pflogsumm use vars qw( $progName $usageMsg $opt_c $opt_d $opt_e $opt_m $opt_q $divByOneKAt $divByOneMegAt $oneK $oneMeg @monthNames %monthNums $thisYr $thisMon ); # Some constants used by display routines. I arbitrarily chose to # display in kilobytes and megabytes at the 512k and 512m boundaries, # respectively. Season to taste. $divByOneKAt = 524288; # 512k $divByOneMegAt = 536870912; # 512m $oneK = 1024; # 1k $oneMeg = 1048576; # 1m # Constants used throughout pflogsumm @monthNames = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); %monthNums = qw( Jan 0 Feb 1 Mar 2 Apr 3 May 4 Jun 5 Jul 6 Aug 7 Sep 8 Oct 9 Nov 10 Dec 11); ($thisMon, $thisYr) = (localtime(time()))[4,5]; # Variables used only in main loop my ( $cmd, $qid, $addr, $size, $relay, $status, $dateStr, %fatals, %warnings, %masterMsgs, %fromCnt, %fromSize, %msgSizes, %toCnt, %toSize, %deferred, %bounced, %noMsgSize, %msgDetail, $senderCnt, $recipCnt, $msgsRcvd, $msgsDlvrd, $sizeRcvd, $sizeDlvrd, $msgMonStr, $msgMon, $msgDay, $msgTimeStr, $msgHr, $msgMin, $msgSec, $msgYr, $revMsgDateStr, $dayCnt, %msgsPerDay, %rejects, %rcvdMsg, $fwdCnt ); # Messages received and delivered per hour my @rcvPerHr = qw(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0); my @dlvPerHr = @rcvPerHr; my $lastMsgDay = 0; $progName = "pflogsumm.pl"; $usageMsg = "usage: $progName -[emq] [-d ] [-c ] [file(s)]"; # I know: this is ugly $opt_c = 0; $opt_e = 0; $opt_m = 0; $opt_q = 0; getopts('c:d:eqm') || die "$usageMsg\n"; $dateStr = get_datestr($opt_d) if(defined($opt_d)); # debugging #open(UNPROCD, "> unprocessed") || # die "couldn't open \"unprocessed\": $!\n"; while(<>) { next if(defined($dateStr) && ! /^$dateStr/o); ($msgMonStr, $msgDay, $msgTimeStr, $cmd, $qid) = m#^(...)\s+([0-9]+)\s(..:..:..)\s.*?(?:vmailer|postfix)[-/]([^\[:]*).*?: ([^:]+)#o; ($msgMonStr, $msgDay, $msgTimeStr, $cmd, $qid) = m#^(...)\s+([0-9]+)\s(..:..:..)\s.*?(vmailer|postfix[^\[:]*).*?: ([^:]+)#o unless($cmd); next unless($cmd); chomp; # snatch out log entry date & time ($msgHr, $msgMin, $msgSec) = split(/:/, $msgTimeStr); $msgMon = $monthNums{$msgMonStr}; $msgYr = $thisYr; --$msgYr if($msgMon > $thisMon); # the following test depends on one getting more than one message a # month--or at least that successive messages don't arrive on the # same month-day in successive months :-) unless($msgDay == $lastMsgDay) { $lastMsgDay = $msgDay; $revMsgDateStr = sprintf "%d%02d%02d", $msgYr + 1900, $msgMon, $msgDay; ++$dayCnt; } if($qid eq 'warning') { ++$warnings{$cmd}; } elsif($qid eq 'fatal') { ++$fatals{$cmd}; } elsif($qid eq 'reject') { # This could get real ugly! # First: get everything following the "reject: " token my ($rejTyp, $rejFrom, $rejRmdr) = /^.* reject: ([^ ]+) from ([^:]+): (.*)$/o; # Next: get the reject "reason" (my $rejReas = $rejRmdr) =~ s/^(?:.*[:;] )?([^,]+).*$/$1/o; # stash in "triple-subscripted-array" ++$rejects{$rejTyp}{$rejReas}{gimme_domain($rejFrom)}; } elsif($cmd eq 'master') { ++$masterMsgs{(split(/^.*master.*: /))[1]}; } else { if((($addr, $size) = /from=<([^>]*)>, size=([0-9]+)/o) == 2) { next if($msgSizes{$qid}); # avoid double-counting! if($addr) { if($opt_m && $addr =~ /^(.*!)*([^!]+)!([^!@]+)@([^\.]+)$/o) { $addr = "$4!" . ($1? "$1" : "") . $3 . "\@$2"; } $addr =~ s/(@.+)/\L$1/o; } else { $addr = "from=<>" } $msgSizes{$qid} = $size; push(@{$msgDetail{$qid}}, $addr) if($opt_e); # Avoid counting forwards if($rcvdMsg{$qid}) { ++$senderCnt unless($fromCnt{$addr}); ++$fromCnt{$addr}; $fromSize{$addr} += $size; $sizeRcvd += $size; delete($rcvdMsg{$qid}); # limit hash size } } elsif((($addr, $relay, $status, my $rmdr) = /to=<([^>]*)>, relay=([^,]+),.*? status=([^ ]+)(.*)$/o) >= 3) { if($status eq 'sent') { # was it actually forwarded, rather than delivered? if($rmdr =~ /forwarded as /o) { ++$fwdCnt; next; } if($opt_m && $addr =~ /^(.*!)*([^!]+)!([^!@]+)@([^\.]+)$/o) { $addr = "$4!" . ($1? "$1" : "") . $3 . "\@$2"; } $addr =~ s/(@.+)/\L$1/o; ++$recipCnt unless($toCnt{$addr}); ++$toCnt{$addr}; ++$dlvPerHr[$msgHr]; ++${$msgsPerDay{$revMsgDateStr}}[1]; ++$msgsDlvrd; if($msgSizes{$qid}) { $toSize{$addr} += $msgSizes{$qid}; $sizeDlvrd += $msgSizes{$qid}; } else { $noMsgSize{$qid} = $addr; push(@{$msgDetail{$qid}}, "(sender not in log)") if($opt_e); # put this back later? mebbe with -v? # msg_warn("no message size for qid: $qid"); } push(@{$msgDetail{$qid}}, $addr) if($opt_e); } elsif($status eq 'deferred') { my ($deferredReas) = /, status=deferred \(([^:\)\/]+)/o; ++$deferred{$cmd}{$deferredReas}; } elsif($status eq 'bounced') { my ($bounceReas) = /, status=bounced \(([^.:;,\)]+)/o; ++$bounced{$relay}{$bounceReas}; } else { # print UNPROCD "$_\n"; } } elsif(($cmd eq 'pickup' && /: (sender|uid)=/o) || ($cmd eq 'smtpd' && /: client=/o)) { ++$rcvPerHr[$msgHr]; ++${$msgsPerDay{$revMsgDateStr}}[0]; ++$msgsRcvd; ++$rcvdMsg{$qid}; # quick-set a flag } else { # print UNPROCD "$_\n"; } } } # debugging #close(UNPROCD) || # die "problem closing \"unprocessed\": $!\n"; if(defined($dateStr)) { print "Postfix log summaries for $dateStr\n"; } print "\nGrand Totals\n------------\n"; print_val_with_title($msgsRcvd, "messages received"); print_val_with_title($msgsDlvrd, "messages delivered"); print_val_with_title($sizeRcvd, "bytes received"); print_val_with_title($sizeDlvrd, "bytes delivered"); print_val_with_title($senderCnt, "senders"); print_val_with_title($recipCnt, "recipients"); print_val_with_title($fwdCnt, "forwarded"); print_per_day_summary(\%msgsPerDay) if($dayCnt > 1); print_per_hour_summary(\@rcvPerHr, \@dlvPerHr, $dayCnt); print_hash_by_cnt_vals(\%fromCnt, "Senders by message count", $opt_c, $opt_q); print_hash_by_cnt_vals(\%toCnt, "Recipients by message count", $opt_c, $opt_q); print_hash_by_cnt_vals(\%fromSize, "Senders by message size", $opt_c, $opt_q); print_hash_by_cnt_vals(\%toSize, "Recipients by message size", $opt_c, $opt_q); print_hash_by_key(\%noMsgSize, "Messages with no size data", $opt_c, 1); print_nested_hash(\%deferred, "messages deferred", $opt_q); print_nested_hash(\%bounced, "messages bounced (by relay)", $opt_q); print_nested_hash(\%rejects, "messages rejected", $opt_q); print_hash_by_cnt_vals(\%warnings, "Warnings", 0, $opt_q); print_hash_by_cnt_vals(\%fatals, "Fatal Errors", 0, $opt_q); print_hash_by_cnt_vals(\%masterMsgs,"Master daemon messages", 0, $opt_q); print_detailed_msg_data(\%msgDetail, "Message detail", $opt_q) if($opt_e); # print an integer value with associated title sub print_val_with_title { my($value, $title) = @_; $value = 0 unless($value); my $units = ' '; if($value > $divByOneMegAt) { $value /= $oneMeg; $units = 'm' } elsif($value > $divByOneKAt) { $value /= $oneK; $units = 'k' } printf " %6d%s %s\n", $value, $units, $title; } # print "per-day" traffic summary # (done in a subroutine only to keep main-line code clean) sub print_per_day_summary { my($msgsPerDay) = @_; print < $b } keys(%$msgsPerDay)) { my ($msgYr, $msgMon, $msgDay) = unpack("A4 A2 A2", $_); my $msgMonStr = $monthNames[$msgMon]; printf " $msgMonStr %2d $msgYr", $msgDay; foreach my $value (@{$msgsPerDay->{$_}}) { $value = 0 unless($value); my $units = ' '; if($value > $divByOneMegAt) { $value /= $oneMeg; $units = 'm' } elsif($value > $divByOneKAt) { $value /= $oneK; $units = 'k' } printf " %6d%s", $value, $units; } print "\n"; } } # print "per-hour" traffic summary # (done in a subroutine only to keep main-line code clean) sub print_per_hour_summary { my($rcvPerHr, $dlvPerHr, $dayCnt) = @_; my $reportType = $dayCnt > 1? 'Daily Average' : 'Summary'; print < $divByOneMegAt) { $value /= $oneMeg; $units = 'm' } elsif($value > $divByOneKAt) { $value /= $oneK; $units = 'k' } printf " %6d%s", $value, $units; } print "\n"; } } # print hash contents sorted by numeric values in descending # order (i.e.: highest first) sub print_hash_by_cnt_vals { my($hashRef, $title, $cnt, $quiet) = @_; my $dottedLine; $title = sprintf "%s%s", $cnt? "top $cnt " : "", $title; unless(%$hashRef) { return if($quiet); $dottedLine = ": none"; } else { $dottedLine = "\n" . "-" x length($title); } printf "\n$title$dottedLine\n"; really_print_hash_by_cnt_vals($hashRef, $cnt, ' '); } # print hash contents sorted by key in ascending order sub print_hash_by_key { my($hashRef, $title, $cnt, $quiet) = @_; my $dottedLine; $title = sprintf "%s%s", $cnt? "first $cnt " : "", $title; unless(%$hashRef) { return if($quiet); $dottedLine = ": none"; } else { $dottedLine = "\n" . "-" x length($title); } printf "\n$title$dottedLine\n"; foreach (sort keys(%$hashRef)) { printf " %s %s\n", $_, $hashRef->{$_}; last if --$cnt == 0; } } # print "nested" hashes sub print_nested_hash { my($hashRef, $title, $quiet) = @_; my $dottedLine; unless(%$hashRef) { return if($quiet); $dottedLine = ": none"; } else { $dottedLine = "\n" . "-" x length($title); } printf "\n$title$dottedLine\n"; walk_nested_hash($hashRef, 0); } # "walk" a "nested" hash sub walk_nested_hash { my ($hashRef, $level) = @_; $level += 2; my $indents = ' ' x $level; my ($keyName, $hashVal) = each(%$hashRef); if(ref($hashVal) eq 'HASH') { foreach (sort keys %$hashRef) { print "$indents$_\n"; walk_nested_hash($hashRef->{$_}, $level); } } else { really_print_hash_by_cnt_vals($hashRef, 0, $indents); # print "\n" } } # print per-message info in excruciating detail :-) sub print_detailed_msg_data { use vars '$hashRef'; local($hashRef) = $_[0]; my($title, $quiet) = @_[1,2]; my $dottedLine; unless(%$hashRef) { return if($quiet); $dottedLine = ": none"; } else { $dottedLine = "\n" . "-" x length($title); } printf "\n$title$dottedLine\n"; foreach (sort by_domain_then_user keys(%$hashRef)) { printf " %s %s\n", $_, shift(@{$hashRef->{$_}}); foreach (@{$hashRef->{$_}}) { print " $_\n"; } print "\n"; } } # *really* print hash contents sorted by numeric values in descending # order (i.e.: highest first) :-) sub really_print_hash_by_cnt_vals { my($hashRef, $cnt, $indents) = @_; foreach (reverse sort { $hashRef->{$a} <=> $hashRef->{$b} } keys(%$hashRef)) { my $value = $hashRef->{$_}; my $units = ' '; if($value > $divByOneMegAt) { $value /= $oneMeg; $units = 'm' } elsif($value > $divByOneKAt) { $value /= $oneK; $units = 'k' } printf "$indents%6d%s %s\n", $value, $units, $_; last if --$cnt == 0; } } # subroutine to sort by domain, then user in domain, then by queue i.d. # Note: mixing Internet-style domain names and UUCP-style bang-paths # may confuse this thing. An attempt is made to use the first host # preceding the username in the bang-path as the "domain" if none is # found otherwise. sub by_domain_then_user { # first see if we can get "user@somedomain" my($userNameA, $domainA) = split(/\@/, ${$hashRef->{$a}}[0]); my($userNameB, $domainB) = split(/\@/, ${$hashRef->{$b}}[0]); # try "somedomain!user"? ($userNameA, $domainA) = (split(/!/, ${$hashRef->{$a}}[0]))[-1,-2] unless($domainA); ($userNameB, $domainB) = (split(/!/, ${$hashRef->{$b}}[0]))[-1,-2] unless($domainB); # now re-order "mach.host.dom"/"mach.host.do.co" to # "host.dom.mach"/"host.do.co.mach" $domainA =~ s/^(.*)\.([^\.]+)\.([^\.]{3}|[^\.]{2,3}\.[^\.]{2})$/$2.$3.$1/o if($domainA); $domainB =~ s/^(.*)\.([^\.]+)\.([^\.]{3}|[^\.]{2,3}\.[^\.]{2})$/$2.$3.$1/o if($domainB); # oddly enough, doing this here is marginally faster than doing # an "if-else", above. go figure. $domainA = "" unless($domainA); $domainB = "" unless($domainB); if($domainA lt $domainB) { return -1; } elsif($domainA gt $domainB) { return 1; } else { # disregard leading bang-path $userNameA =~ s/^.*!//o; $userNameB =~ s/^.*!//o; if($userNameA lt $userNameB) { return -1; } elsif($userNameA gt $userNameB) { return 1; } else { if($a lt $b) { return -1; } elsif($a gt $b) { return 1; } } } return 0; } # return a date string to match in log sub get_datestr { my $dateOpt = $_[0]; my $aDay = 60 * 60 * 24; my $time = time(); if($dateOpt eq "yesterday") { $time -= $aDay; } elsif($dateOpt ne "today") { die "$usageMsg\n"; } my ($t_mday, $t_mon) = (localtime($time))[3,4]; return sprintf("%s %2d", $monthNames[$t_mon], $t_mday); } # if there's a real domain: uses that. Otherwise uses the first # three octets of the IP addr. (In the latter case: usually pretty # safe to assume it's a dialup with a class C IP addr.) Lower- # cases returned domain name. sub gimme_domain { $_ = $_[0]; # split domain/ipaddr into separates my($domain, $ipAddr) = /^([^\[]+)\[([^\]]+)\]:?$/o; # now re-order "mach.host.dom"/"mach.host.do.co" to # "host.dom.mach"/"host.do.co.mach" if($domain eq 'unknown') { ($domain = $ipAddr) =~ s/\.[0-9]+$//o; } else { $domain =~ s/^(.*)\.([^\.]+)\.([^\.]{3}|[^\.]{2,3}\.[^\.]{2})$/\L$2.$3/o; } return $domain; } ### ### Warning and Error Routines ### # Emit warning message to stderr sub msg_warn { print STDERR "warning: $progName: $_[0]\n"; }