#!/usr/bin/perl
#
# $Id: Apache2Dot.pl,v 1.1 2007/03/10 21:02:09 rdilley Exp rdilley $
#
# author: ron dilley
#
# desc: reads Apache access logs (combined format) and writes, on stdout,
#       either a Graphviz DOT digraph or a plain text table of the
#       referer -> requested-URI links found in GET and HEAD requests.
#
# Copyright (C) 2006 Ron A. Dilley
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# ron.dilley@uberadmin.com
#
############################################################################

#
# modules
#

use Getopt::Std;

#
# pragmas
#

use strict;
use warnings;

#
# set environment
#

$ENV{PATH} = "/usr/bin:/bin:/usr/sbin:/sbin:/usr/ucb";

#
# turn on autoflush
#

select STDERR; $| = 1;
select STDOUT; $| = 1;

#
# defines
#

$::TRUE = 1;
$::FALSE = 0;
$::FAILED = -1;
$::VERSION = '$Id: Apache2Dot.pl,v 1.1 2007/03/10 21:02:09 rdilley Exp rdilley $';
$::PROGNAME = "Apache2Dot.pl";

%::Config = ();
$::Config{'debug'} = $::FALSE;
$::Config{'ignorePics'} = $::FALSE;
$::Config{'table'} = $::FALSE;

# Graph data filled in by parseApacheLog:
#   $::nodes{"[status] uri"}{'counter'}       hits on a request with referer "-"
#   $::nodes{referer}{'counter'}              hits that named this referer
#   $::nodes{referer}{"[status] uri"}         hits on that uri from this referer
%::nodes = ();

#
# main routine
#

# Only run when executed as a script; when the file is loaded with
# require/do, the subs are defined but nothing is executed.
unless ( caller ) {
    if ( main() != $::TRUE ) {
        exit( 1 );
    }
    exit( 0 );
}

############################################################################
#
# sub-routines
#

#
# main routine
#
# Shows the banner, parses the command line, feeds every remaining
# argument to parseApacheLog and finally prints the graph or the table.
#
# Returns $::TRUE on success, $::FAILED when the options are invalid.
#

sub main {
    #
    # display script banner
    #

    show_banner();

    #
    # parse command-line
    #

    if ( parse_command_line() != $::TRUE ) {
        # help has already been printed; do not produce output
        return $::FAILED;
    }

    # process args that are left; test with defined() so that a file
    # literally named "0" is not mistaken for the end of the list
    while ( defined( my $arg = shift( @::ARGV ) ) ) {
        # a log that cannot be opened is reported by parseApacheLog and
        # the remaining logs are still processed
        parseApacheLog( $arg );
    }

    if ( $::Config{'table'} ) {
        genTable( $::Config{'graphLabel'} );
    }
    else {
        genDiGraph( $::Config{'graphLabel'} );
    }

    # done
    return $::TRUE;
}

#
# display banner info
#
# Writes the version and license banner to STDERR. Returns $::TRUE.
#

sub show_banner {
    print STDERR "$::VERSION\n";
    print STDERR "By: Ron Dilley\n";
    print STDERR "\n";
    print STDERR "$::PROGNAME comes with ABSOLUTELY NO WARRANTY.\n";
    print STDERR "This is free software, and you are welcome\n";
    print STDERR "to redistribute it under certain conditions;\n";
    print STDERR "See the GNU General Public License for details.\n";
    print STDERR "\n";

    return $::TRUE;
}

#
# display help info
#
# Writes the usage text to STDERR. Returns $::TRUE.
#

sub show_help {
    print STDERR "Syntax:\n";
    print STDERR "\n";
    print STDERR "$::PROGNAME [options] {file} [{file} ...]\n";
    print STDERR "\n";
    print STDERR "-d {0-9} Display debug information during program run\n";
    print STDERR "-l {label} Label for graph\n";
    print STDERR "-i Ignore uri's with known image extensions\n";
    print STDERR "-t Print as table\n";
    print STDERR "\n";

    return $::TRUE;
}

#
# parse command-line arguments
#
# Reads -d, -l, -i and -t from @ARGV into %::Config; getopts leaves the
# non-option arguments (the log files) in @ARGV.
#
# Returns $::TRUE on success. On an invalid option the help text is
# printed and $::FAILED is returned.
#

sub parse_command_line {
    my %opts;

    # collect options in a lexical hash instead of $opt_x package globals
    if ( !getopts( 'd:l:it', \%opts ) ) {
        show_help();
        return $::FAILED;
    }

    if ( defined $opts{'d'} ) {
        # only a positive integer turns debug mode on
        if ( $opts{'d'} =~ m/^\d+$/ && $opts{'d'} > 0 ) {
            # set debug mode
            $::Config{'debug'} = $opts{'d'};
        }
    }

    if ( defined $opts{'l'} ) {
        if ( length( $opts{'l'} ) > 0 ) {
            $::Config{'graphLabel'} = $opts{'l'};
        }
    }

    if ( defined $opts{'i'} ) {
        $::Config{'ignorePics'} = $::TRUE;
    }

    if ( defined $opts{'t'} ) {
        $::Config{'table'} = $::TRUE;
    }

    return $::TRUE;
}

#
# read log file
#
# 199.67.203.142 - - [30/Jan/2007:00:38:57 -0800] "GET /img/global/gray_round_arrow.gif HTTP/1.1" 304 0 "-" "Mozilla/4.0 (compatible;)"
# 208.254.60.228 - - [12/Jan/2007:04:23:31 -0800] "GET /investors/inv_stock_quote.jsp?reload=-2026640553 HTTP/1.0" 200 12871 "-" "googlebot_amgen (Enterprise; MID-00362; bryanf@amgen.com)"
# 172.20.1.90 - - [26/Apr/2008:17:06:41 -0700] "GET /Images/DestIP_Over_24h.png HTTP/1.1" 304 - "http://www.uberadmin.com/" "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14"
# 172.20.1.90 - - [26/Apr/2008:17:06:47 -0700] "GET /Images/SrcPort_Over_24h.png HTTP/1.1" 304 - "http://www.uberadmin.com/" "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14"
#
# Parameters:
#   $fname - path of the log file, or "-" to read STDIN
#
# Every GET/HEAD line is added to %::nodes. Lines containing "/private/"
# or "awstats" are skipped silently. Any other line that cannot be parsed
# is reported on STDERR as "Unknown line [...]".
#
# Returns $::TRUE when the input was read, $::FAILED when the file could
# not be opened.
#

sub parseApacheLog {
    my ( $fname ) = @_;
    my $in;
    my $opened = $::FALSE;

    if ( $fname eq "-" ) {
        # use stdin
        $in = \*STDIN;
    }
    else {
        # open the file; three-arg form so the name is never read as a mode
        if ( !open( $in, '<', $fname ) ) {
            print STDERR "ERR - Unable to open log file [$fname]\n";
            return $::FAILED;
        }
        $opened = $::TRUE;
    }

    while ( defined( my $line = <$in> ) ) {
        chomp( $line );

        # requests for private areas and awstats are deliberately excluded
        if ( $line =~ m/\/private\// || $line =~ m/awstats/ ) {
            next;
        }

        # parse that line
        my ( $uri, $refUri ) = parseApacheLine( $line );

        if ( !defined $uri ) {
            print STDERR "Unknown line [$line]\n";
        }
        elsif ( !defined $refUri ) {
            # no referer: just count the request itself
            $::nodes{$uri}{'counter'}++;
        }
        else {
            # count the referer and the referer -> uri link
            $refUri = normalizeReferer( $refUri );
            $::nodes{$refUri}{'counter'}++;
            $::nodes{$refUri}{$uri}++;
        }
    }

    # never close STDIN on behalf of the caller
    if ( $opened ) {
        close( $in );
    }

    return $::TRUE;
}

#
# parse one access log line (helper for parseApacheLog)
#
# Accepts GET and HEAD requests, with or without a trailing " HTTP/x.y"
# in the request field, and a response size that is a number or "-".
#
# Returns ( "[status] uri", referer ). The referer is undef when the log
# field is "-". Returns an empty list when the line is not recognised.
#

sub parseApacheLine {
    my ( $line ) = @_;
    my $request;
    my $status;
    my $referer;

    # ip identd userid [timestamp] "METHOD request" status size
    my $prefix = qr/^\d+\.\d+\.\d+\.\d+\s+\S+\s+\S+\s+\[.*\]\s+"(?:GET|HEAD) (.*)"\s+(\d+)\s+(?:\d+|-)\s+/;

    if ( $line =~ m/${prefix}"-".*$/ ) {
        # referer field is "-"; whatever follows is ignored
        ( $request, $status ) = ( $1, $2 );
    }
    elsif ( $line =~ m/${prefix}"(.*)"\s+".*"$/ ) {
        # real referer, followed by the quoted user agent
        ( $request, $status, $referer ) = ( $1, $2, $3 );
    }
    else {
        return;
    }

    # the protocol version is not part of the uri
    $request =~ s{ HTTP/\d+\.\d+$}{};

    return ( "[$status] $request", $referer );
}

#
# shorten a referer (helper for parseApacheLog)
#
# Removes a leading "http://" and the local site's host names, and folds
# all www.google.* / images.google.* referers into one node each.
#
# Returns the shortened referer string.
#

sub normalizeReferer {
    my ( $refUri ) = @_;

    $refUri =~ s/^http:\/\///;

    # applied one after the other, each on the result of the previous one
    foreach my $tld ( 'com', 'net', 'org' ) {
        $refUri =~ s/^www\.uberadmin\.$tld//;
    }

    if ( $refUri =~ m/^www\.google\./ ) {
        return "www.google";
    }
    elsif ( $refUri =~ m/^images\.google\./ ) {
        return "images.google";
    }

    return $refUri;
}

#
# true when the link contains a known image extension (.gif, .jpg, .png)
#

sub isImageLink {
    my ( $link ) = @_;

    if ( $link =~ m/\.(?:gif|jpg|png)/ ) {
        return $::TRUE;
    }
    return $::FALSE;
}

#
# escape a string for use inside a double-quoted DOT string
#
# Log content is untrusted; a raw '"' would end the quoted string early
# and a trailing '\' would swallow the closing quote.
#

sub dotEscape {
    my ( $text ) = @_;

    $text =~ s/(["\\])/\\$1/g;

    return $text;
}

#
# generate digraph
#
# Parameters:
#   $labelName - name of the box-shaped label node (undef is treated as "")
#
# Prints a DOT digraph to STDOUT with one edge per referer -> uri link in
# %::nodes. The hit count is used as edge label and weight. Edge style:
#   status 400-599  red, "dot" arrowhead
#   status 300-399  green
#   image link      blue, no arrowhead (left out entirely with -i)
# Nodes and links are written in sorted order so the output is
# reproducible. Requests that never had a referer produce no output.
#
# Returns $::TRUE.
#

sub genDiGraph {
    my ( $labelName ) = @_;

    if ( !defined $labelName ) {
        $labelName = '';
    }

    print "digraph G {\n";
    print " \"" . dotEscape( $labelName ) . "\" [shape=box];\n";
    print " margin=\".5\";\n";
    print " fontsize=\"auto\";\n";
    print " size=\"16,10.5\";\n";
    print " ratio=\"compress\";\n";
    #print " constraint=\"true\";\n";
    print " orientation=\"landscape\";\n";
    #print " rotate=\"90\";\n";

    foreach my $node ( sort keys( %::nodes ) ) {
        my $links = $::nodes{$node};

        foreach my $link ( sort keys( %{$links} ) ) {
            # 'counter' is the node's own hit count, not a link
            next if ( $link eq 'counter' );

            my $lineColor = "black";
            my $lineEnd = "normal";

            if ( $link =~ m/^\[(\d+)\]/ ) {
                if ( $1 >= 400 && $1 < 600 ) {
                    # error
                    $lineColor = "red";
                    $lineEnd = "dot";
                }
                elsif ( $1 >= 300 && $1 < 400 ) {
                    # cached or something
                    $lineColor = "green";
                }
            }

            if ( isImageLink( $link ) ) {
                next if ( $::Config{'ignorePics'} );
                $lineColor = "blue";
                $lineEnd = "none";
            }

            # drop the "[status] " prefix from the displayed target
            my $tmpLink = $link;
            $tmpLink =~ s/^\[\d+\] //;

            my $hits = $links->{$link};

            print " \"" . dotEscape( $node ) . "\" -> \"" . dotEscape( $tmpLink ) . "\" [arrowhead=$lineEnd,color=$lineColor,label=\"$hits\",weight=$hits];\n";
        }
    }

    print "}\n";

    return $::TRUE;
}

#
# generate text table
#
# Parameters:
#   $labelName - heading printed on the first line (undef is treated as "")
#
# Prints the heading, a "-----" rule, and then, in sorted order, either
# the node itself (when it has no links) or one "node -> link" line per
# link. Image links are left out when -i was given.
#
# Returns $::TRUE.
#

sub genTable {
    my ( $labelName ) = @_;

    if ( !defined $labelName ) {
        $labelName = '';
    }

    print "$labelName\n";
    print "-----\n";

    foreach my $node ( sort keys( %::nodes ) ) {
        my $links = $::nodes{$node};

        if ( scalar( keys( %{$links} ) ) == 1 ) {
            # only the 'counter' entry: a request that never had a referer
            print "$node\n";
            next;
        }

        foreach my $link ( sort keys( %{$links} ) ) {
            next if ( $link eq 'counter' );
            next if ( $::Config{'ignorePics'} && isImageLink( $link ) );

            print "$node -> $link\n";
        }
    }

    return $::TRUE;
}