#!/usr/local/bin/perl
# $Id: agentcounter.pl,v 1.4 1997/07/16 02:15:47 elkner Exp elkner $
#
# agentcounter: Program to generate statistics from squids useragent log files
#               accessing squid proxy server.
#
# Derived from Jens Elkner's BrowserCounter 1.2.2-E
#   ( http://irb.cs.uni-magdeburg.de/~elkner/webtools/ ) which is
# Derived from Benjamin Franz' BrowserCounter 1.2.1.
#   ( http://www.netimages.com/~snowhare/utilities/browsercounter.html )
#
# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE.
#
# I offer it to the public domain and I ask, however, that this paragraph
# and my name be retained in any modified versions of the file you may
# make, and that you notify me of any improvements you make to the code.
#
# Use of this software in any way or in any form, source or binary,
# is not allowed in any country which prohibits disclaimers of any
# implied warranties of merchantability or fitness for a particular
# purpose or any disclaimers of a similar nature.
#
# IN NO EVENT SHALL I BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
# SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
# USE OF THIS SOFTWARE AND ITS DOCUMENTATION (INCLUDING, BUT NOT
# LIMITED TO, LOST PROFITS) EVEN IF I HAVE BEEN ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE
#
# NOTE(review): the copy of this script that was repaired here had lost all
# of its HTML markup (every "<...>" span and HTML entity was missing).  The
# tags emitted below are a plain reconstruction; compare with a pristine
# copy if byte-identical reports are required.

use strict;
use warnings;

# Getopt::Std replaces the Perl 4 "getopts.pl", which modern perls no longer
# ship.  "ctime.pl" was required as well but none of its functions were used
# (the report date comes from the built-in localtime), so it is gone.
use Getopt::Std;

# ------------------------------ Configuration --------------------------------

our $Version = "AgentCounter 1.2.2";

# Decompression command (program plus arguments, whitespace separated).
our $Zcat = "/usr/local/bin/gzip -cd";

# Type of log file being parsed ('agent','combined')
our $LogType = 'agent';

# Default Name of server
our $HTTPDSERVER = 'proxycache.cs.uni-magdeburg.de';

# cellpadding size
our $cellpadding = 2;

# Border size
our $border = 2;

# Section lead-in.  It deliberately ends inside an <A NAME="..."> attribute:
# the caller appends  anchor-name, '">', title and the closing tags.
our $newsection = "<P>\n<HR>\n<P ALIGN=\"RIGHT\">\n<A HREF=\"#top\">Top</A>\n"
                . "</P>\n<H2><A NAME=\"";

# Index entry lead-in; the caller appends  '#anchor">', text and '</A>'.
our $li = "<LI><A HREF=\"";

# Link printed after every table.
our $return2index = "<P><A HREF=\"#index\">Return to index</A></P>\n";

# ------------------------------- Shared state --------------------------------

our $filecounter = 0;      # number of input files opened successfully
our $refscounter = 0;      # total number of hits counted
our @start       = ();     # "yy,mm,dd,time,offset" of the first hit per file
our @stop        = ();     # "yy,mm,dd,time,offset" of the last hit per file
our @Includes    = ();     # old report files given with -i
our $OutputFile;           # target of -o (undef = standard output)

our %rawagents;            # HTML-escaped full agent string => hits
our %agentgroup;           # browser name                   => hits
our %agentversion;         # "name version"                 => hits
our %baseagent;            # agent up to first '(' or '['   => hits

our ($IP, $TimeDate, $Agent);                   # fields of the current line
our ($opt_h, $opt_i, $opt_N, $opt_o, $opt_v);   # set by getopts()

our %Number2Month = (
    '01', 'Jan', '02', 'Feb', '03', 'Mar', '04', 'Apr',
    '05', 'May', '06', 'Jun', '07', 'Jul', '08', 'Aug',
    '09', 'Sep', '10', 'Oct', '11', 'Nov', '12', 'Dec',
);
our %Month2Number = reverse %Number2Month;

# One time stamp, either as logged ("16/Jul/1997:02:15:47 +0200") or as
# printed in a report ("16/Jul/1997 02:15:47 +0200").
# Captures: day, month name, year, time, offset.
my $StampRe = qr{
    (\d+) / (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) / (\d+)
    [:\ ] (\d+:\d+:\d+) \s+ ([+-]\d+)
}x;

# ------------------------------- Main program --------------------------------

# Main()
# Parses the options, merges old reports (-i) and the log files named on the
# command line, and prints the HTML report.  Warns if no input was readable.
sub Main {
    Initialize();

    foreach my $statfile (@Includes) {
        ReadOld($statfile) if OpenFile($statfile);
    }
    foreach my $logfile (@ARGV) {
        ReadLog() if OpenFile($logfile);
    }

    if ($filecounter > 0) {
        AnalyzeAgents();
        PrintReport();
    }
    else {
        # Do not leave the temporary output file of -o behind.
        if ($OutputFile) {
            close(OUT);
            unlink("$OutputFile.$$");
        }
        warn "No files found for analyzing!\n";
    }
    return;
}

# --------------------------------- That's it --------------------------------

# OpenFile($path)
# Opens $path on the global handle AGENTLOG.  Names ending in .gz or .Z are
# read through $Zcat; "-" means standard input (as the old two-argument open
# did).  Returns 1 and increments $filecounter on success, otherwise warns
# and returns 0.
sub OpenFile {
    my ($path) = @_;
    my $opened;

    if ($path eq '-') {
        $opened = open(AGENTLOG, '<&STDIN');
    }
    elsif ($path =~ m/\.(?:gz|Z)\z/) {
        # List form: the file name never passes through a shell, so names
        # with spaces or shell metacharacters are safe.
        $opened = open(AGENTLOG, '-|', split(' ', $Zcat), $path);
    }
    else {
        # this has to be an uncompressed log file
        $opened = open(AGENTLOG, '<', $path);
    }

    unless ($opened) {
        warn "Can't open $path: $!\n";
        return 0;
    }
    $filecounter++;
    return 1;
}

# DeSpoofAgent()
# Rewrites the global $Agent in place to "<real browser> spoofing as <agent>"
# when the agent string reveals the real browser.
sub DeSpoofAgent {
    # Despoofs WebTV spoofing as MSIE spoofing as Mozilla.
    if ($Agent =~ m#^\S+\s+(WebTV/\S+)#) {
        $Agent = "$1 spoofing as $Agent";
    }
    # despoofs people using pseudo-'standard' of 'compatible'
    if ($Agent =~ m#^Mozilla.*\(compatible; *([^;)]+)#i) {
        my $spoofer = $1;
        $spoofer =~ s#/#-#g;
        $spoofer =~ s/\W+$//;
        $Agent = "$spoofer spoofing as $Agent";
    }
    return;
}

# _EscapeHtml($text)
# Returns $text with the HTML special characters escaped.  '&' has to be
# handled first, otherwise the entities produced for '<', '>' and '"' would
# be escaped a second time.
sub _EscapeHtml {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# ReadLog()
# Reads the useragent log open on AGENTLOG, counts every line in
# $refscounter and %rawagents, records the first and last parsable time
# stamp of the file in @start / @stop, and closes the handle.  Lines that do
# not parse are counted as "Unknown" but never contribute a time stamp.
sub ReadLog {
    my $need_start = 1;
    my $last_stamp;

    while (my $line = <AGENTLOG>) {
        $refscounter++;
        chomp $line;
        $line =~ s/\s+/ /g;

        # Fixes proxy info bug.
        ($IP, $TimeDate, $Agent) =
            $line =~ /^(\S+) \[([^\]\[]+)\] "([^"]*)"/;

        my $stamp = defined $TimeDate ? Date2String($TimeDate) : undef;
        if (defined $stamp) {
            if ($need_start) {
                push(@start, $stamp);
                $need_start = 0;
            }
            $last_stamp = $stamp;
        }

        $Agent = '' unless defined $Agent;
        # strip leading chr(0) (before despoofing, so the patterns anchored
        # at the start of the string can still match)
        $Agent =~ s/^\x00+//;
        DeSpoofAgent();

        # Lets not let children play with dangerous toys...
        $Agent = _EscapeHtml($Agent);
        $Agent = "Unknown" if $Agent eq '';
        $rawagents{$Agent}++;
    }
    close(AGENTLOG);

    push(@stop, $last_stamp) if defined $last_stamp;
    return;
}

# AnalyzeAgents()
# Derives %agentgroup, %agentversion and %baseagent from %rawagents.
sub AnalyzeAgents {
    foreach my $agent (keys %rawagents) {
        my $hits = $rawagents{$agent};

        # Everything in front of the first '(' or '[' is the "base" agent.
        my ($base) = $agent =~ m#^([^\(\[]+)#;
        $base = '' unless defined $base;
        $base =~ s/\s+$//;

        my ($name, $version) = $base =~ m#^([^\d/]+)[\s/vV]+(\d[.\d]+)#;
        if (!defined $name || $name eq '' || $name =~ /^ +/) {
            # split(' ') skips leading whitespace, so the first real word
            # is used instead of an empty field.
            ($name) = split(' ', $base);
        }
        $name    = 'Unknown' if !defined $name || $name eq '';
        $version = ''        if !defined $version;

        $agentgroup{$name}              += $hits;
        $agentversion{"$name $version"} += $hits;
        $baseagent{$base}               += $hits;
    }
    return;
}

# PrintReport()
# Writes the complete HTML report to OUT (when -o was given) or to STDOUT,
# then moves the finished temporary file to its final name.
# Dies if the output file cannot be completed.
sub PrintReport {
    my $server = _EscapeHtml($HTTPDSERVER);

    select(OUT) if $OutputFile;

    print "<HTML>\n",
          "<HEAD>\n",
          "<TITLE>WWW Agent Statistics for ", $server, "</TITLE>\n",
          "</HEAD>\n",
          "<BODY>\n\n",
          "<H1 ALIGN=\"CENTER\"><A NAME=\"top\">WWW Agent Statistics for ",
          $server, "</A></H1>\n",
          "<HR>\n";
    PrintSummary();
    print "<A NAME=\"index\"></A>\n<UL>\n",
          "$li#broad\">Summary</A>\n",
          "$li#version\">Summary by version</A>\n",
          "$li#detail\">Summary by fine detail of version</A>\n",
          "$li#complete\">Detailed report</A>\n",
          "</UL>\n";
    PrintBroadVersion();
    PrintVersion();
    PrintDetailedVersion();
    PrintComplete();

    my $date = localtime(time);
    print "<P>\n<HR>\n<P>\n\n\n<ADDRESS>",
          "Generated by \n",
          "<A HREF=\"http://irb.cs.uni-magdeburg.de/~elkner/webtools/\">",
          $Version, "</A>\n",
          "by Jens Elkner<BR>\n",
          "Report Last Modified: $date\n",
          "</ADDRESS>\n</BODY>\n</HTML>\n";

    select(STDOUT);
    if ($OutputFile) {
        # Close first: buffered write errors only show up here, and the
        # file must be complete before it replaces the previous report.
        close(OUT)
            or die "Can not write $OutputFile.$$: $!\n";
        rename("$OutputFile.$$", $OutputFile)
            or die "Can not rename $OutputFile.$$ to $OutputFile: $!\n";
    }
    return;
}

# _FormatStamp($stamp)
# Turns a "yy,mm,dd,time,offset" string into "dd/Mon/yy time offset".
sub _FormatStamp {
    my ($stamp) = @_;
    my ($yy, $mm, $dd, $time, $offset) = split(/,/, $stamp);
    return "$dd/$Number2Month{$mm}/$yy $time $offset";
}

# PrintSummary()
# Prints the covered period and the total number of hits.  The whole period
# is printed on the "Period Covered" line because ReadOld() looks for both
# time stamps on that line.
sub PrintSummary {
    my @first  = sort @start;
    my @last   = sort @stop;
    my $period = 'unknown';
    if (@first && @last) {
        $period = _FormatStamp($first[0]) . " to " . _FormatStamp($last[-1]);
    }

    print "<P>\n<TABLE BORDER=\"$border\" CELLPADDING=\"$cellpadding\">\n",
          "<TR><TH ALIGN=\"LEFT\">Period Covered: </TH><TD>", $period,
          "</TD></TR>\n",
          "<TR><TH ALIGN=\"LEFT\">Web Browser Hits measured: </TH><TD>",
          $refscounter, "</TD></TR>\n",
          "</TABLE>\n<P>\n";
    return;
}

# GetPercent($part, $total)
# Returns 100 * $part / $total formatted with "%5.2f"; a total of zero
# yields " 0.00" instead of a division-by-zero error.
sub GetPercent {
    # $_[0] = relative value, $_[1] = absolute value
    my ($part, $total) = @_;
    return sprintf("%5.2f", 0) unless $total;
    return sprintf("%5.2f", 100 * $part / $total);
}

# _PrintTable($anchor, $title, \%counts, @keys)
# Prints one report section: heading with anchor $anchor, then a
# Hits / Percent / Browser table with one row per key, in the given order.
# Every row is written on a single line; ReadOldStats() relies on that.
sub _PrintTable {
    my ($anchor, $title, $counts, @keys) = @_;

    print $newsection, "$anchor\">$title</A></H2>\n",
          "<TABLE BORDER=\"$border\" CELLPADDING=\"$cellpadding\">\n",
          "<TR><TH>Hits</TH><TH>Percent</TH><TH>Browser</TH></TR>\n";
    foreach my $key (@keys) {
        print "<TR><TD ALIGN=\"RIGHT\">", $counts->{$key}, "</TD>",
              "<TD ALIGN=\"RIGHT\">",
              GetPercent($counts->{$key}, $refscounter), "%</TD>",
              "<TD>", $key, "</TD></TR>\n";
    }
    print "</TABLE>\n", $return2index;
    return;
}

# PrintBroadVersion(): hits per browser name, most hits first.
sub PrintBroadVersion {
    _PrintTable('broad', 'Summary', \%agentgroup,
                sort AgentByHits keys(%agentgroup));
    return;
}

# PrintVersion(): hits per browser name and version, most hits first.
sub PrintVersion {
    _PrintTable('version', 'Summary by version', \%agentversion,
                sort VersionByHits keys(%agentversion));
    return;
}

# PrintDetailedVersion(): hits per base agent string, most hits first.
sub PrintDetailedVersion {
    _PrintTable('detail', 'Summary by fine detail of version', \%baseagent,
                sort BaseByHits keys(%baseagent));
    return;
}

# PrintComplete(): hits per full agent string, sorted by agent.  This is
# the table ReadOldStats() reads back from an old report.
sub PrintComplete {
    _PrintTable('complete', 'Detailed report', \%rawagents,
                sort keys(%rawagents));
    return;
}

# Sort comparators: descending by hits, ties broken by ascending name.
sub AgentByHits {
    return $agentgroup{$b} <=> $agentgroup{$a} || $a cmp $b;
}

sub VersionByHits {
    return $agentversion{$b} <=> $agentversion{$a} || $a cmp $b;
}

sub BaseByHits {
    return $baseagent{$b} <=> $baseagent{$a} || $a cmp $b;
}

# Date2String($text)
# Finds the first time stamp in $text ("dd/Mon/yy:time offset" or
# "dd/Mon/yy time offset") and returns it as "yy,mm,dd,time,offset", a form
# that sorts chronologically as a string.  Returns undef in scalar context
# when $text holds no time stamp.
sub Date2String {
    my ($text) = @_;
    return unless defined $text;

    my ($dd, $mon, $yy, $time, $offset) = $text =~ $StampRe
        or return;
    # The day is zero-padded so that string sorting stays chronological.
    return sprintf("%s,%s,%02d,%s,%s",
                   $yy, $Month2Number{$mon}, $dd, $time, $offset);
}

# ReadOld($name)
# Reads a report written earlier by this program from AGENTLOG: takes the
# covered period from the "Period Covered" line and the hit counts from the
# "complete" table, then closes the handle.  $name is accepted for
# compatibility but not used; the file is already open.
sub ReadOld {
    while (my $line = <AGENTLOG>) {
        chomp $line;
        last if $line =~ m#</HTML>#i;   # check for end of html file

        if ($line =~ /Period\s+Covered/i) {
            # get old period covered: first and last stamp on this line
            my @stamps;
            while ($line =~ /$StampRe/g) {
                push(@stamps, "$1/$2/$3 $4 $5");
            }
            if (@stamps >= 2) {
                my $from = Date2String($stamps[0]);
                my $to   = Date2String($stamps[-1]);
                push(@start, $from) if defined $from;
                push(@stop,  $to)   if defined $to;
            }
        }
        if ($line =~ /<A\s+NAME="complete">/i) {
            ReadOldStats();
            last;
        }
    }
    close(AGENTLOG);
    return;
}

# ReadOldStats()
# Reads the rows of the "complete" table from AGENTLOG up to </TABLE> and
# adds their hits to %rawagents and $refscounter.  Agent strings are taken
# over as they are (already HTML-escaped).
sub ReadOldStats {
    while (my $line = <AGENTLOG>) {
        last if $line =~ m#</TABLE>#i;
        next if $line !~ m#<TD#i;       # header row, blank lines, ...

        # hits are the first number following a tag, the agent is the
        # content of the last cell of the row
        if ($line =~ m#<[^>]+>\s*(\d+)\s*<[^>]+>.*<[^>]+>\s*(.*)\s*</TD>\s*</TR>\s*$#i) {
            my ($hits, $agent) = ($1, $2);
            if (length $agent) {
                $rawagents{$agent} += $hits;
                $refscounter       += $hits;
            }
        }
    }
    return;
}

# Version(): prints the version on STDERR and terminates via die.
sub Version {
    die <<"EndVersion";
This is $Version.
It is Jens Elkner's modified version of browsercounter 1.2.2-E.
EndVersion
}

# Usage(): prints the usage message on STDERR and terminates via die.
sub Usage {
    die <<"EndUsage";
Process a sequence of Squids useragent log files and output an HTML summary.

Usage: agentcounter [-h] [-v] [-N Proxy-Server-Name] [-i fileList]
                    [-o afile] [logfile ...] [logfile.gz ...] [logfile.Z ...]

Options:
  -h            Display this message and quit.
  -v            Display version
  -N name       Proxy Server name for report
  -i fileList   a comma separated list of old statfiles files for inclusion
  -o afile      Output file (default = Standard Output)
EndUsage
}

# Initialize()
# Parses the command line options (removing them from @ARGV) and, for
# "-o file", opens the temporary file "file.<pid>" on the handle OUT.
# Dies with the usage text on -h or an invalid option, with the version
# text on -v, and when the output file cannot be created.
sub Initialize {
    my $result = getopts('hi:N:o:v');
    Usage()   if $opt_h || !$result;
    Version() if $opt_v;

    $HTTPDSERVER = $opt_N if $opt_N;
    @Includes = split(/,/, $opt_i) if $opt_i;

    if ($opt_o && $opt_o ne '-') {
        $OutputFile = $opt_o;
        open(OUT, '>', "$OutputFile.$$")
            or die "Can not open $OutputFile: $!\n";
    }
    return;
}

# Run only when executed as a program, so that the file can also be loaded
# with require/do (e.g. by tests) without producing a report.
Main() unless caller;

1;