#!/usr/local/bin/perl
# $Id: agentcounter.pl,v 1.4 1997/07/16 02:15:47 elkner Exp elkner $
# agentcounter: Program to generate statistics from Squid's useragent log
# files, i.e. about the user agents accessing a Squid proxy server.
# Derived from Jens Elkner's BrowserCounter 1.2.2-E
# ( http://irb.cs.uni-magdeburg.de/~elkner/webtools/ ) which is
# Derived from Benjamin Franz' BrowserCounter 1.2.1.
# ( http://www.netimages.com/~snowhare/utilities/browsercounter.html )
# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE.
#
# I offer it to the public domain and I ask, however, that this paragraph
# and my name be retained in any modified versions of the file you may
# make, and that you notify me of any improvements you make to the code.
#
# Use of this software in any way or in any form, source or binary,
# is not allowed in any country which prohibits disclaimers of any
# implied warranties of merchantability or fitness for a particular
# purpose or any disclaimers of a similar nature.
#
# IN NO EVENT SHALL I BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
# SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
# USE OF THIS SOFTWARE AND ITS DOCUMENTATION (INCLUDING, BUT NOT
# LIMITED TO, LOST PROFITS) EVEN IF I HAVE BEEN ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE
require "ctime.pl";
require "getopts.pl";
# Program name and version string.
$Version="AgentCounter 1.2.2";
# Decompression command; OpenFile pipes files whose name contains ".gz" or
# ".Z" through it.
$Zcat="/usr/local/bin/gzip -cd";
# Type of log file being parsed ('agent','combined')
$LogType='agent';
# Default Name of server
$HTTPDSERVER='proxycache.cs.uni-magdeburg.de';
# cellpadding size
$cellpadding=2;
# Border size
$border=2;
# Text printed at the start of every report section; each Print* section
# routine prints it first, immediately followed by the section name.
# NOTE(review): this literal looks as if its markup has been stripped --
# confirm against the upstream file.
$newsection="
\n
Top\n";
# Number of files OpenFile has opened successfully.
$filecounter=0;
# Date strings pushed by ReadLog: one per log file for the first (@start)
# and last (@stop) timestamp; PrintSummary sorts them to get the period.
@start = ();
@stop = ();
# Main program flow.
# Files listed in @Includes are handed to ReadOld, files named on the
# command line to ReadLog; in both cases only after OpenFile succeeded.
# The report is produced only if at least one file could be opened.
&Initialize;

foreach (@Includes) {
    # Keep a copy of the name: OpenFile receives the aliased $_ itself.
    $tmp = $_;
    next unless &OpenFile($_);
    &ReadOld($tmp);
}

foreach (@ARGV) {
    next unless &OpenFile($_);
    &ReadLog;
}

if ($filecounter <= 0) {
    warn "No files found for analyzing!\n";
}
else {
    &AnalyzeAgents;
    &PrintReport;
}
# --------------------------------- That's it --------------------------------
sub OpenFile {
    # Open the log file named by the first argument on the global
    # AGENTLOG handle.
    #
    #   $_[0]   file name; names ending in ".gz" or ".Z" are read through
    #           the decompressor in $Zcat, "-" reads standard input,
    #           anything else is opened as a plain file for reading.
    #
    # Returns 1 on success (and increments $filecounter), 0 on failure
    # (after a warning naming the file).
    #
    # The caller's variable is left untouched: the callers pass the aliased
    # loop variable of @ARGV/@Includes, which must keep the real file name.
    my $file = $_[0];
    my $opened;

    # Only a trailing suffix marks a compressed file; a ".gz"/".Z" somewhere
    # in the middle of the name (e.g. "access.gzip.log") does not.
    if ($file =~ m/\.(?:gz|Z)$/) {
        # The pipeline is run by the shell, so single-quote the file name
        # to keep spaces and metacharacters in it from being interpreted.
        (my $quoted = $file) =~ s/'/'\\''/g;
        $opened = open(AGENTLOG, "$Zcat '$quoted' |");
    }
    elsif ($file eq '-') {
        # Preserve the two-argument open convention that "-" means STDIN.
        $opened = open(AGENTLOG, '<&STDIN');
    }
    else {
        # Explicit read mode: a name starting with ">" or ending in "|"
        # must never be treated as an output file or a command.
        $opened = open(AGENTLOG, '<', $file);
    }

    if ($opened) {
        $filecounter++;
        return 1;
    }
    warn "Can't open $file: $!\n";
    return 0;
}
sub DeSpoofAgent {
    # Rewrites the global $Agent in place so that the real browser name
    # comes first, in the form "<real> spoofing as <original string>".
    # Takes no arguments; also leaves the extracted name in $spoofer.

    # WebTV announces itself as the second token of the agent string.
    $Agent = "$1 spoofing as $Agent"
        if $Agent =~ m#^\S+\s+(WebTV/\S+)#o;

    # Everything else is only of interest if it uses the
    # "Mozilla ... (compatible; RealName ...)" convention.
    return unless $Agent =~ m#^Mozilla.*\(compatible; *([^;)]+)#oi;

    # Normalise the real name: "/" becomes "-", trailing non-word
    # characters are dropped.
    ($spoofer = $1) =~ s#/#-#og;
    $spoofer =~ s/\W+$//o;
    $Agent = "$spoofer spoofing as $Agent";
}
sub ReadLog {
    # Read the log currently open on AGENTLOG and count every request by
    # its (despoofed, HTML-escaped) agent string in %rawagents.
    #
    # Takes no arguments.  Side effects:
    #   $refscounter   incremented once per line read
    #   %rawagents     hit count per agent string
    #   @start, @stop  Date2String form of the first and the last timestamp
    #                  that could be parsed from this file
    #   $line, $IP, $TimeDate, $Agent   hold the values of the last line
    # AGENTLOG is closed before returning.
    my ($first_stamp, $last_stamp);

    while ($line = <AGENTLOG>) {
        $refscounter++;
        chomp $line;
        # Collapse runs of whitespace so the field pattern below can rely
        # on single blanks (fixes the proxy info bug).
        $line =~ s#\s+# #go;
        ($IP, $TimeDate, $Agent) =
            $line =~ /^(\S+) \[([^\]\[]+)\] \"([^\"]*)\"/o;

        # Remember only timestamps that were really present; a malformed
        # line must not decide the period covered by the report.
        if (defined $TimeDate) {
            $first_stamp = $TimeDate unless defined $first_stamp;
            $last_stamp  = $TimeDate;
        }

        # Malformed lines are still counted, as agent "Unknown".
        $Agent = '' unless defined $Agent;
        &DeSpoofAgent();

        # Lets not let children play with dangerous toys...
        # Escape '&' first so the entities produced for <, > and " are
        # not escaped a second time.
        $Agent =~ s#&#&amp;#g;
        $Agent =~ s#<#&lt;#g;
        $Agent =~ s#>#&gt;#g;
        $Agent =~ s#"#&quot;#g;
        # strip leading chr(0)
        $Agent =~ s/^\x0*//;
        $Agent = "Unknown" if (! $Agent);
        $rawagents{$Agent}++;
    }
    close(AGENTLOG);

    # An empty or completely unparseable file contributes no period.
    push(@start, &Date2String($first_stamp)) if defined $first_stamp;
    push(@stop,  &Date2String($last_stamp))  if defined $last_stamp;
}
sub AnalyzeAgents {
    # Aggregate the raw agent counts in %rawagents at three levels:
    #   %agentgroup    by browser name              ("Mozilla")
    #   %agentversion  by "name version"            ("Mozilla 3.01")
    #   %baseagent     by the text in front of the first "(" or "["
    # Takes no arguments and returns nothing of interest.
    foreach $agent (keys %rawagents) {
        my $hits = $rawagents{$agent};

        # Base part: everything up to the first bracket, right-trimmed.
        $longagent = $agent;
        ($base) = ($longagent =~ m#^([^\(\[]+)#o);
        $base =~ s#\s+$##o;

        # Split the base into a name and a dotted version number; when
        # that yields no usable name, fall back to the first word.
        ($name, $version) = ($base =~ m#^([^\d\/]+)[\s\/vV]+(\d[\.\d]+)#o);
        ($name) = split(/\s+/, $base)
            if ($name eq "") || ($name =~ /^\ +/);

        $agentgroup{$name}              += $hits;
        $agentversion{"$name $version"} += $hits;
        $baseagent{$base}               += $hits;
    }
}
# PrintReport: emit the complete report.
# Takes no arguments.  In order it calls PrintSummary, prints an index
# list whose entries name the "#broad", "#version", "#detail" and
# "#complete" sections, calls the four section printers, prints a closing
# string and finally, when $OutputFile is set, renames "$OutputFile.$$"
# to $OutputFile.
# NOTE(review): the string literals in this routine look as if their markup
# has been stripped, and no code that opens or selects "$OutputFile.$$" is
# visible here -- confirm against the upstream file.
sub PrintReport {
local($date);
# NOTE(review): as written, the following statement only assigns the first
# string to $li; the remaining comma-separated items are evaluated and
# discarded.  It looks like a "print" (and the prefix kept in $li for the
# index entries below) were lost.
$li=" \n",
"\n",
"WWW Agent Statistics for ", $HTTPDSERVER, "\n",
"\n",
"\n\n",
"WWW Agent Statistics for ",$HTTPDSERVER,"
\n",
"
\n";
&PrintSummary;
# Index of the report sections; every entry starts with $li.
print
"\n",
"$li#broad\">Summary
\n",
"$li#version\">Summary by version\n",
"$li#detail\">Summary by fine detail of version\n",
"$li#complete\">Detailed report\n",
"\n";
&PrintBroadVersion;
&PrintVersion;
&PrintDetailedVersion;
&PrintComplete;
# NOTE(review): $date is set here but does not appear in the visible text
# of the closing print -- presumably part of a stripped footer.
$date = localtime(time);
print
"
\n\n\n";
# Make STDOUT the default output handle again, then move the finished
# file into place.
select(STDOUT);
if ( $OutputFile ) {
rename "$OutputFile.$$", "$OutputFile";
select(STDOUT);
}
}
# PrintSummary: print the period covered by the analysed logs and the
# total number of hits ($refscounter).
# Takes no arguments.  Reads @start and @stop, whose elements are
# "yy,mm,dd,time,offset" strings as built by Date2String, and translates
# the month number back through %Number2Month (defined outside this block).
# Overwrites the global @tmp.
sub PrintSummary {
local($dd,$mm,$yy,$time,$offset,$period);
# Start of the period: the first element of @start in string sort order.
@tmp = sort @start;
($yy,$mm,$dd,$time,$offset) = split(/,/,$tmp[0]);
$period = "$dd/$Number2Month{$mm}/$yy $time $offset to ";
# End of the period: the last element of @stop in string sort order.
@tmp = sort @stop;
($yy,$mm,$dd,$time,$offset) = split(/,/,$tmp[$#tmp]);
$period .= "$dd/$Number2Month{$mm}/$yy $time $offset";
# NOTE(review): the literals below look as if their markup has been
# stripped -- confirm against the upstream file.
print
"\nPeriod Covered: ", $period, "
\n",
"Web Browser Hits measured: ", $refscounter, "
\n",
"
\n";
}
sub GetPercent {
    # Return $_[0] as a percentage of $_[1], formatted with "%5.2f"
    # (two decimals, left-padded with blanks to at least five characters).
    #
    #   $_[0] = relative value, $_[1] = absolute value
    #
    # A missing or zero absolute value yields " 0.00" instead of dying
    # with "Illegal division by zero".
    # The result is also left in the global $percent, as before.
    my ($part, $total) = @_;

    my $ratio = (defined $total && $total != 0) ? 100 * $part / $total : 0;
    $percent = sprintf("%5.2f", $ratio);
    return $percent;
}
# PrintBroadVersion: print the "broad" section of the report, one row per
# key of %agentgroup with its hit count and its share of $refscounter
# (via GetPercent).  Rows are ordered by the AgentByHits comparator.
# Takes no arguments.  The section starts with $newsection and ends with
# $return2index (defined outside this block).
# NOTE(review): the literals look as if their table markup has been
# stripped -- confirm against the upstream file.
sub PrintBroadVersion {
# Section header and column titles.
print
$newsection, "broad\">Summary
",
"",
"| Hits | Percent | Browser \n";
# One row per browser name.
foreach $key (sort AgentByHits keys(%agentgroup)) {
print
" |
|---|
| ", $agentgroup{$key} , " ",
" | ", &GetPercent($agentgroup{$key},$refscounter), " ",
"% | ", $key, " \n";
}
print " |
\n", $return2index;
}
# PrintVersion: print the "version" section of the report, one row per key
# of %agentversion with its hit count and its share of $refscounter
# (via GetPercent).  Rows are ordered by the VersionByHits comparator.
# Takes no arguments.  The section starts with $newsection and ends with
# $return2index (defined outside this block).
# NOTE(review): the literals look as if their table markup has been
# stripped -- confirm against the upstream file.
sub PrintVersion {
# Section header and column titles.
print
$newsection, "version\">Summary by version\n\n",
"\n",
"| Hits | Percent | Browser \n";
# One row per "name version" key.
foreach $key (sort VersionByHits keys(%agentversion)) {
print
" |
|---|
| ",$agentversion{$key}," ",
" | ", &GetPercent($agentversion{$key},$refscounter)," ",
"% | ", $key, " \n";
}
print " |
\n", $return2index;
}
# PrintDetailedVersion: print the "detail" section of the report, one row
# per key of %baseagent with its hit count and its share of $refscounter
# (via GetPercent).  Rows are ordered by the BaseByHits comparator.
# Takes no arguments.  The section starts with $newsection and ends with
# $return2index (defined outside this block).
# NOTE(review): the literals look as if their table markup has been
# stripped -- confirm against the upstream file.
sub PrintDetailedVersion {
# Section header and column titles.
print
$newsection, "detail\">Summary by fine detail of version\n\n",
"",
"| Hits | Percent | Browser \n";
# One row per base agent string.
foreach $key (sort BaseByHits keys(%baseagent)) {
print
" |
|---|
| ",$baseagent{$key}, " ",
" | ", &GetPercent($baseagent{$key},$refscounter), " ",
"% | ", $key ," \n";
}
print " |
\n", $return2index;
}
# PrintComplete: print the "complete" section of the report, one row per
# key of %rawagents (the full agent strings) with its hit count and its
# share of $refscounter (via GetPercent).  Unlike the other sections the
# rows are in plain string sort order of the keys, not ordered by hits.
# Takes no arguments.  The section starts with $newsection and ends with
# $return2index (defined outside this block).
# NOTE(review): the literals look as if their table markup has been
# stripped -- confirm against the upstream file.
sub PrintComplete {
# Section header and column titles.
print
$newsection, "complete\">Detailed report\n\n",
"\n",
"| Hits | Percent | Browser \n";
# One row per raw agent string.
foreach $key (sort keys(%rawagents)) {
print " |
|---|
| ", $rawagents{$key}, " ",
" | ", &GetPercent($rawagents{$key},$refscounter), " ",
"% | ", $key, " \n";
}
print " |
\n", $return2index;
}
sub AgentByHits {
    # sort comparator for keys of %agentgroup: most hits first, equal hit
    # counts in ascending string order.  Leaves the numeric comparison
    # result in the global $tmp.
    $tmp = $agentgroup{$b} <=> $agentgroup{$a};
    $tmp || ($a cmp $b);
}
sub VersionByHits {
    # sort comparator for keys of %agentversion: most hits first, equal
    # hit counts in ascending string order.  Leaves the numeric comparison
    # result in the global $tmp.
    $tmp = $agentversion{$b} <=> $agentversion{$a};
    $tmp || ($a cmp $b);
}
sub BaseByHits {
    # sort comparator for keys of %baseagent: most hits first, equal hit
    # counts in ascending string order.  Leaves the numeric comparison
    # result in the global $tmp.
    $tmp = $baseagent{$b} <=> $baseagent{$a};
    $tmp || ($a cmp $b);
}
sub Date2String {
    # Convert a log timestamp into a string that sorts chronologically.
    #
    #   $_[0]   text containing "dd/Mon/yyyy:hh:mm:ss +zzzz" or
    #           "dd/Mon/yyyy hh:mm:ss +zzzz" (English month abbreviation)
    #
    # Returns "yyyy,mm,dd,hh:mm:ss,+zzzz", where mm is looked up in
    # %Month2Number, or the empty string when the argument is undefined or
    # contains no such timestamp.
    #
    # The fields are taken from explicit captures of a checked match, so a
    # failed match can never return leftovers of an earlier one, and the
    # caller's $_ (an alias into @ARGV in the main loop) is not touched.
    my ($stamp) = @_;
    return '' unless defined $stamp;

    my ($dd, $mon, $yy, $time, $offset) = $stamp =~
        m{(\d+)/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/(\d+)[: ](\d+:\d+:\d+)\s+([-+]\d+)};
    return '' unless defined $offset;

    return "$yy,$Month2Number{$mon},$dd,$time,$offset";
}
sub ReadOld {
local($tmp, $tmp1);
while () {
chop;
last if (m#