#!/usr/local/bin/perl
# $Id: browsercounter.pl,v 1.7 1997/10/12 08:33:56 elkner Exp $
# browsercounter: Program to create statistics from log files about WWW Agents
# accessing WWW server.
# written by Jens Elkner (elkner@irb.cs.uni-magdeburg.de)
# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE.
#
# I offer it to the public domain and I ask, however, that this paragraph
# and my name be retained in any modified versions of the file you may
# make, and that you notify me of any improvements you make to the code.
#
# Use of this software in any way or in any form, source or binary,
# is not allowed in any country which prohibits disclaimers of any
# implied warranties of merchantability or fitness for a particular
# purpose or any disclaimers of a similar nature.
#
# IN NO EVENT SHALL I BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
# SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
# USE OF THIS SOFTWARE AND ITS DOCUMENTATION (INCLUDING, BUT NOT
# LIMITED TO, LOST PROFITS) EVEN IF I HAVE BEEN ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE
require "ctime.pl";
require "getopts.pl";
# ------------------------------ Configuration -------------------------------
# Program name and version string.
$Version="BrowserCounter 1.3-E";
# Command that writes a decompressed copy of a file to stdout; OpenFile
# appends the file name and reads the log through a pipe from it.
$Zcat="/usr/local/bin/gzip -cd";
# Type of log file being parsed ('agent','combined')
# ReadLog treats each line as the bare agent string for 'agent' and takes
# the last quoted field of the line for 'combined'.
$LogType='agent';
# Name of server
$HTTPDSERVER='www.cs.uni-magdeburg.de';
# cellpadding size
# NOTE(review): not referenced in the visible part of this file.
$cellpadding=2;
# Border size
# NOTE(review): not referenced in the visible part of this file.
$border=2;
# Text printed at the start of every report section by the Print* subs.
# NOTE(review): this literal looks damaged (markup seems to be missing);
# compare with a pristine copy of the script before relying on it.
$newsection="
\n
Top\n";
# Number of files opened successfully so far; incremented by OpenFile.
$filecounter=0;
# --------------------------------- Main flow --------------------------------
# Initialize is defined outside the visible part of this file; presumably it
# handles the command line and fills @Includes -- TODO confirm.
&Initialize;
# Previously generated statistics: OpenFile is handed the loop element itself,
# while ReadOld receives the copy of the name saved in $tmp beforehand.
foreach (@Includes) {
$tmp = $_;
&ReadOld($tmp) if &OpenFile($_);
}
# Every remaining command line argument is treated as a log file to analyze.
foreach (@ARGV) {
&ReadLog if &OpenFile($_);
}
# Only produce a report when at least one file could be opened.
if ($filecounter > 0) {
&PrintReport;
}
else {
warn "No files found for analyzing!\n";
}
# --------------------------------- That's it --------------------------------
# OpenFile(NAME)
#
# Opens NAME for reading on the global handle AGENTLOG.  Names containing
# ".gz" or ".Z" are read through a pipe from "$Zcat NAME".
#
# On success the file's modification time is appended to @filetimes, a
# "yymm" stamp is appended to @mmtimes, $filecounter is incremented and 1 is
# returned.  On failure a warning is printed and 0 is returned; none of the
# bookkeeping variables are touched in that case.
#
# The yymm stamp of a compressed file is taken from the last group of four
# consecutive digits in its name (compressed files are assumed to be rotated
# logs such as access.9601.gz; a name like a.html.9601.gz would therefore
# not yield the modification month).  If the name contains no such group,
# or the file is not compressed, the stamp is derived from the file's
# modification time.
#
# Side effects: sets the globals $filetime, $mm, $yy (and $yymm for
# compressed names).  The caller's argument is left unmodified.
sub OpenFile {
    my $name = $_[0];

    # Refuse unreadable files right away.  Falling through would record a
    # stale $filetime, and for compressed names the pipe open below succeeds
    # even when the file does not exist.
    unless ( -r $name ) {
        warn "$name is not readable: $!\n";
        return 0;
    }
    $filetime = (stat($name))[9];

    my $source     = $name;
    my $compressed = ($name =~ m/(\.gz|\.Z)/) ? 1 : 0;
    $source = "$Zcat $name |" if $compressed;

    if ($compressed && $name =~ m/.*(\d\d\d\d)/) {
        # greedy .* selects the LAST run of four digits in the name
        $yymm = $1;
        $yy   = substr($yymm, 0, 2);
        $mm   = substr($yymm, 2, 2);
    }
    else {
        # uncompressed log or stats file (for a stats file the entries in
        # @filetimes and @mmtimes are replaced later on)
        my ($month, $year) = (localtime($filetime))[4,5];
        $mm = sprintf("%02d", $month + 1);
        $yy = sprintf("%02d", $year % 100);
    }

    unless (open(AGENTLOG, $source)) {
        warn "Can't open $source: $!\n";
        return 0;
    }
    push @filetimes, $filetime;
    $filecounter++;
    push @mmtimes, "${yy}${mm}";
    return 1;
}
# ReadLog
#
# Reads the log currently open on AGENTLOG (see OpenFile) line by line,
# counts every line in $refscounter and passes the user agent string of each
# line to AnalyzeAgent.  Closes AGENTLOG when the end of the log is reached.
#
# With $LogType 'agent' the whole line is the agent string; with 'combined'
# the agent is the last of the three quoted fields of the line.  Lines that
# yield no agent are passed on as an empty string.
sub ReadLog {
    my $line;
    # explicit defined(): a last line consisting of "0" without a newline
    # must not end the loop
    while (defined($line = <AGENTLOG>)) {
        my $agent;
        $refscounter++;
        chomp $line;
        # collapse every run of whitespace into one blank
        $line =~ s#\s+# #g;
        if ($LogType eq 'combined') {
            # only the ninth capture (the agent field) is of interest
            ($agent) = ($line =~ /^(\S+) (\S+) (\S+) \[([^\]\[]+)\] \"([^\"]*)\" (\S+) (\S+) \"([^\"]*)\" \"([^\"]*)\"/o)[8];
        }
        elsif ($LogType eq 'agent') {
            $agent = $line;
        }
        $agent = "" unless defined $agent;
        # strip leading chr(0)
        $agent =~ s/^\x0*//;
        &AnalyzeAgent($agent);
    }
    close(AGENTLOG);
}
# GetToken(STRING)
#
# Splits STRING into a leading token and the rest of the string and returns
# both as a two element list (TOKEN, REMAINDER).  A token is the longest
# leading run of characters that are none of the separators
#   ( ) < > @ , ; : \ " / [ ] ? = { } blank tab
# If STRING does not start with a token, TOKEN is the empty string and
# REMAINDER is STRING itself.
sub GetToken {
    my $input = $_[0];
    if ($input =~ m#^([^\(\)\<\>\@,;:\\\"\/\[\]\?=\{\}\ \t]+)(.*)$#) {
        my ($head, $tail) = ($1, $2);
        return ("$head", "$tail");
    }
    # no leading token: everything is remainder
    return ("", "$input");
}
# GetComment(STRING)
#
# Returns the text between the first "(" that is preceded by at least one
# other character and the next ")".  Text in front of the matched part is
# kept.  STRING is returned unchanged when it contains no such parenthesized
# part.
sub GetComment {
    (my $text = $_[0]) =~ s/[^\(]+\(([^\)]+)\).*/$1/;
    return $text;
}
# GetRawVersion(VERSION)
#
# Reduces a detailed version such as "4.0b1" to its leading
# "<digits>.<digits>" part ("4.0").  A VERSION that does not start this way
# is returned unchanged.
sub GetRawVersion {
    my $detailed = $_[0];
    return $detailed unless $detailed =~ m#^([0-9]+\.[0-9]+)#;
    my $major_minor = $1;
    return $major_minor;
}
# AnalyzeAgent(AGENTSTRING)
#
# Takes one user agent string apart and counts it in the global tables
#   %agent_counter                   keyed by product name
#   %agent_version_counter           keyed by "product major.minor"
#   %agent_detailed_version_counter  keyed by "product detailed-version"
#
# The string is read as the first product of a User-Agent header value
# (RFC 2068: 1*( product | comment )).  Further products are ignored.  If
# the first comment announces another product by the pseudo standard
# "(compatible; Product ...)", that product is counted instead, and
# %spoofer_value records, under its "product detailed-version" key, the
# "product version" it claimed to be compatible with.
#
# A missing product name is counted as "Unknown", a missing version as "???".
sub AnalyzeAgent {
    my $raw = $_[0];
    my ($agent, $version, $detailed_version, $comment, $rest);
    my ($spoofer, $spoofer_version, $spoofer_detailed_version) = ("", "", "");

    # leading product: name [ "/" version ]
    ($agent, $rest) = GetToken($raw);
    $agent = "Unknown" unless $agent;
    $rest =~ s/^\///;
    ($detailed_version, $comment) = GetToken($rest);
    $detailed_version = "???" if ($detailed_version eq "");
    $version = GetRawVersion($detailed_version);

    # we do not want to have any sub products, thus we scan next for comment:
    # people using pseudo-'standard' of 'compatible' in comment
    # like (compatible; Opera/2.12; Windows 95)
    $comment = GetComment($comment);

    # The product ends at the next ";" or ")" or at the end of the comment,
    # so that "(compatible; MSIE 3.0)" is recognized as well as
    # "(compatible; MSIE 4.0b1; Windows 95)".
    if ($comment =~ m#compatible;\s*([^;)]+)#i) {
        $spoofer = $1;
        # need to fix MSIE notation to product: "MSIE 4.0b1" -> "MSIE/4.0b1"
        $spoofer =~ s/MSIE\ /MSIE\//;
        my $spoofer_rest;
        ($spoofer, $spoofer_rest) = GetToken($spoofer);
        $spoofer = "Unknown" unless $spoofer;
        $spoofer_rest =~ s/^\///;
        ($spoofer_detailed_version) = GetToken($spoofer_rest);
        $spoofer_detailed_version = "???" if ($spoofer_detailed_version eq "");
        $spoofer_version = GetRawVersion($spoofer_detailed_version);
    }

    if ( $spoofer ) {
        $agent_counter{$spoofer}++;
        $agent_version_counter{"$spoofer $spoofer_version"}++;
        $agent_detailed_version_counter{"$spoofer $spoofer_detailed_version"}++;
        $spoofer_value{"$spoofer $spoofer_detailed_version"} = "$agent $version";
    }
    else {
        $agent_counter{$agent}++;
        $agent_version_counter{"$agent $version"}++;
        $agent_detailed_version_counter{"$agent $detailed_version"}++;
    }
}
# PrintReport
#
# Writes the complete report: a heading naming $HTTPDSERVER, the summary
# (PrintSummary), an index with the entries Summary, Summary by version and
# Summary by detail of version, and then the three tables (PrintAgent,
# PrintAgentVersion, PrintDetailedAgentVersion).
#
# If $OutputFile is set, output goes to the handle OUT instead of the
# default handle; at the end the file $path/$OutputFile.<pid> is renamed to
# $path/$OutputFile, STDOUT is selected again and OUT is closed.  OUT is not
# opened in the visible part of this file -- presumably Initialize does
# that; TODO confirm.
#
# NOTE(review): the body of this sub is damaged in this copy of the file.
# The assignment to $li and the statements following it (up to a condition
# ending in a comparison with 0) are incomplete, and the string literals
# appear to have lost their markup.  The code is left exactly as found and
# has to be restored from a pristine copy of the script.
sub PrintReport {
select(OUT) if $OutputFile;
$li=" 0 ) {
$yymm = $tmp[$#tmp];
$mm = substr($yymm,2,2);
$yy = substr($yymm,0,2);
$line .= " - $NumberToMonth{$mm} $yy";
}
print
"\n",
"\n",
"W3 Agent Statistics for ", $HTTPDSERVER, "\n",
"\n",
"\n",
"\n",
"\n",
"| \n",
"",
"WWW Agent Statistics for ", $HTTPDSERVER, " |
\n\n",
"
", $line, "\n
\n";
&PrintSummary;
print
"\n",
"$li#agent\">Summary\n",
"$li#version\">Summary by version\n",
"$li#detail\">Summary by detail of version\n",
"
\n";
&PrintAgent;
&PrintAgentVersion;
&PrintDetailedAgentVersion;
print
"\n",
"
\n",
"\n";
if ( $OutputFile ) {
rename "$path/$OutputFile.$$", "$path/$OutputFile";
select(STDOUT);
close(OUT);
}
}
# PrintSummary
#
# Prints the time of this analysis, the modification time of the newest
# file that was read and the total number of hits ($refscounter).
#
# Side effects: leaves the ascending list of file times in @tmp and sets
# $lastmodtime, $filedate and $date.
#
# NOTE(review): the string literals below appear to have lost their markup
# in this copy of the file; they are reproduced exactly as found.
sub PrintSummary {
    # File times are epoch seconds and must be compared as numbers: the
    # default string sort puts "999999999" after "1000000000".
    @tmp = sort { $a <=> $b } @filetimes;
    $lastmodtime = $tmp[$#tmp];
    $filedate = &ctime($lastmodtime);
    $date = &ctime(time);
    print
"Last analyzed: ", $date, "
\n",
"Last log file modification: ", $filedate, "
\n",
"\n",
"
Web Browser Hits measured: ", $refscounter, "
\n",
"\n";
}
# GetPercent(PART, TOTAL)
#
# Returns PART as a percentage of TOTAL, formatted with "%5.2f" (two
# decimals, padded with blanks to at least five characters).  PART is the
# relative value, TOTAL the absolute value.
#
# A TOTAL that is zero or undefined yields " 0.00" instead of a fatal
# division by zero.  As before, the result is also left in the package
# variable $percent.
sub GetPercent {
    my ($part, $total) = @_;
    my $ratio = 0;
    if (defined $total && $total != 0) {
        $ratio = 100 * $part / $total;
    }
    $percent = sprintf("%5.2f", $ratio);
    return $percent;
}
# PrintAgent
#
# Prints the section "Summary": one row per key of %agent_counter with the
# number of hits, the share of $refscounter in percent (GetPercent) and the
# key itself.  Rows are ordered by the comparator AgentByHits.
#
# $return2index is not set in the visible part of this file -- presumably
# Initialize defines it; TODO confirm.
#
# NOTE(review): the string literals appear to have lost their markup in this
# copy of the file; they are left exactly as found.
sub PrintAgent {
print
$newsection, "agent\">Summary
\n\n",
"\n\n",
"| Hits | Percent | Browser \n";
# $key is a package variable here, not a lexical
foreach $key (sort AgentByHits keys(%agent_counter)) {
print
" |
|---|
| ", $agent_counter{$key} , " ",
" | ", &GetPercent($agent_counter{$key},$refscounter),
"% | ", $key, " \n";
}
print " |
\n", $return2index, "\n";
}
# PrintAgentVersion
#
# Prints the section "Summary by version": one row per key of
# %agent_version_counter with the number of hits, the share of $refscounter
# in percent (GetPercent) and the key itself.  Rows are ordered by the
# comparator VersionByHits.
#
# $return2index is not set in the visible part of this file -- presumably
# Initialize defines it; TODO confirm.
#
# NOTE(review): the string literals appear to have lost their markup in this
# copy of the file; they are left exactly as found.
sub PrintAgentVersion {
print
$newsection, "version\">Summary by version\n\n",
"\n",
"\n",
"| Hits | Percent | Browser \n";
# $key is a package variable here, not a lexical
foreach $key (sort VersionByHits keys(%agent_version_counter)) {
print
" |
|---|
| ", $agent_version_counter{$key},
" | ", &GetPercent($agent_version_counter{$key},$refscounter),
"% | ", $key ," \n";
}
print " |
\n", $return2index, "\n";
}
# PrintDetailedAgentVersion
#
# Prints the section "Summary by detail of version": one row per key of
# %agent_detailed_version_counter with the number of hits, the share of
# $refscounter in percent (GetPercent), the key itself and, in the last
# column, the entry of %spoofer_value for that key if there is one.  Rows
# are ordered by the comparator DetailedVersionByHits.
#
# $return2index is not set in the visible part of this file -- presumably
# Initialize defines it; TODO confirm.
#
# NOTE(review): the string literals appear to have lost their markup in this
# copy of the file; they are left exactly as found.
sub PrintDetailedAgentVersion {
# keep the scratch variable $tmp from leaking into the caller
local($tmp);
print
$newsection, "detail\">Summary by detail of version\n\n",
"\n",
"\n",
"| Hits | Percent | Browser | spoofing as \n";
# $key is a package variable here, not a lexical
foreach $key (sort DetailedVersionByHits keys(%agent_detailed_version_counter)) {
print
" |
|---|
| ", $agent_detailed_version_counter{$key}, " ",
" | ", &GetPercent($agent_detailed_version_counter{$key},$refscounter),
"% | ", $key, " ";
# a single blank fills the column when nothing was recorded for this key
$tmp = $spoofer_value{$key} ? $spoofer_value{$key} : " ";
print " | ", $tmp, " \n";
}
print " |
\n", $return2index, "\n";
}
# AgentByHits
#
# Sort comparator for the keys of %agent_counter: most hits first, keys with
# equal hits in ascending string order.  Works on the sort variables $a and
# $b only and leaves every other package variable alone.
sub AgentByHits {
    return ($agent_counter{$b} <=> $agent_counter{$a}) || ($a cmp $b);
}
# VersionByHits
#
# Sort comparator for the keys of %agent_version_counter: most hits first,
# keys with equal hits in ascending string order.  Works on the sort
# variables $a and $b only and leaves every other package variable alone.
sub VersionByHits {
    return ($agent_version_counter{$b} <=> $agent_version_counter{$a}) || ($a cmp $b);
}
# DetailedVersionByHits
#
# Sort comparator for the keys of %agent_detailed_version_counter: most hits
# first, keys with equal hits in ascending string order.  Works on the sort
# variables $a and $b only and leaves every other package variable alone.
sub DetailedVersionByHits {
    return ($agent_detailed_version_counter{$b} <=> $agent_detailed_version_counter{$a})
        || ($a cmp $b);
}
# GetLastModTime
#
# Reads lines from AGENTLOG (the handle opened by OpenFile) up to and
# including the first line that contains a comment of the form
# <!-- NUMBER -->.  NUMBER is taken as the modification time (epoch seconds)
# of the data a stats file is based on: it replaces the last entry of
# @filetimes, and the "yymm" stamp derived from it replaces the last entry
# of @mmtimes, i.e. the values OpenFile recorded for the stats file itself.
#
# Nothing is changed when no such line is found.  Sets the globals
# $filetime, $mm and $yy; the lines are read into $_.
sub GetLastModTime {
    while (<AGENTLOG>) {
        next if ( $_ !~ m#<\!--\s*(\d+)\s*-->#);
        $filetime = $1;
        pop @filetimes;
        push @filetimes, $filetime;
        my ($month, $year) = (localtime($filetime))[4,5];
        $mm = sprintf("%02d", $month + 1);
        $yy = sprintf("%02d", $year % 100);
        pop @mmtimes;
        push @mmtimes, "${yy}${mm}";
        # only the first time stamp counts
        last;
    }
}
sub ReadOld {
local($agent,$version, $detailed) = "";
while () {
chop;
last if (m#