#!/usr/local/bin/perl
# $Id: browsercounter.pl,v 1.7 1997/10/12 08:33:56 elkner Exp $
# browsercounter: Program to create statistics from log files about WWW Agents
#                 accessing WWW server.
# written by Jens Elkner (elkner@irb.cs.uni-magdeburg.de)
#
# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE.
#
# I offer it to the public domain and I ask, however, that this paragraph
# and my name be retained in any modified versions of the file you may
# make, and that you notify me of any improvements you make to the code.
#
# Use of this software in any way or in any form, source or binary,
# is not allowed in any country which prohibits disclaimers of any
# implied warranties of merchantability or fitness for a particular
# purpose or any disclaimers of a similar nature.
#
# IN NO EVENT SHALL I BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
# SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
# USE OF THIS SOFTWARE AND ITS DOCUMENTATION (INCLUDING, BUT NOT
# LIMITED TO, LOST PROFITS) EVEN IF I HAVE BEEN ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE
#
# NOTE(review): this copy was restored from a text dump in which everything
# between '<' and '>' had been removed (file handle reads, HTML tags in the
# output strings, some regex bodies).  The table row layout is derived from
# the ReadOld* regexes that survived; the rest of the page skeleton (head,
# index list, anchors, footer markup) is a reconstruction -- confirm it
# against an original report before relying on "-i" with very old stat files.

use strict;
use warnings;
use Getopt::Std;

# ------------------------------ Configuration ------------------------------

my $Version = "BrowserCounter 1.3-E";

# Command (program plus options) that writes a decompressed file to stdout.
my $Zcat = "/usr/local/bin/gzip -cd";

# Type of log file being parsed ('agent','combined')
my $LogType = 'agent';

# Name of server
my $HTTPDSERVER = 'www.cs.uni-magdeburg.de';

# cellpadding size
my $cellpadding = 2;

# Border size
my $border = 2;

# Markup printed in front of every section heading.
my $newsection = "<p>\n<hr>\n<p>\n<a href=\"#top\">Top</a>\n";

# Markup printed below every table.
# NOTE(review): the original definition was lost; this text is a guess.
my $return2index = "<a href=\"#index\">Return to index</a>";

my %NumberToMonth = (
    '01', 'January',   '02', 'February', '03', 'March',
    '04', 'April',     '05', 'May',      '06', 'June',
    '07', 'July',      '08', 'August',   '09', 'September',
    '10', 'October',   '11', 'November', '12', 'December',
);

# --------------------------------- State -----------------------------------

my $filecounter = 0;    # number of successfully opened input files
my $refscounter = 0;    # total number of hits counted
my @Includes;           # old stat files given with -i
my @filetimes;          # modification time (epoch) per input file
my @mmtimes;            # "yymm" per input file
my %agent_counter;                     # "agent"                  => hits
my %agent_version_counter;             # "agent major.minor"      => hits
my %agent_detailed_version_counter;    # "agent detailed-version" => hits
my %spoofer_value;      # "agent detailed-version" => "product version" it claims to be
my $path;               # directory for the output file (-p)
my $OutputFile;         # output file name (-o), undef means STDOUT
my $OutputTmp;          # temporary name the report is written to first
my $OutFh;              # handle on $OutputTmp
my $LogFh;              # handle on the input file opened by OpenFile

# ---------------------------------- Main -----------------------------------

Initialize();

foreach my $file (@Includes) {
    ReadOld($file) if OpenFile($file);
}

foreach my $file (@ARGV) {
    ReadLog() if OpenFile($file);
}

if ($filecounter > 0) {
    PrintReport();
}
else {
    warn "No files found for analyzing!\n";
    # Do not leave the empty temporary output file behind.
    if ($OutFh) {
        close($OutFh);
        unlink $OutputTmp;
    }
}

# --------------------------------- That's it --------------------------------

# YearMonth($epoch): "yymm" (two digit year, two digit month) of a timestamp
# in local time.
sub YearMonth {
    my ($time) = @_;
    my ($mon, $year) = (localtime($time))[4, 5];
    return sprintf('%02d%02d', $year % 100, $mon + 1);
}

# OpenFile($file): open $file for reading on the shared handle $LogFh.
#
# Files ending in .gz or .Z are read through $Zcat.  On success the file's
# modification time and its "yymm" are appended to @filetimes and @mmtimes,
# $filecounter is incremented and 1 is returned; otherwise a warning is
# printed and 0 is returned.
sub OpenFile {
    my ($file) = @_;

    unless (-r $file) {
        warn "$file is not readable: $!\n";
        return 0;
    }
    my $filetime = (stat($file))[9];

    my $compressed = ($file =~ m/\.(?:gz|Z)$/) ? 1 : 0;

    # Compressed logs are assumed to be rotated logs whose name carries the
    # month they cover (e.g. agent_log.9601.gz); the last run of four digits
    # is taken as yymm.  (A name like a.html.9601.gz would give the wrong
    # month.)  Anything else -- an uncompressed log, a stat file, or a
    # compressed file without a plausible yymm -- uses the modification
    # time.  For stat files ReadOld later replaces both values.
    my $yymm;
    if ($compressed && $file =~ m/.*(\d\d)(\d\d)/) {
        my ($yy, $mm) = ($1, $2);
        $yymm = "$yy$mm" if $mm >= 1 && $mm <= 12;
    }
    $yymm = YearMonth($filetime) unless defined $yymm;

    my ($fh, $opened);
    if ($compressed) {
        # List form: the file name is never interpreted by a shell.
        my @cmd = (split(' ', $Zcat), $file);
        $opened = open($fh, '-|', @cmd);
    }
    else {
        $opened = open($fh, '<', $file);
    }
    unless ($opened) {
        warn "Can't open $file: $!\n";
        return 0;
    }

    $LogFh = $fh;
    push @filetimes, $filetime;
    push @mmtimes,   $yymm;
    $filecounter++;
    return 1;
}

# ReadLog(): read the log opened by OpenFile line by line and count the user
# agent found on each line.  Every line counts as one hit; a line of a
# 'combined' log that cannot be parsed is counted as agent "Unknown".
sub ReadLog {
    while (defined(my $line = <$LogFh>)) {
        $refscounter++;
        chomp $line;
        $line =~ s#\s+# #g;

        my $agent;
        if ($LogType eq 'combined') {
            # host ident user [date] "request" status bytes "referer" "agent"
            ($agent) = $line =~
                m/^\S+ \S+ \S+ \[[^\]\[]+\] "[^"]*" \S+ \S+ "[^"]*" "([^"]*)"/;
        }
        elsif ($LogType eq 'agent') {
            $agent = $line;
        }
        $agent = '' unless defined $agent;

        # strip leading chr(0)
        $agent =~ s/^\x00+//;
        AnalyzeAgent($agent);
    }
    close($LogFh)
        or warn "Problem while closing log input: ", ($! || "exit status $?"), "\n";
}

# GetToken($text): split an HTTP "token" off the front of $text.
# Returns (token, remainder); token is "" when $text starts with a tspecial.
sub GetToken {
    my ($text) = @_;
    $text = '' unless defined $text;

    # tspecials  [\(\)\<\>\@,;:\\\"\/\[\]\?=\{\}\ \t]
    if ($text =~ m#^([^\(\)\<\>\@,;:\\\"\/\[\]\?=\{\}\ \t]+)(.*)$#) {
        return ("$1", "$2");
    }
    return ("", "$text");
}

# GetComment($text): return the contents of the first "( ... )" comment that
# has at least one character in front of it; $text unchanged if there is none.
sub GetComment {
    my ($comment) = @_;
    $comment =~ s/[^\(]+\(([^\)]+)\).*/$1/;
    return $comment;
}

# GetRawVersion($version): reduce a detailed version such as "4.0b1" to its
# leading "major.minor"; anything not starting that way is returned as is.
sub GetRawVersion {
    my ($version) = @_;
    if ($version =~ m#^([0-9]+\.[0-9]+)#) {
        $version = $1;
    }
    return $version;
}

# AnalyzeAgent($user_agent): take one User-Agent string apart and update the
# three hit counters.  If the comment announces "(compatible; X ...; ...)",
# X is counted instead of the leading product and the leading product is
# remembered in %spoofer_value.
sub AnalyzeAgent {
    my ($raw) = @_;

    # RFC 2068 - 14.32:  1*( product | comment )
    my ($agent, $detailed_version) = GetToken($raw);
    $agent = "Unknown" unless $agent;
    $detailed_version =~ s/^\///;
    my $comment;
    ($detailed_version, $comment) = GetToken($detailed_version);
    $detailed_version = "???" if $detailed_version eq "";
    my $version = GetRawVersion($detailed_version);

    # we do not want to have any sub products, thus we scan next for comment:
    # people using pseudo-'standard' of 'compatible' in comment
    # like (compatible; Opera/2.12; Windows 95)
    $comment = GetComment($comment);
    if ($comment =~ m#compatible;\s*([^;)]+);#i) {
        my $spoofer = $1;
        # need to fix MSIE notation to product
        # (compatible; MSIE 4.0b1; Windows 95)
        $spoofer =~ s/MSIE\ /MSIE\//;
        my $spoofer_detailed_version;
        ($spoofer, $spoofer_detailed_version) = GetToken($spoofer);
        $spoofer = "Unknown" unless $spoofer;
        $spoofer_detailed_version =~ s/^\///;
        ($spoofer_detailed_version) = GetToken($spoofer_detailed_version);
        $spoofer_detailed_version = "???" if $spoofer_detailed_version eq "";
        my $spoofer_version = GetRawVersion($spoofer_detailed_version);

        $agent_counter{$spoofer}++;
        $agent_version_counter{"$spoofer $spoofer_version"}++;
        $agent_detailed_version_counter{"$spoofer $spoofer_detailed_version"}++;
        $spoofer_value{"$spoofer $spoofer_detailed_version"} = "$agent $version";
    }
    else {
        $agent_counter{$agent}++;
        $agent_version_counter{"$agent $version"}++;
        $agent_detailed_version_counter{"$agent $detailed_version"}++;
    }
}

# MonthKey("yymm"): numeric sort key yyyymm.
# NOTE(review): two digit years below 70 are taken as 20yy, all others as
# 19yy -- an assumption, the log names carry no century.
sub MonthKey {
    my ($yy, $mm) = unpack 'A2 A2', $_[0];
    return (($yy < 70 ? 2000 : 1900) + $yy) * 100 + $mm;
}

# MonthLabel("yymm"): e.g. "October 97".
sub MonthLabel {
    my ($yy, $mm) = unpack 'A2 A2', $_[0];
    return "$NumberToMonth{$mm} $yy";
}

# PrintReport(): write the complete HTML report to the output file (or
# STDOUT).  With -o the report is written to a temporary file that replaces
# the real one only after it has been written completely.
sub PrintReport {
    my $previous;
    $previous = select($OutFh) if $OutFh;

    my $li = "<li><a href=\"#";

    # Period covered: first month, or "first - last" for several input files.
    my @months = sort { MonthKey($a) <=> MonthKey($b) } @mmtimes;
    my $line = MonthLabel($months[0]);
    if ($#months > 0) {
        $line .= " - " . MonthLabel($months[$#months]);
    }

    print "<html>\n",
          "<head>\n",
          "<title>W3 Agent Statistics for ", $HTTPDSERVER, "</title>\n",
          "</head>\n",
          "<body>\n",
          "<h1><a name=\"top\">WWW Agent Statistics for ", $HTTPDSERVER,
          "</a></h1>\n",
          "<h2>", $line, "</h2>\n",
          "<hr>\n";
    PrintSummary();
    print "<a name=\"index\"></a>\n",
          "<ul>\n",
          $li, "agent\">Summary</a>\n",
          $li, "version\">Summary by version</a>\n",
          $li, "detail\">Summary by detail of version</a>\n",
          "</ul>\n";
    PrintAgent();
    PrintAgentVersion();
    PrintDetailedAgentVersion();
    print "<p>\n",
          "<hr>\n",
          "<address>\n",
          "Generated with &nbsp;<em>", $Version, "</em>&nbsp; written by &nbsp;",
          "<a href=\"mailto:elkner\@irb.cs.uni-magdeburg.de\">Jens Elkner</a>\n",
          "</address>\n",
          "</body>\n",
          "</html>\n";

    if ($OutFh) {
        select($previous);
        # Close (and thereby flush) before the file is moved into place.
        close($OutFh)
            or die "Can not write $OutputTmp: $!\n";
        rename($OutputTmp, "$path/$OutputFile")
            or die "Can not rename $OutputTmp to $path/$OutputFile: $!\n";
    }
}

# PrintSummary(): print date of analysis, newest log modification time and
# the total number of hits.  The modification time is additionally written
# as "<!-- epoch -->" so that ReadOld can pick it up from an included report.
sub PrintSummary {
    # epoch seconds: compare numerically, newest first
    my ($lastmodtime) = sort { $b <=> $a } @filetimes;
    my $filedate = localtime($lastmodtime);
    my $date     = localtime(time);

    print "Last analyzed: ", $date, "<br>\n",
          "Last log file modification: ", $filedate, "<br>\n",
          "<!-- ", $lastmodtime, " -->\n",
          "<p>\nWeb Browser Hits measured: <b>", $refscounter, "</b>\n",
          "<p>\n";
}

# GetPercent($part, $total): $part as percentage of $total, formatted
# "%5.2f".  A zero (or missing) total yields 0.00 instead of dying.
sub GetPercent {
    my ($part, $total) = @_;
    my $percent = $total ? 100 * $part / $total : 0;
    return sprintf("%5.2f", $percent);
}

# The three tables below are read back by ReadOldAgent, ReadOldVersion and
# ReadOldDetailedVersion.  Their regexes require one row per line and cells
# WITHOUT closing </td> tags (valid HTML, the end tag is optional), so do not
# "tidy" the row markup.  Keys consist of tokens produced by GetToken and
# therefore cannot contain '<', '>' or '"'.

# PrintAgent(): table of hits per agent.
sub PrintAgent {
    print $newsection,
          "<h2><a name=\"agent\">Summary</a></h2>\n\n",
          "<table border=\"$border\" cellpadding=\"$cellpadding\">\n\n",
          "<tr><th>Hits <th>Percent <th>Browser</tr>\n";
    foreach my $key (sort AgentByHits keys(%agent_counter)) {
        print "<tr><td align=\"right\">", $agent_counter{$key}, " ",
              "<td align=\"right\">",
              GetPercent($agent_counter{$key}, $refscounter),
              "% <td>", $key, "</tr>\n";
    }
    print "</table>\n", $return2index, "<br>\n";
}

# PrintAgentVersion(): table of hits per agent and major.minor version.
sub PrintAgentVersion {
    print $newsection,
          "<h2><a name=\"version\">Summary by version</a></h2>\n\n",
          "<table border=\"$border\" cellpadding=\"$cellpadding\">\n",
          "\n",
          "<tr><th>Hits <th>Percent <th>Browser</tr>\n";
    foreach my $key (sort VersionByHits keys(%agent_version_counter)) {
        print "<tr><td align=\"right\">", $agent_version_counter{$key}, " ",
              "<td align=\"right\">",
              GetPercent($agent_version_counter{$key}, $refscounter),
              "% <td>", $key, "</tr>\n";
    }
    print "</table>\n", $return2index, "<br>\n";
}

# PrintDetailedAgentVersion(): table of hits per agent and detailed version,
# with the product the agent pretends to be (if any) in the last column.
sub PrintDetailedAgentVersion {
    print $newsection,
          "<h2><a name=\"detail\">Summary by detail of version</a></h2>\n\n",
          "<table border=\"$border\" cellpadding=\"$cellpadding\">\n",
          "\n",
          "<tr><th>Hits <th>Percent <th>Browser <th>spoofing as</tr>\n";
    foreach my $key (sort DetailedVersionByHits
                     keys(%agent_detailed_version_counter)) {
        my $spoofed = $spoofer_value{$key} ? $spoofer_value{$key} : "&nbsp;";
        print "<tr><td align=\"right\">",
              $agent_detailed_version_counter{$key}, " ",
              "<td align=\"right\">",
              GetPercent($agent_detailed_version_counter{$key}, $refscounter),
              "% <td>", $key, " ",
              "<td>", $spoofed, "</tr>\n";
    }
    print "</table>\n", $return2index, "<br>\n";
}

# Sort comparators: most hits first, ties in alphabetical order.
sub AgentByHits {
    return $agent_counter{$b} <=> $agent_counter{$a}
        || $a cmp $b;
}

sub VersionByHits {
    return $agent_version_counter{$b} <=> $agent_version_counter{$a}
        || $a cmp $b;
}

sub DetailedVersionByHits {
    return $agent_detailed_version_counter{$b}
               <=> $agent_detailed_version_counter{$a}
        || $a cmp $b;
}

# GetLastModTime($epoch): replace the entries OpenFile recorded for the
# current (stat) file by the log modification time stored inside that file.
sub GetLastModTime {
    my ($filetime) = @_;
    pop @filetimes;
    push @filetimes, $filetime;
    pop @mmtimes;
    push @mmtimes, YearMonth($filetime);
}

# ReadOld($file): merge a report written earlier by this program into the
# counters.  Reads from the handle opened by OpenFile; $file is not used.
#
# NOTE(review): the patterns that recognised the three sections were lost.
# The anchors name="agent" / "version" / "detail" are taken from the section
# headings this program prints; reports with different anchors are skipped.
sub ReadOld {
    my $have_modtime = 0;

    while (defined(my $line = <$LogFh>)) {
        last if $line =~ m#</html>#i;    # check for end of html file

        if (!$have_modtime && $line =~ m#<!--\s*(\d+)\s*-->#) {
            my $stamp = $1;
            GetLastModTime($stamp);
            $have_modtime = 1;
        }
        if ($line =~ m#<a\s+name="?agent"?\s*>#i) {
            ReadOldAgent();
        }
        if ($line =~ m#<a\s+name="?version"?\s*>#i) {
            ReadOldVersion();
        }
        if ($line =~ m#<a\s+name="?detail"?\s*>#i) {
            ReadOldDetailedVersion();
            last;
        }
    }
    # Reading may stop before end of input, which makes a decompressor exit
    # with an error; the status of close is therefore ignored on purpose.
    close($LogFh);
}

# ReadOldAgent(): read rows "<td>hits <td>percent <td>agent" up to </table>;
# adds to %agent_counter and to the total number of hits.
sub ReadOldAgent {
    while (defined(my $row = <$LogFh>)) {
        last if $row =~ /<\/table>/i;
        my ($hits, $name) =
            $row =~ /<[^>]+>\s*(\d+)\s*<[^>]+>[^>]+>\s*([^<]+)/i;
        if ($hits && $name) {
            $refscounter          += $hits;
            $agent_counter{$name} += $hits;
        }
    }
}

# ReadOldVersion(): like ReadOldAgent, but for %agent_version_counter (the
# total is not touched, it has been summed up by ReadOldAgent already).
sub ReadOldVersion {
    while (defined(my $row = <$LogFh>)) {
        last if $row =~ /<\/table>/i;
        my ($hits, $name) =
            $row =~ /<[^>]+>\s*(\d+)\s*<[^>]+>[^>]+>\s*([^<]+)/i;
        if ($hits && $name) {
            $agent_version_counter{$name} += $hits;
        }
    }
}

# ReadOldDetailedVersion(): read rows
# "<td>hits <td>percent <td>agent <td>spoofed product" up to </table>.
sub ReadOldDetailedVersion {
    while (defined(my $row = <$LogFh>)) {
        last if $row =~ /<\/table>/i;
        my ($number, $agent, $spoofer) = $row =~
            /<[^>]+>\s*(\d+)\s*<[^>]+>[^>]+>\s*([^<]+)<[^>]+>([^<]+)/i;
        next unless defined $agent;

        $agent =~ s/^\s+//;
        $agent =~ s/\s+$//;
        # an empty last column is written as &nbsp;
        $spoofer =~ s/\&nbsp;//g;
        $spoofer =~ s/^\s+//;
        $spoofer =~ s/\s+$//;

        if ($agent && $number) {
            $agent_detailed_version_counter{$agent} += $number;
        }
        $spoofer_value{$agent} = $spoofer if $spoofer;
    }
}

# Version(): print the version and terminate.
sub Version {
    die <<"EndVersion";
This is $Version.\n\n(C) by Jens Elkner (elkner\@irb.cs.uni-magdeburg.de).
EndVersion
}

# Usage(): print the usage message and terminate.
sub Usage {
    die <<"EndUsage";
Process a sequence of NCSA httpd common format agent_log files and
generate an HTML summary.

Usage: browsercounter [-h] [-v] [-t] [-N HTTD-Server-Name] [-i fList]
                      [-o file] [-p path]
                      [logfile ...] [logfile.gz ...] [logfile.Z ...]
Options:
  -h          Display this message and quit.
  -v          Display version
  -t          Set type of the Logfile to combined (default = agent)
  -N name     HTTPD Server name for report
  -i fList    list of stat files (afile) for inclusion
  -o afile    Output file (default = Standard Output)
  -p path     System directory, where to store generated files

Terms:
  fList   ... comma separated List of files (whitespaces are NOT allowed).
  logfile ... common or combined format log file with user agent data
EndUsage
}

# Initialize(): evaluate the command line.  Sets @Includes, $path,
# $HTTPDSERVER and $LogType; with "-o file" the temporary output file
# "<path>/<file>.<pid>" is opened (PrintReport renames it when done).
sub Initialize {
    my %opt;
    my $result = getopts('hi:N:o:p:tv', \%opt);
    Usage()   if $opt{h} || !$result;
    Version() if $opt{v};

    @Includes    = split(",", $opt{i}) if $opt{i};
    $path        = $opt{p} ? $opt{p} : ".";
    $HTTPDSERVER = $opt{N} if $opt{N};
    $LogType     = "combined" if $opt{t};

    if ($opt{o} && $opt{o} ne '-') {
        $OutputFile = $opt{o};
        $OutputTmp  = "$path/$OutputFile.$$";
        open($OutFh, '>', $OutputTmp)
            or die "Can not open $OutputTmp: $!\n";
    }
}