Created
April 25, 2012 17:35
-
-
Save nitoyon/2491537 to your computer and use it in GitHub Desktop.
Parse HTTP access log and find web font download ratio.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use strict; | |
use warnings; | |
use HTTP::BrowserDetect; | |
use Data::Dumper; | |
use DateTime::Format::HTTP; | |
use DateTime::Duration; | |
# Shared state for the whole run.
#
# $browser       - a single HTTP::BrowserDetect instance; the main loop feeds it
#                  a new User-Agent string for every distinct agent it meets.
# $total         - number of log lines that were actually tallied.
# $summary       - normalized UA label => { none => N, none_queue => [...],
#                  <FONT> => N }, filled in by the main loop.
# $hosts         - declared but not used anywhere in this script.
# $browser_cache - memoizes the normalized UA label per User-Agent string so
#                  browser detection runs only once per distinct agent.
my $browser = HTTP::BrowserDetect->new("");
my $total = 0;
my $summary = {};
my $hosts = {};
my $browser_cache = {};
# Main pass: read access-log lines from STDIN and tally, per normalized
# User-Agent label, how many page views were followed by a web-font download.
#
# A request whose URL is "none" (anything that is not a font file, as decided
# by parseLine) is counted under {none} and its timestamp is queued.  A later
# font request from the same UA label claims the oldest queued page view that
# lies within 30 seconds: the font counter goes up and {none} goes down.
# Queued page views that are too old are dropped from the queue but stay
# counted as "none".
LINE:
while (my $line = <STDIN>) {
    my $result = parseLine($line);
    next LINE unless $result;

    # Drop the " Twitter for iPhone" suffix so the detector sees the
    # underlying browser's User-Agent.
    my $ua_string = $result->{ua};
    $ua_string = $1 if $ua_string =~ /^(.*) Twitter for iPhone$/;

    # Detect each distinct (suffix-stripped) UA only once.  The cache is keyed
    # by the same string it is looked up with; rejected agents are remembered
    # as undef so robots are not re-detected on every line.
    if (!exists $browser_cache->{$ua_string}) {
        $browser_cache->{$ua_string} = undef;
        $browser->user_agent($ua_string);

        if ($browser->browser_string() and !$browser->robot()) {
            my $os = $browser->os_string() // "";
            $os = "iOS $1" if $os eq "iOS" and $ua_string =~ /OS ([0-9_]+)/;
            $os = "Windows" if $browser->windows();
            $os = $1 if $browser->android() and $ua_string =~ /Android ([0-9\.]+)/;
            $os =~ s/_/./g;    # iOS versions appear as 5_1_1 in the UA

            # public_version() may be undefined; default to "" to avoid an
            # "uninitialized value" warning in sprintf (output is unchanged).
            my $browser_version = $browser->public_version() // "";
            $browser_version = $1
                if ($browser->safari() or $browser->mobile_safari())
                and $ua_string =~ m!AppleWebKit/([\d\.]+)!;

            $browser_cache->{$ua_string} = sprintf("%s,%s,%s,%s",
                $browser->device_name() // "PC",
                $os,
                $browser->browser_string(),
                $browser_version);
        }
    }

    my $key = $browser_cache->{$ua_string};
    next LINE unless defined $key;    # unknown browser or robot

    $summary->{$key} = { none => 0, none_queue => [] }
        unless exists $summary->{$key};
    my $stats = $summary->{$key};

    my $url = $result->{url};
    # Removes the first "?" if there is one, otherwise a trailing apostrophe.
    # NOTE(review): presumably meant to normalize requests such as "EOT?";
    # confirm the intended pattern before tightening it.
    $url =~ s/\?|'$//;

    if ($url eq "none") {
        $stats->{none}++;
        push @{ $stats->{none_queue} }, $result->{time};
    }
    elsif ($stats->{none} > 0) {
        # Compare epoch seconds.  ($a - $b)->seconds only yields the seconds
        # *component* of the duration (a 5 min 10 s gap reports 10), which
        # would wrongly match page views that are minutes or days old.
        my $now = $result->{time}->epoch;
        while (defined(my $viewed_at = shift @{ $stats->{none_queue} })) {
            if (abs($now - $viewed_at->epoch) <= 30) {
                $stats->{$url}++;
                $stats->{none}--;
                last;
            }
        }
    }

    $total++;
    #last if $total >= 100;
}
# Roll the per-UA tallies up by font.
#
# $count       - font name (or "none") => { count => total hits,
#                ua => { UA label => hits } }
# $total_count - sum of all positive tallies across every UA and font.
my $count = {};
my $total_count = 0;
while (my ($agent, $tally) = each %$summary) {
    for my $font (grep { $_ ne "none_queue" } keys %$tally) {
        # An entry is created even for a zero tally so the font still shows
        # up in the per-font summary.
        $count->{$font} //= { count => 0, ua => {} };

        my $hits = $tally->{$font};
        next if $hits <= 0;

        my $bucket = $count->{$font};
        $bucket->{count}        += $hits;
        $bucket->{ua}->{$agent} += $hits;
        $total_count            += $hits;
    }
}
# Per-UA detail: one CSV row per (UA label, font) pair in the form
# "<UA label>,<font>,<hits>".  The UA label itself is comma separated.
# The font name comes from the request URL, so it is passed as a printf
# argument rather than interpolated into the format string (a "%" in the URL
# would otherwise be read as a conversion).  Keys are sorted so the output is
# stable between runs.
for my $font (sort keys %$count) {
    my $per_ua = $count->{$font}->{ua};
    for my $ua (sort keys %$per_ua) {
        printf("%s,%s,%d\n", $ua, $font, $per_ua->{$ua});
    }
}
# per font summary: "<font> <hits> (<share of all tallied hits>%)"
# $total_count is 0 when no tally was positive (e.g. the log holds only font
# requests with no preceding page view); report 0% instead of dying with
# "Illegal division by zero".
for my $font (sort keys %$count) {
    my $hits  = $count->{$font}->{count};
    my $share = $total_count ? $hits / $total_count * 100 : 0;
    printf("%s %d (%f%%)\n", $font, $hits, $share);
}
# Parse one access-log line in combined log format.
#
# Argument: the raw log line.
# Returns:  a hash ref { time => parsed timestamp, host => client host,
#           url => font type or 'none', ua => User-Agent string }, or nothing
#           when the line does not match, has an empty User-Agent, or carries
#           a timestamp that cannot be parsed.
#
# url is the upper-cased text following "ciphered-mplus." for GET requests to
# /misc/js/uncopyable/ciphered-mplus.<ext>, and 'none' for every other request.
sub parseLine {
    my $line = shift;

    my ($host, $ident, $user, $time, $request, $status, $bytes, $referer, $agent)
        = ($line =~ /^([^ ]*) ([^ ]*) ([^ ]*) \[([^]]*)\] "(.*?)" ([^ ]*) ([^ ]*) "(.*?)" "(.*?)"/);
    return unless $agent;

    my $url = 'none';
    # The dot is escaped so only "ciphered-mplus.<ext>" counts as a font
    # request, not e.g. "ciphered-mplusXwoff".
    $url = uc($1) if $request =~ m!^GET /misc/js/uncopyable/ciphered-mplus\.([^ ]+)!;

    # A malformed timestamp skips this line instead of aborting the whole run.
    my $parsed_time = eval { DateTime::Format::HTTP->parse_datetime($time) };
    return unless $parsed_time;

    return {
        time => $parsed_time,
        host => $host,
        url  => $url,
        ua   => $agent,
    };
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment