Last active
September 12, 2017 17:45
-
-
Save duncanmorris/5104509eddbdc5a2de11 to your computer and use it in GitHub Desktop.
Using PhantomJS to monitor Google Analytics - full code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Script-wide handles.
//   page    - the PhantomJS web page object used to load the target url
//   system  - PhantomJS system module; gives access to command-line arguments
//   address - the url to check; assigned from the command line further down
var page = require('webpage').create(),
    system = require('system'),
    address;
// How long to keep the page alive after it has loaded, so that requests fired
// after load still get logged, before we exit. In ms.
var WAIT_TIME = 5000;
// If the script is still running after this long, something is probably wrong
// and we give up. In ms.
var MAX_EXECUTION_TIME = 15000;
// When true, JavaScript errors raised by the page are printed; when false they
// are ignored.
var DEBUG = false;
// A list of regular expressions; any requested resource whose url matches one
// of them is logged.
// Regex literals are used rather than new RegExp('...') because inside a string
// literal '\.' collapses to '.', which would let the "dots" match any
// character. The trailing (/|:|\?|#|$) pins the end of the host name so that
// look-alike hosts such as www.google-analytics.com.example.org do not match.
var resources_to_log = [
    /^https?:\/\/(www|ssl)\.google-analytics\.com(\/|:|\?|#|$)/,
    /^https?:\/\/stats\.g\.doubleclick\.net(\/|:|\?|#|$)/
];
// Entry point: require a url on the command line, load it, and print every
// requested resource url that matches resources_to_log.
//
// Exit status: 0 when the page loaded (after waiting WAIT_TIME ms for late
// requests), 1 on bad usage, on load failure, or when MAX_EXECUTION_TIME ms
// pass without finishing.
if (system.args.length < 2) {
    // system.args[0] is the script name, so fewer than 2 entries means no url
    console.log('Usage: get_ga_resources.js http://www.yoururl.com');
    phantom.exit(1);
} else {
    // address is the url passed
    address = system.args[1];

    // most recent resource error reported by the page, printed if the load fails
    var lastResourceError = null;

    // Called every time the page requests a resource.
    // http://phantomjs.org/api/webpage/handler/on-resource-requested.html
    page.onResourceRequested = function (res) {
        // walk all our regexes (last to first) and log the url for each match
        var i;
        for (i = resources_to_log.length - 1; i >= 0; i--) {
            if (resources_to_log[i].test(res.url)) {
                console.log(res.url);
            }
        }
    };

    // JavaScript errors raised by the page: printed when DEBUG is true,
    // otherwise ignored.
    page.onError = function (msg, trace) {
        var lines;
        if (!DEBUG) {
            return;
        }
        lines = ['ERROR: ' + msg];
        // trace is a list of stack frames; print one readable line per frame
        // instead of "[object Object]"
        (trace || []).forEach(function (frame) {
            var where = '  at ' + frame.file + ':' + frame.line;
            if (frame['function']) {
                where += ' (in function ' + frame['function'] + ')';
            }
            lines.push(where);
        });
        console.log(lines.join('\n'));
    };

    // Remember the latest resource error so the failure branch below can
    // report it.
    page.onResourceError = function (resourceError) {
        lastResourceError = resourceError;
    };

    // Watchdog: if we are still running after MAX_EXECUTION_TIME ms, give up.
    // Armed before page.open so it is in place whatever page.open does.
    setTimeout(function () {
        console.log("FAILED: Max execution time " + Math.round(MAX_EXECUTION_TIME / 1000) + " seconds exceeded");
        phantom.exit(1);
    }, MAX_EXECUTION_TIME);

    // Now open the page, wait WAIT_TIME ms for late requests, and exit.
    page.open(address, function (status) {
        if (status !== 'success') {
            console.log("FAILED: to load " + address);
            if (lastResourceError) {
                console.log(lastResourceError.url);
                console.log(lastResourceError.errorString);
            }
            // non-zero status so callers can detect the failure, matching the
            // watchdog path
            phantom.exit(1);
            return;
        }

        if (address !== page.url) {
            console.log('Redirected: ' + page.url);
        }

        setTimeout(function () {
            phantom.exit();
        }, WAIT_TIME);
    });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage:
phantomjs get_ga_resources.js http://www.yoururl.com
NB - If you are requesting an https site, you may need to run the code with an additional parameter:
phantomjs --ssl-protocol=any get_ga_resources.js https://www.yoururl.com
There are additional parameters you could pass (see http://phantomjs.org/api/command-line.html), one of which gives the ability to ignore SSL errors.