For this post I’m using PhantomJS version 1.9.
Quite frustratingly, I occasionally have a call to page.open() where my callback receives a status of “fail”. This isn’t very helpful, as it doesn’t describe what went wrong. Was it an SSL handshake problem (using the --ignore-ssl-errors=true command line argument may solve such problems)? Something else?
Unfortunately, the PhantomJS API, at present, doesn’t appear to offer a way to determine why a page failed to load. But there are a number of callbacks we can hook into that generate plenty of debugging messages, which allow us to determine the reason for the failure.
Simplified Reason Tracking
Just before calling page.open() add the following code (after creating the page variable):
// Keep the details of the latest resource failure on the page object so the
// page.open() callback can report them.
page.onResourceError = function (failure) {
    page.reason_url = failure.url;
    page.reason = failure.errorString;
};
Now you can print out the reason for a problem in your page.open() callback, e.g.:
var page = require('webpage').create();
page.onResourceError = function(resourceError) {
page.reason = resourceError.errorString;
page.reason_url = resourceError.url;
};
page.open(
"http://www.nosuchdomain/",
function (status) {
if ( status !== 'success' ) {
console.log(
"Error opening url \"" + page.reason_url
+ "\": " + page.reason
);
phantom.exit( 1 );
} else {
console.log( "Successful page open!" );
phantom.exit( 0 );
}
}
);
This script outputs the following:
Error opening url "http://www.nosuchdomain/": Host www.nosuchdomain not found
Detailed Logging
Just before calling page.open() add the following code (after creating the page variable):
// Trace each resource request on stderr: a marker line followed by the
// request object serialised as JSON with a 4-space indent.
page.onResourceRequested = function (request) {
    var out = system.stderr;
    out.writeLine('= onResourceRequested()');
    var prettyRequest = JSON.stringify(request, undefined, 4);
    out.writeLine(' request: ' + prettyRequest);
};
// Trace each received resource on stderr: a marker line, then its id, its
// stage and the whole response object serialised as JSON.
page.onResourceReceived = function (response) {
    var out = system.stderr;
    out.writeLine('= onResourceReceived()');

    var summary = ' id: ' + response.id;
    summary += ', stage: "' + response.stage + '"';
    summary += ', response: ' + JSON.stringify(response);
    out.writeLine(summary);
};
// Trace the start of a page load on stderr, including the URL obtained by
// asking page.evaluate() for window.location.href.
page.onLoadStarted = function () {
    var out = system.stderr;
    out.writeLine('= onLoadStarted()');

    // Handed to page.evaluate(); its return value is what gets logged.
    var readLocation = function () {
        return window.location.href;
    };
    out.writeLine(' leaving url: ' + page.evaluate(readLocation));
};
// Trace the end of a page load on stderr together with the reported status.
page.onLoadFinished = function (status) {
    var report = ['= onLoadFinished()', ' status: ' + status];
    report.forEach(function (line) {
        system.stderr.writeLine(line);
    });
};
// Trace each navigation request on stderr: destination URL, the type (cause),
// whether navigation will happen, and whether it came from the main frame.
page.onNavigationRequested = function (url, type, willNavigate, main) {
    var report = [
        '= onNavigationRequested',
        ' destination_url: ' + url,
        ' type (cause): ' + type,
        ' will navigate: ' + willNavigate,
        ' from page\'s main frame: ' + main
    ];
    report.forEach(function (line) {
        system.stderr.writeLine(line);
    });
};
// Trace each resource failure on stderr: the URL that could not be loaded,
// then the error code and its description.
page.onResourceError = function (resourceError) {
    var emit = function (text) {
        system.stderr.writeLine(text);
    };
    emit('= onResourceError()');
    emit(' - unable to load url: "' + resourceError.url + '"');
    emit(' - error code: ' + resourceError.errorCode + ', description: ' + resourceError.errorString);
};
// Trace an error reported through page.onError on stderr.
//   msg   - the error message.
//   trace - optional array of stack frames; each frame's `file`, `line` and
//           (when set) `function` are printed.
page.onError = function (msg, trace) {
    system.stderr.writeLine('= onError()');
    var msgStack = [' ERROR: ' + msg];
    // Require a non-empty trace: an empty array is truthy and would otherwise
    // print a " TRACE:" header with nothing underneath it.
    if (trace && trace.length) {
        msgStack.push(' TRACE:');
        trace.forEach(function (t) {
            var location = ' -> ' + t.file + ': ' + t.line;
            var inFunction = t.function ? ' (in function "' + t.function + '")' : '';
            msgStack.push(location + inFunction);
        });
    }
    // The whole report is written with a single call, one entry per line.
    system.stderr.writeLine(msgStack.join('\n'));
};
It is important that this block is only run after the page and system variables have been defined, e.g.:
// The logging callbacks write to system.stderr, so `system` must exist first.
var system = require('system');
// The callbacks are attached as properties of this page object.
var page = require('webpage').create();
Recent Comments