Take Screenshot of all HTML documents in a folder using PhantomJS

26. September 2011 01:14 by Cameron in javascript, PhantomJS, Programming  //  Tags: , , , ,   //   Comments

Recently I came across a question on stackoverflow that asked about how to take screenshots of all HTML files in a local folder. I have been playing with PhantomJS quite a bit lately and felt comfortable answering the question. Here is the code for those interested:

var page = require('webpage').create(), loadInProgress = false, fs = require('fs');
var htmlFiles = new Array();
console.log('working directory: ' + fs.workingDirectory);
var curdir = fs.list(fs.workingDirectory);

// loop through files and folders
for(var i = 0; i< curdir.length; i++)
{
	var fullpath = fs.workingDirectory + fs.separator + curdir[i];
	// check if item is a file
	if(fs.isFile(fullpath))
	{
		if(fullpath.indexOf('.html') != -1)
		{
			// show full path of file
			console.log('File path: ' + fullpath);
			htmlFiles.push(fullpath);
		}
	}
}

console.log('Number of Html Files: ' + htmlFiles.length);

// output pages as PNG
var pageindex = 0;

var interval = setInterval(function() {
	if (!loadInProgress && pageindex < htmlFiles.length) {
		console.log("image " + (pageindex + 1));
		page.open(htmlFiles[pageindex]);
	}
	if (pageindex == htmlFiles.length) {
		console.log("image render complete!");
		phantom.exit();
	}
}, 250);

page.onLoadStarted = function() {
	loadInProgress = true;
	console.log('page ' + (pageindex + 1) + ' load started');
};

page.onLoadFinished = function() {
	loadInProgress = false;
	page.render("images/output" + (pageindex + 1) + ".png");
	console.log('page ' + (pageindex + 1) + ' load finished');
	pageindex++;
}

The process is quite simple. First, I loop through all objects in the current working directory and check to see if each item is a file and whether it has the .html extension. Then I add each html file's filepath to an array that I later loop through to take the screenshots. A screenshot must be taken after the page is fully loaded so that the screenshot will contain actual content and not a blank image. This is done by saving the image on the page.onLoadFinished callback. The application loop for taking the screenshots inserts small 250ms delays between each request so that pages may fully load into the headless browser before advancing to the next page.

Month List

Tag cloud