diff --git a/README.md b/README.md
index b2e66cd..274fafe 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,15 @@ If your URLs use push-state:
 
 # Customization
 
+### Popular search engine spiders' user-agent list
+
+Many search engines do not follow Google's `_escaped_fragment_` proposal, so we can use the User-Agent header to detect their spiders instead:
+```js
+var prerender = require('prerender-node');
+prerender.set('robot', 'Baiduspider|Googlebot|BingBot|Slurp|MSNBot|YoudaoBot|JikeSpider|Sosospider|360Spider|Sogou web spider|Sogou inst spider');
+app.use(prerender);
+```
+
 ### Whitelist
 
 Whitelist a single url path or multiple url paths. Compares using regex, so be specific when possible. If a whitelist is supplied, only url's containing a whitelist path will be prerendered.
diff --git a/index.js b/index.js
index 2fea697..ce33b90 100644
--- a/index.js
+++ b/index.js
@@ -91,7 +91,9 @@ prerender.extensionsToIgnore = [
   '.iso',
   '.flv',
   '.m4v',
-  '.torrent'
+  '.torrent',
+  '.woff',
+  '.ttf'
 ];
 
 
@@ -107,6 +109,14 @@ prerender.blacklisted = function(blacklist) {
 };
 
 
+//returns true if the user agent matches the configured robot regex
+prerender.isRobot = function(userAgent){
+  if(!this.robot) return false;
+
+  var robotReg = new RegExp(this.robot, 'i');
+  return robotReg.test(userAgent);
+};
+
 prerender.shouldShowPrerenderedPage = function(req) {
   var userAgent = req.headers['user-agent']
     , bufferAgent = req.headers['x-bufferbot']
@@ -117,7 +127,7 @@ prerender.shouldShowPrerenderedPage = function(req) {
 
-  //if it contains _escaped_fragment_, show prerendered page
+  //if the user agent matches the robot regex or the url contains _escaped_fragment_, show prerendered page
   var parsedQuery = url.parse(req.url, true).query;
-  if(parsedQuery && parsedQuery.hasOwnProperty('_escaped_fragment_')) isRequestingPrerenderedPage = true;
+  if(prerender.isRobot(userAgent) || (parsedQuery && parsedQuery.hasOwnProperty('_escaped_fragment_'))) isRequestingPrerenderedPage = true;
 
   //if it is a bot...show prerendered page
   if(prerender.crawlerUserAgents.some(function(crawlerUserAgent){ return userAgent.toLowerCase().indexOf(crawlerUserAgent.toLowerCase()) !== -1;})) isRequestingPrerenderedPage = true;
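
For reference, a minimal sketch of how the new `robot` setting and `isRobot` helper behave once this patch is applied. The Express app wiring and the sample user-agent strings below are illustrative assumptions, not part of the patch:

```js
var express = require('express');
var prerender = require('prerender-node');

// configure the spider regex, as in the README example above
prerender.set('robot', 'Baiduspider|Googlebot|BingBot|Slurp|MSNBot');

// the regex is compiled with the 'i' flag, so matching is case-insensitive
prerender.isRobot('Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'); // true
prerender.isRobot('Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 Chrome/41.0 Safari/537.36'); // false

var app = express();
// matching spiders now get the prerendered page even without _escaped_fragment_
app.use(prerender);
```

If `robot` is never set, `isRobot` returns false and behavior falls back to the existing `_escaped_fragment_` and `crawlerUserAgents` checks.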