├── LICENSE ├── README.md ├── bower.json └── seo.js /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Louis Liu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | AJAX-SEO ![Bower Version](https://badge.fury.io/bo/ajax-seo.svg) 2 | ===== 3 | 4 | A simple server to provide rendered html to crawlers, for ajax sites. 5 | 6 | * [Google ajax crawling standard](https://developers.google.com/webmasters/ajax-crawling/docs/getting-started). 7 | * [PhantomJS 2.0](http://phantomjs.org/) 8 | 9 | How to use 10 | ------------ 11 | > prepare the web app 12 | 13 | make sure it's "AJAX crawlable.", which means it adopts hashbang url schema. 
Read the Google doc again if you don't understand. 14 | 15 | 16 | > prepare the server side 17 | 18 | 1. Install PhantomJS. On Mac: `$ brew install phantomjs`; on Debian/Ubuntu: 19 | 20 | ``` 21 | $ sudo apt-get install phantomjs 22 | ``` 23 | 2. Start SEO Server 24 | 25 | ``` 26 | $ phantomjs seo.js 27 | ``` 28 | 3. Set up nginx: add the code below to the site configuration: 29 | 30 | ``` 31 | if ($args ~ _escaped_fragment_) { 32 | rewrite ^ /snapshot$uri; 33 | } 34 | 35 | location ~ ^/snapshot(.*) { 36 | rewrite ^/snapshot(.*)$ $1 break; 37 | proxy_pass http://localhost:8888; 38 | proxy_set_header Host $scheme://$host; 39 | proxy_connect_timeout 60s; 40 | } 41 | 42 | ``` 43 | 44 | How to verify 45 | ------------- 46 | ``` 47 | $ ## curl never sends the "#!..." fragment, so request the crawler form of page#!/id/12 48 | $ curl 'http://yoursite.domain/page?_escaped_fragment_=/id/12' 49 | $ ## verify it's fully rendered HTML 50 | ``` 51 | 52 | How to test your local app w/o nginx 53 | ------------- 54 | ``` 55 | $ ## if your app is running at http://localhost:3000 56 | $ curl 'http://localhost:8888/page?_escaped_fragment_=/id/12' --header 'Host: http://localhost:3000' 57 | $ ## verify it's fully rendered HTML 58 | ``` 59 | 60 | Notes 61 | ------- 62 | For the index page of your site, add this to the HTML if you haven't already: 63 | ``` 64 | <meta name="fragment" content="!"> 65 | ``` 66 | 67 | 68 | If you have trouble with https URLs, try this: 69 | ``` 70 | $ # phantomjs --ssl-protocol=any seo.js 71 | ``` 72 | See [PhantomJS options documentation](http://phantomjs.org/api/command-line.html) 73 | -------------------------------------------------------------------------------- /bower.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ajax-seo", 3 | "version": "0.2.0", 4 | "homepage": "https://github.com/liuwenchao/ajax-seo", 5 | "authors": [ 6 | "Louis Liu " 7 | ], 8 | "description": "Use phantomjs to build a simple server to provide rendered html to crawlers, for ajax sites.", 9 | "main": "seo.js", 10 | "moduleType": [ 11 | "node" 12 | ], 13 | "keywords": [ 14 | "ajax", 15 | "seo", 16 | "phantomjs" 17 | ], 
// Remainder of bower.json and the seo.js header from the repository dump,
// preserved verbatim as comments:
// 18 | "license": "MIT", 19 | "ignore": [ 20 | "**/.*", 21 | "node_modules", 22 | "bower_components", 23 | "test", 24 | "tests" 25 | ] 26 | } 27 |
// --------------------------------------------------------------------------------
// /seo.js:
// --------------------------------------------------------------------------------

/*
 * AJAX-SEO snapshot server.
 *
 * Listens for HTTP requests that carry an `_escaped_fragment_` query
 * parameter, opens the equivalent `#!` URL in a headless page, waits for the
 * page to settle and answers with the rendered HTML.
 *
 * The script deliberately stays with `var` / function expressions, as the
 * original did (NOTE(review): the PhantomJS runtime is presumably ES5-only;
 * confirm before modernising the syntax).
 */

var DEFAULT_PORT = 8888;
// How long (ms) the page is given to finish its AJAX rendering before the
// snapshot is taken.
var RENDER_TIMEOUT_MS = 10000;
var ESCAPED_FRAGMENT_KEY = '_escaped_fragment_';
// Matches URLs whose path ends in .css / .woff / .woff2 (optionally followed
// by a query string or fragment). Anchored so that hosts or paths that merely
// contain ".css" (e.g. "my.cssite.com") are not aborted by accident.
var SKIPPED_RESOURCE_PATTERN = /\.(css|woff2?)(?:[?#]|$)/i;
var SKIPPED_CONTENT_TYPES = ['text/css', 'application/font-woff'];

/**
 * Writes a timestamped INFO line to the console.
 *
 * @param {string|Array} message a single string, or an array of parts that
 *        are joined with a space.
 */
var log = function(message) {
  var messages = typeof message === 'string' ? [message] : message;
  console.info(
    [new Date().toISOString().substr(0, 19), '[INFO] ']
      .concat(messages)
      .join(' ')
  );
};

/**
 * Looks up a header value by name (case-insensitive).
 *
 * Accepts both a plain `{name: value}` map and a list of `{name, value}`
 * entries. NOTE(review): PhantomJS appears to pass request headers to
 * `onResourceRequested` as a list, which is why the original
 * `headers['Content-Type']` lookup never matched — confirm against the docs.
 *
 * @param {Object|Array} headers
 * @param {string} name
 * @returns {string|undefined}
 */
var headerValue = function(headers, name) {
  var wanted = name.toLowerCase();
  var found;
  if (!headers) {
    return found;
  }
  if (Array.isArray(headers)) {
    headers.forEach(function(header) {
      if (found === undefined && header && String(header.name).toLowerCase() === wanted) {
        found = header.value;
      }
    });
    return found;
  }
  Object.keys(headers).forEach(function(key) {
    if (found === undefined && key.toLowerCase() === wanted) {
      found = headers[key];
    }
  });
  return found;
};

/**
 * @param {string} url
 * @returns {boolean} true when the URL points at a stylesheet or web font.
 */
var isSkippableResource = function(url) {
  return SKIPPED_RESOURCE_PATTERN.test(String(url));
};

/**
 * Decides whether a resource request should be aborted: stylesheets and
 * fonts are not needed to produce the HTML snapshot.
 *
 * @param {Object} requestData object with `url` and `headers`.
 * @returns {boolean}
 */
var shouldAbortRequest = function(requestData) {
  var contentType = headerValue(requestData.headers, 'Content-Type');
  return SKIPPED_CONTENT_TYPES.indexOf(contentType) >= 0
    || isSkippableResource(requestData.url);
};

/**
 * Opens `url` in a fresh page and hands the rendered HTML to `cb`.
 *
 * `cb` is invoked exactly once: on the first page error, on a failed load,
 * or when the in-page timer fires after RENDER_TIMEOUT_MS — whichever comes
 * first. The page is closed right after.
 *
 * @param {string} url absolute URL to render.
 * @param {function(string)} cb receives the page content.
 */
var render = function(url, cb) {
  var page = require('webpage').create();
  var finished = false;

  // Several events can end a render (onError may fire repeatedly, and the
  // timer still fires afterwards). Without this guard the response would be
  // written/closed more than once and a closed page would be accessed.
  var finish = function() {
    if (finished) {
      return;
    }
    finished = true;
    try {
      cb(page.content);
    } finally {
      page.close();
    }
  };

  page.settings.loadImages = false;
  page.settings.localToRemoteUrlAccessEnabled = true;
  page.onResourceRequested = function(requestData, request) {
    // Ignore css and fonts.
    if (shouldAbortRequest(requestData)) {
      log(['Request (#', requestData.id, ') ', requestData.url, 'abort']);
      request.abort();
    } else {
      log(['Request (#', requestData.id, ') ', requestData.url]);
    }
  };
  page.onResourceReceived = function(response) {
    if (response.url) {
      log(['Response (#', response.id, '):', response.url, response.stage]);
    }
  };
  page.onConsoleMessage = function(msg, lineNum, sourceId) {
    log(['CONSOLE: ', msg, ' (from line #', lineNum, ' in "', sourceId, '")']);
  };
  page.onError = function(msg, trace) {
    var msgStack = [msg];

    if (trace && trace.length) {
      msgStack.push('TRACE:');
      trace.forEach(function(t) {
        msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
      });
    }

    console.error(msgStack.join('\n'));

    finish();
  };

  // http://phantomjs.org/api/webpage/handler/on-callback.html
  // Consider waitFor example.
  page.onCallback = function() {
    finish();
  };
  page.onInitialized = function() {
    // The function below runs inside the page, so the delay is passed in as
    // an argument rather than captured from this scope.
    page.evaluate(function(delay) {
      setTimeout(function() {
        window.callPhantom();
      }, delay);
    }, RENDER_TIMEOUT_MS);
  };
  page.open(url, function(status) {
    if (status !== 'success') {
      // Do not leave the HTTP client waiting when the page cannot be loaded.
      log(['Failed to load', url, '- status:', status]);
      finish();
    }
  });
};

/**
 * Percent-decodes `value`, returning it unchanged when it is malformed
 * instead of throwing.
 *
 * @param {string} value
 * @returns {string}
 */
var safeDecode = function(value) {
  try {
    return decodeURIComponent(value);
  } catch (e) {
    return value;
  }
};

/**
 * Turns 'page?_escaped_fragment_=/post/24' into 'page#!/post/24'.
 *
 * Other query parameters are kept in front of the hash
 * ('page?a=1&_escaped_fragment_=/x' becomes 'page?a=1#!/x'). A path without
 * an `_escaped_fragment_` parameter is returned as host + path, untouched.
 *
 * @param {string} host value of the Host header; the README's nginx example
 *        sets it to `$scheme://$host`.
 * @param {string} path request path including the query string.
 * @returns {string} the URL to render.
 */
var toHashBangUrl = function(host, path) {
  if (!host) {
    log('no Host is set in the request headers! please fix it!');
  }
  var base = host || '';
  var queryStart = path.indexOf('?');
  if (queryStart < 0) {
    return base + path;
  }

  var pathname = path.slice(0, queryStart);
  var route = null;
  var keptPairs = [];
  path.slice(queryStart + 1).split('&').forEach(function(pair) {
    var eq = pair.indexOf('=');
    var key = eq < 0 ? pair : pair.slice(0, eq);
    if (key === ESCAPED_FRAGMENT_KEY) {
      // First occurrence wins; the value may itself contain '='.
      if (route === null) {
        route = eq < 0 ? '' : pair.slice(eq + 1);
      }
      return;
    }
    if (pair !== '') {
      keptPairs.push(pair);
    }
  });

  var query = keptPairs.length ? '?' + keptPairs.join('&') : '';
  if (route === null) {
    return base + pathname + query;
  }
  return base + pathname + query + '#!' + safeDecode(route);
};

/**
 * Script entry point: parses the optional port argument and starts the
 * snapshot server.
 */
var main = function() {
  var system = require('system');

  if (system.args.length > 2 || system.args[1] === 'help') {
    console.log("Usage: phantomjs seo.js [port -- optional, default 8888]");
    console.log("Example: phantomjs seo.js");
    console.log("Example: phantomjs seo.js 8848");
    phantom.exit();
    // Return explicitly so the server is never started on the usage path.
    return;
  }

  var port = system.args[1] || DEFAULT_PORT;
  var server = require('webserver').create();

  var service = server.listen(port, function(request, response) {
    // log(JSON.stringify(request));
    var host = request.headers['Host'] || request.headers['host'];
    if (!host) {
      // Without a host there is no absolute URL to render.
      log('no Host is set in the request headers! please fix it!');
      response.statusCode = 400;
      response.write('Missing Host header');
      response.close();
      return;
    }
    render(toHashBangUrl(host, request.url), function(html) {
      response.statusCode = 200;
      response.write(html);
      response.close();
    });
  });

  if (service) {
    log(['SEO server running on port:', port]);
    log('Press Ctrl+C to stop...\n');
  } else {
    log(['Error: Could not start server listening on port: ', port]);
    phantom.exit();
  }
};

// `phantom` is the global the original script already relies on
// (`phantom.exit()`); the guard only keeps the helpers above loadable
// outside of PhantomJS.
if (typeof phantom !== 'undefined') {
  main();
}
// --------------------------------------------------------------------------------