├── LICENSE
├── README.md
├── bower.json
└── seo.js
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 Louis Liu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | AJAX-SEO 
2 | =====
3 |
4 | A simple server to provide rendered html to crawlers, for ajax sites.
5 |
6 | * [Google ajax crawling standard](https://developers.google.com/webmasters/ajax-crawling/docs/getting-started).
7 | * [PhantomJS 2.0](http://phantomjs.org/)
8 |
9 | How to use
10 | ------------
11 | > prepare the web app
12 |
13 | Make sure the app is "AJAX crawlable", i.e. it uses the hashbang (`#!`) URL scheme. See the Google doc linked above if this is unfamiliar.
14 |
15 |
16 | > prepare the server side
17 |
18 | 1. Install PhantomJS. On Mac: `$ brew install phantomjs`. On Debian/Ubuntu:
19 |
20 | ```
21 | $ sudo apt-get install phantomjs
22 | ```
23 | 2. Start SEO Server
24 |
25 | ```
26 | $ phantomjs seo.js
27 | ```
28 | 3. Set up nginx by adding the configuration below to your site configuration:
29 |
30 | ```
31 | if ($args ~ _escaped_fragment_) {
32 | rewrite ^ /snapshot$uri;
33 | }
34 |
35 | location ~ ^/snapshot(.*) {
36 | rewrite ^/snapshot(.*)$ $1 break;
37 | proxy_pass http://localhost:8888;
38 | proxy_set_header Host $scheme://$host;
39 | proxy_connect_timeout 60s;
40 | }
41 |
42 | ```
43 |
44 | How to verify
45 | -------------
46 | ```
47 | $ curl 'http://yoursite.domain/page?_escaped_fragment_=/id/12'
48 | $ ## verify it's fully rendered HTML
49 | ```
50 |
51 | How to test your local app w/o nginx
52 | -------------
53 | ```
54 | $ ## if your app is running at http://localhost:3000
55 | $ curl 'http://localhost:8888/page?_escaped_fragment_=/id/12' --header Host:localhost:3000
56 | $ ## verify it's fully rendered HTML
57 | ```
58 |
59 | Notes
60 | -------
61 | For the index page of your site (a URL without a hashbang), add this tag to the HTML `<head>` if you haven't already:
62 | ```
63 | <meta name="fragment" content="!">
64 | ```
65 |
66 |
67 | If you have trouble with https URLs, try this:
68 | ```
69 | $ # phantomjs --ssl-protocol=any seo.js
70 | ```
71 | see [phantomjs options documentation](http://phantomjs.org/api/command-line.html)
72 |
--------------------------------------------------------------------------------
/bower.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "ajax-seo",
3 | "version": "0.2.0",
4 | "homepage": "https://github.com/liuwenchao/ajax-seo",
5 | "authors": [
6 | "Louis Liu "
7 | ],
8 | "description": "Use phantomjs to build a simple server to provide rendered html to crawlers, for ajax sites.",
9 | "main": "seo.js",
10 | "moduleType": [
11 | "node"
12 | ],
13 | "keywords": [
14 | "ajax",
15 | "seo",
16 | "phantomjs"
17 | ],
18 | "license": "MIT",
19 | "ignore": [
20 | "**/.*",
21 | "node_modules",
22 | "bower_components",
23 | "test",
24 | "tests"
25 | ]
26 | }
27 |
--------------------------------------------------------------------------------
/seo.js:
--------------------------------------------------------------------------------
1 | var system = require('system');
2 |
// Print the usage text and ask PhantomJS to exit when help is requested or
// more than one argument follows the script name.
if (system.args[1] === 'help' || system.args.length > 2) {
  [
    "Usage: phantomjs seo.js [port -- optional, default 8888]",
    "Example: phantomjs seo.js",
    "Example: phantomjs seo.js 8848"
  ].forEach(function(line) {
    console.log(line);
  });
  phantom.exit();
}
9 |
// Port to listen on: the first command-line argument, or 8888 when absent.
var port = system.args[1] || 8888;

// Web server instance created from PhantomJS's 'webserver' module.
var server = require('webserver').create();

/**
 * Write one timestamped "[INFO]" line to the console.
 *
 * @param {string|Array} message - A single string, or an array of parts that
 *     are joined with single spaces.
 */
var log = function(message) {
  // First 19 characters of the ISO timestamp, i.e. "YYYY-MM-DDTHH:MM:SS".
  var stamp = new Date().toISOString().slice(0, 19);
  // concat() appends a string as one element and an array element by
  // element, so both accepted argument forms are handled by the same call.
  var parts = [stamp, '[INFO] '].concat(message);
  console.info(parts.join(' '));
};
20 |
/**
 * Load `url` in a fresh PhantomJS page and pass the resulting HTML to `cb`.
 *
 * Image loading is disabled and stylesheet / web-font requests are aborted.
 * The snapshot is delivered when the in-page timer (10 s after the page is
 * initialized) calls back into PhantomJS, or as soon as the page reports a
 * script error, whichever comes first. `cb` is invoked at most once and the
 * page is closed afterwards.
 *
 * @param {string} url - URL to open.
 * @param {function(string)} cb - Receives `page.content` (the page HTML).
 */
var render = function(url, cb) {
  var page = require('webpage').create();
  var finished = false;

  var SKIPPED_TYPES = ['text/css', 'application/font-woff'];
  // Extension must end the URL or be followed by a query/fragment, so a URL
  // that merely contains ".css" somewhere in the middle is not aborted.
  var SKIPPED_EXTENSION = /\.(css|woff2?)(?:[?#]|$)/i;

  // Find a header value by name. PhantomJS passes request headers as a list
  // of {name, value} objects; a plain key/value object is tolerated too.
  var headerValue = function(headers, name) {
    if (!headers) {
      return undefined;
    }
    if (!Array.isArray(headers)) {
      return headers[name];
    }
    var wanted = name.toLowerCase();
    var value;
    headers.some(function(header) {
      if (header && String(header.name).toLowerCase() === wanted) {
        value = header.value;
        return true;
      }
      return false;
    });
    return value;
  };

  // Deliver the snapshot exactly once, then release the page. Several
  // triggers (repeated script errors, repeated timers) may race to get here.
  var finish = function() {
    if (finished) {
      return;
    }
    finished = true;
    try {
      cb(page.content);
    } finally {
      page.close();
    }
  };

  page.settings.loadImages = false;
  page.settings.localToRemoteUrlAccessEnabled = true;

  page.onResourceRequested = function(requestData, request) {
    // Ignore css and fonts.
    var contentType = headerValue(requestData.headers, 'Content-Type');
    // Drop any parameters such as "; charset=utf-8" before comparing.
    var mimeType = typeof contentType === 'string'
      ? contentType.split(';')[0].trim().toLowerCase()
      : contentType;
    if (SKIPPED_TYPES.indexOf(mimeType) >= 0 || SKIPPED_EXTENSION.test(requestData.url)) {
      log(['Request (#', requestData.id, ') ', requestData.url, 'abort']);
      request.abort();
    } else {
      log(['Request (#', requestData.id, ') ', requestData.url]);
    }
  };

  page.onResourceReceived = function(response) {
    if (response.url) {
      log(['Response (#', response.id, '):', response.url, response.stage]);
    }
  };

  page.onConsoleMessage = function(msg, lineNum, sourceId) {
    log(['CONSOLE: ', msg, ' (from line #', lineNum, ' in "', sourceId, '")']);
  };

  page.onError = function(msg, trace) {
    var msgStack = [msg];

    if (trace && trace.length) {
      msgStack.push('TRACE:');
      trace.forEach(function(t) {
        msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
      });
    }

    console.error(msgStack.join('\n'));

    // A script error ends rendering early; return whatever has been built.
    finish();
  };

  // http://phantomjs.org/api/webpage/handler/on-callback.html
  // Consider waitFor example.
  page.onCallback = finish;

  page.onInitialized = function() {
    // Runs inside the page: after 10 s, signal PhantomJS via onCallback.
    page.evaluate(function() {
      setTimeout(function() {
        window.callPhantom();
      }, 10000);
    });
  };

  page.open(url);
};
74 |
/**
 * Turn an "escaped fragment" request back into the hashbang URL it stands
 * for, e.g. 'page?_escaped_fragment_=/post/24' becomes 'page#!/post/24'.
 *
 * Other query parameters are kept in front of the '#!'. When the request
 * carries no `_escaped_fragment_` parameter, host + path is returned as is.
 *
 * @param {string} host - Value of the request's Host header; it is prepended
 *     verbatim to the path.
 * @param {string} path - Request URL (path plus optional query string).
 * @returns {string} The URL to render.
 */
var toHashBangUrl = function(host, path) {
  if (!host) {
    log('no Host is set in the request headers! please fix it!');
  }

  var queryStart = path.indexOf('?');
  if (queryStart === -1) {
    // No query string at all, so there is nothing to convert.
    return host + path;
  }

  var route = null;
  var otherParams = [];
  path.slice(queryStart + 1).split('&').forEach(function(pair) {
    // Split on the first '=' only; the fragment value may contain '='.
    var eq = pair.indexOf('=');
    var key = eq === -1 ? pair : pair.slice(0, eq);
    if (key !== '_escaped_fragment_') {
      otherParams.push(pair);
      return;
    }
    // The first occurrence wins; a bare key counts as an empty fragment.
    if (route === null) {
      route = eq === -1 ? '' : pair.slice(eq + 1);
    }
  });

  if (route === null) {
    return host + path;
  }

  var fragment;
  try {
    fragment = decodeURIComponent(route);
  } catch (e) {
    // Malformed percent-escapes: use the raw value instead of throwing.
    fragment = route;
  }

  var query = otherParams.length ? '?' + otherParams.join('&') : '';
  return host + path.slice(0, queryStart) + query + '#!' + fragment;
};
90 |
// Start listening. Each request is mapped to its hashbang URL, rendered, and
// answered with the rendered HTML.
var service = server.listen(port, function (request, response) {
  // log(JSON.stringify(request));
  var target;
  try {
    target = toHashBangUrl(request.headers['Host'], request.url);
  } catch (e) {
    // Answer instead of leaving the connection open when the request URL
    // cannot be converted.
    log(['Bad snapshot request:', request.url, String(e)]);
    response.statusCode = 400;
    response.write('Bad Request');
    response.close();
    return;
  }

  render(target, function(html) {
    response.statusCode = 200;
    response.write(html);
    response.close();
  });
});
99 |
// Report whether the server started; ask PhantomJS to exit when it did not.
if (!service) {
  log(['Error: Could not start server listening on port: ', port]);
  phantom.exit();
} else {
  log(['SEO server running on port:', port]);
  log('Press Ctrl+C to stop...\n');
}
107 |
--------------------------------------------------------------------------------