├── .eslintrc.json
├── .github
├── CODEOWNERS
├── dependabot.yml
└── workflows
│ └── main.yml
├── .gitignore
├── .vscode
├── launch.json
└── settings.json
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── app.json
├── app.yaml
├── bin
└── rendertron
├── docs
├── api-reference.md
├── configure.md
├── deploy.md
├── index.md
├── server-setup.md
└── server-setup
│ ├── apache.md
│ ├── expressjs.md
│ └── nginx.md
├── middleware
├── CHANGELOG.md
├── LICENSE
├── README.md
├── package-lock.json
├── package.json
├── src
│ ├── middleware.ts
│ └── test
│ │ └── middleware-test.ts
└── tsconfig.json
├── nodemon.json
├── package-lock.json
├── package.json
├── src
├── config.ts
├── datastore-cache.ts
├── filesystem-cache.ts
├── index.html
├── memory-cache.ts
├── renderer.ts
├── rendertron.ts
└── test
│ ├── app-test.ts
│ ├── datastore-cache-test.ts
│ ├── filesystem-cache-test.ts
│ └── memory-cache-test.ts
├── test-resources
├── basic-script.html
├── custom-element.html
├── explicit-render-event.html
├── http-meta-status-code-multiple.html
├── http-meta-status-code.html
├── include-base-as-directory.html
├── include-base.html
├── include-date.html
├── include-doctype.html
├── include-json-ld.html
├── include-script.html
├── inject-element-after-load.js
├── inject-element-module.js
├── inject-element.js
├── package-lock.json
├── package.json
├── request-header.html
├── restrict-test.test.html
├── script-after-load.html
├── shadow-dom-no-polyfill.html
├── shadow-dom-polyfill-all.html
└── shadow-dom-polyfill-loader.html
└── tsconfig.json
/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": [
3 | "eslint:recommended",
4 | "plugin:@typescript-eslint/eslint-recommended",
5 | "plugin:@typescript-eslint/recommended"
6 | ],
7 | "parser": "@typescript-eslint/parser",
8 | "parserOptions": {
9 | "ecmaVersion": 2020,
10 | "sourceType": "module"
11 | },
12 | "plugins": ["@typescript-eslint"],
13 | "rules": {
14 | "@typescript-eslint/explicit-module-boundary-types": "off"
15 | },
16 | "overrides": [
17 | {
18 | "files": "test-resources/**/*",
19 | "env": {
20 | "browser": true
21 | }
22 | }
23 | ]
24 | }
25 |
--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @AVGP
2 | /middleware/ @aomarks
3 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: 'npm'
4 | directory: '/'
5 | schedule:
6 | interval: 'daily'
7 | versioning-strategy: increase
8 | - package-ecosystem: 'npm'
9 | directory: '/middleware/'
10 | schedule:
11 | interval: 'daily'
12 | versioning-strategy: increase
13 |
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | # This is a basic workflow to help you get started with Actions
2 |
3 | name: CI
4 |
5 | # Controls when the action will run. Triggers the workflow on push or pull request
6 | # events but only for the main branch
7 | on:
8 | push:
9 | branches: [ main ]
10 | pull_request:
11 | branches: [ main ]
12 |
13 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
14 | jobs:
15 | lint:
16 | name: Lint
17 |
18 | runs-on: ubuntu-latest
19 |
20 | steps:
21 | - uses: actions/checkout@v2
22 |
23 | - uses: actions/setup-node@v1
24 | with:
25 | node-version: 14
26 |
27 | - run: npm ci
28 |
29 | - run: npm run lint
30 |
31 | test:
32 | name: Integration tests
33 |
34 | runs-on: ubuntu-latest
35 |
36 | strategy:
37 | matrix:
38 | node: ['10', '12', '14']
39 |
40 | steps:
41 | - uses: actions/checkout@v2
42 |
43 | - uses: actions/setup-node@v1
44 | with:
45 | node-version: ${{ matrix.node }}
46 |
47 | - run: npm ci
48 |
49 | - run: npm test
50 |
51 | test-cache:
52 | name: e2e cache tests
53 |
54 | runs-on: ubuntu-latest
55 |
56 | strategy:
57 | matrix:
58 | node: ['10', '12', '14']
59 |
60 | steps:
61 | - uses: actions/checkout@v2
62 |
63 | - uses: actions/setup-node@v1
64 | with:
65 | node-version: ${{ matrix.node }}
66 |
67 | - name: Install dependencies
68 | run: |
69 | npm ci
70 | export CLOUDSDK_CORE_DISABLE_PROMPTS=1
71 | sudo apt-get update && sudo apt-get install google-cloud-sdk google-cloud-sdk-datastore-emulator
72 | mkdir ./tmp
73 | echo '{"cacheConfig": { "snapshotDir": "./tmp/rendertron" } }' > ./config.json
74 |
75 | - name: Run the e2e cache tests
76 | run: npm run test-cache
77 |
78 | test-middleware:
79 | name: Middleware tests
80 |
81 | runs-on: ubuntu-latest
82 |
83 | strategy:
84 | matrix:
85 | node: ['10', '12', '14']
86 |
87 | steps:
88 | - uses: actions/checkout@v2
89 |
90 | - uses: actions/setup-node@v1
91 | with:
92 | node-version: ${{ matrix.node }}
93 |
94 | - run: cd middleware/
95 |
96 | - run: npm ci
97 |
98 | - run: npm run test
99 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | node_modules/
3 | test.jpeg
4 | yarn.lock
5 | *.tgz
6 |
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | // Use IntelliSense to learn about possible attributes.
3 | // Hover to view descriptions of existing attributes.
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5 | "version": "0.2.0",
6 | "configurations": [
7 | {
8 | "type": "node",
9 | "request": "launch",
10 | "name": "Launch Program",
11 | "program": "${workspaceFolder}/build/rendertron.js"
12 | }
13 | ]
14 | }
15 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "editor.formatOnSave": true,
3 | "editor.formatOnType": true,
4 | "search.useIgnoreFiles": false,
5 | "search.exclude": {
6 | "node_modules/": true
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 |
3 |
4 |
5 | ## [3.1.0] 2020-11-04
6 |
7 | ### Security improvements
8 |
9 | - Prevents rendering of \*.internal URLs, patching an issue with GCE.
10 | - Adds allow-listing option to restrict rendering to a given list of domains or URL patterns.
11 |
12 | ### Features
13 |
14 | - Include Heroku deploy documentation and an interactive "Click to deploy" for Heroku
15 | - Introduces an API endpoint to clear all cache
16 | - Adds timezone support
17 | - Adds optional forced browser restart between renders
18 | - Adds documentation to deploy Rendertron via Docker
19 | - Adds option to add request headers to the rendered pages in Rendertron
20 |
21 | ### Improvements
22 |
23 | - Fixes a bug in the filesystem cache
24 | - Fixes issue with the injected base tag
25 | - Updates all dependencies
26 | - Increased the required node.js version to Node.js 10+
27 | - Updates to the FAQ
28 |
29 | ## [3.0.0] 2020-07-02
30 |
31 | - Introduces new configuration file format for the `config.json` options (see [README.md](./README.md))
32 | - Introduces new cache providers:
33 | - In-memory cache
34 | - File system cache
35 | - Introduces API endpoint to invalidate cache for a URL
36 | - Introduces a number of new configuration options
37 | - Introduces `refreshCache` parameter to force cache update for a URL
38 | - Relaunches browser when the browser disconnects from Puppeteer
39 | - Now includes doctype in rendered output
40 | - Harmonises the configuration options for caches
41 | - Closes page after screenshot
42 | - Fixes security issue with AppEngine deployments
43 | - Fixes issue with specifying host and port
44 |
45 | ## [2.0.1] 2018-09-18
46 |
47 | - Remove testing and other files from NPM package.
48 | - Fix NPM main config.
49 | - Improved restrictions for endpoints.
50 | - Support for structured data by not stripping all script tags.
51 |
52 | ## [2.0.0] 2018-07-26
53 |
54 | - Rebuilt with Puppeteer under the hood
55 | - Rebuilt as Koa server instead of an Express server
56 | - Rebuilt using Typescript
57 | - Removed explicit rendering flag
58 | - Added support for a mobile parameter
59 | - Added more options for screenshots
60 |
61 | ## [1.1.1] 2018-01-05
62 |
63 | - Update `debug` flag to log requested URLs to render
64 | - Fix for renderComplete flag
65 | - Minor bug fixes
66 |
67 | ## [1.1.0] 2017-10-27
68 |
69 | - Initial release on NPM
70 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | - [How to Contribute](#how-to-contribute)
4 | - [Contributor License Agreement](#contributor-license-agreement)
5 | - [Getting Code](#getting-code)
6 | - [Code reviews](#code-reviews)
7 | - [Code Style](#code-style)
8 | - [Adding New Dependencies](#adding-new-dependencies)
9 | - [Running & Writing Tests](#running--writing-tests)
10 |
11 |
12 | # How to Contribute
13 |
14 | First of all, thank you for your interest in Rendertron!
15 | We'd love to accept your patches and contributions!
16 |
17 | ## Contributor License Agreement
18 |
19 | Contributions to this project must be accompanied by a Contributor License
20 | Agreement. You (or your employer) retain the copyright to your contribution,
21 | this simply gives us permission to use and redistribute your contributions as
22 | part of the project. Head over to <https://cla.developers.google.com/> to see
23 | your current agreements on file or to sign a new one.
24 |
25 | You generally only need to submit a CLA once, so if you've already submitted one
26 | (even if it was for a different project), you probably don't need to do it
27 | again.
28 |
29 | ## Getting Code
30 |
31 | 1. Clone this repository
32 |
33 | ```bash
34 | git clone https://github.com/GoogleChrome/rendertron
35 | cd rendertron
36 | ```
37 |
38 | 2. Install dependencies
39 |
40 | ```bash
41 | npm install
42 | ```
43 |
44 | 3. Run tests locally. For more information about tests, read [Running & Writing Tests](#running--writing-tests).
45 |
46 | ```bash
47 | npm test
48 | ```
49 |
50 | 4. (Optional) When working on cache-related code, you will also need to run the cache tests with `npm run test-cache`.
51 | - This requires the Google Cloud SDK, follow the steps described [here](https://cloud.google.com/sdk/docs/#install_the_latest_cloud_tools_version_cloudsdk_current_version) to install
52 | - run `gcloud components install beta cloud-datastore-emulator`
53 | - if you do not have Java 8+ JRE installed, you should install it too, as the Google Cloud Datastore emulator requires it
54 |
55 | ## Code reviews
56 |
57 | All submissions, including submissions by project members, require review. We
58 | use GitHub pull requests for this purpose. Consult
59 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
60 | information on using pull requests.
61 |
62 | ## Code Style
63 |
64 | - Coding style is fully defined in [.eslintrc.json](https://github.com/GoogleChrome/rendertron/blob/main/.eslintrc.json)
65 | - Comments should be generally avoided. If the code would not be understood without comments, consider re-writing the code to make it self-explanatory.
66 |
67 | To run code linter, use:
68 |
69 | ```bash
70 | npm run lint
71 | ```
72 |
73 | ## Adding New Dependencies
74 |
75 | For all dependencies (both installation and development):
76 |
77 | - **Do not add** a dependency if the desired functionality is easily implementable.
78 | - If adding a dependency, it should be well-maintained and trustworthy.
79 |
80 | A barrier for introducing new installation dependencies is especially high:
81 |
82 | - **Do not add** installation dependency unless it's critical to project success.
83 |
84 | ## Running & Writing Tests
85 |
86 | - Every feature should be accompanied by a test.
87 | - Tests should be _hermetic_. Tests should not depend on external services unless absolutely needed.
88 | - Tests should work on all three platforms: Mac, Linux and Windows.
89 |
90 | - To run all tests:
91 |
92 | ```bash
93 | npm test
94 | ```
95 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Rendertron
2 |
3 | [CI status](https://github.com/GoogleChrome/rendertron/actions)
4 | [NPM package](https://npmjs.org/package/rendertron)
5 |
6 |
7 | ## Rendertron is deprecated
8 |
9 | Please note that this project is **deprecated**. Dynamic rendering [is not a recommended approach](https://developers.google.com/search/docs/crawling-indexing/javascript/dynamic-rendering) and [there are better approaches to rendering on the web](https://web.dev/rendering-on-the-web/).
10 |
11 | **Rendertron will not be actively maintained at this point.**
12 |
13 | > Rendertron is a headless Chrome rendering solution designed to render & serialise web pages on the fly.
14 |
15 | #### :hammer: Built with [Puppeteer](https://github.com/GoogleChrome/puppeteer)
16 |
17 | #### :cloud: Easy deployment to Google Cloud
18 |
19 | #### :mag: Improves SEO
20 |
21 | Rendertron is designed to enable your Progressive Web App (PWA) to serve the correct
22 | content to any bot that doesn't render or execute JavaScript. Rendertron runs as a
23 | standalone HTTP server. Rendertron renders requested pages using Headless Chrome,
24 | [auto-detecting](#auto-detecting-loading-function) when your PWA has completed loading
25 | and serializes the response back to the original request. To use Rendertron, your application
26 | configures [middleware](#middleware) to determine whether to proxy a request to Rendertron.
27 | Rendertron is compatible with all client side technologies, including [web components](#web-components).
28 |
29 | ## Contents
30 |
31 | - [Middleware](#middleware)
32 | - [API](#api)
33 | - [Render](#render)
34 | - [Screenshot](#screenshot)
35 | - [Invalidate cache](#invalidate-cache)
36 | - [FAQ](#faq)
37 | - [Query parameters](#query-parameters)
38 | - [Page render timing](#page-render-timing)
39 | - [Rendering budget timeout](#rendering-budget-timeout)
40 | - [Web components](#web-components)
41 | - [Status codes](#status-codes)
42 | - [Installing & deploying](#installing--deploying)
43 | - [Building](#building)
44 | - [Running locally](#running-locally)
45 | - [Deploying to Google Cloud Platform](#deploying-to-google-cloud-platform)
46 | - [Deploying using Docker](#deploying-using-docker)
47 | - [Config](#config)
48 | - [Troubleshooting](#troubleshooting)
49 |
50 | ## Middleware
51 |
52 | Once you have the service up and running, you'll need to implement the differential serving
53 | layer. This checks the user agent to determine whether prerendering is required.
54 |
55 | This is a list of middleware available to use with the Rendertron service:
56 |
57 | - [Express.js middleware](/middleware)
58 | - [Firebase functions](https://github.com/justinribeiro/pwa-firebase-functions-botrender) (Community maintained)
59 | - [ASP.net core middleware](https://github.com/galamai/AspNetCore.Rendertron) (Community maintained)
60 | - [Python (Django) middleware and decorator](https://github.com/frontendr/python-rendertron) (Community maintained)
61 |
62 | Rendertron is also compatible with [prerender.io middleware](https://prerender.io/documentation/install-middleware).
63 | Note: the user agent lists differ there.
64 |
65 | ## API
66 |
67 | ### Render
68 |
69 | ```
70 | GET /render/<url>
71 | ```
72 |
73 | The `render` endpoint will render and serialize your page. Options are
74 | specified as query parameters:
75 |
76 | - `mobile` defaults to `false`. Enable by passing `?mobile` to request the
77 | mobile version of your site.
78 | - `refreshCache`: Pass `refreshCache=true` to ignore potentially cached render results
79 | and treat the request as if it is not cached yet.
80 | The new render result is used to replace the previous result.
81 |
82 | ### Screenshot
83 |
84 | ```
85 | GET /screenshot/<url>
86 | POST /screenshot/<url>
87 | ```
88 |
89 | The `screenshot` endpoint can be used to verify that your page is rendering
90 | correctly.
91 |
92 | Both endpoints support the following query parameters:
93 |
94 | - `width` defaults to `1000` - specifies viewport width.
95 | - `height` defaults to `1000` - specifies viewport height.
96 | - `mobile` defaults to `false`. Enable by passing `?mobile` to request the
97 | mobile version of your site.
98 | - `timezoneId` - specifies rendering for timezone.
99 |
100 | Additional options are available as a JSON string in the `POST` body. See
101 | [Puppeteer documentation](https://github.com/GoogleChrome/puppeteer/blob/v1.6.0/docs/api.md#pagescreenshotoptions)
102 | for available options. You cannot specify the `type` (defaults to `jpeg`) and
103 | `encoding` (defaults to `binary`) parameters.
104 |
105 | ### Invalidate cache
106 |
107 | ```
108 | GET /invalidate/<url>
109 | ```
110 |
111 | The `invalidate` endpoint will remove cache entries for `<url>` from the configured cache (in-memory, filesystem or cloud datastore).
112 |
113 | ## FAQ
114 |
115 | ### Query parameters
116 |
117 | When setting query parameters as part of your URL, ensure they are encoded correctly. In JS,
118 | this would be `encodeURIComponent(myURLWithParams)`. For example to specify `page=home`:
119 |
120 | ```
121 | https://render-tron.appspot.com/render/http://my.domain/%3Fpage%3Dhome
122 | ```
123 |
124 | ### Page render timing
125 |
126 | The service attempts to detect when a page has loaded by looking at the page load event, ensuring there
127 | are no outstanding network requests and that the page has had ample time to render.
128 |
129 | ### Rendering budget timeout
130 |
131 | By default, there is a hard limit of 10 seconds for rendering (configurable via the `timeout` option, see [Config](#config)). Stay within this budget by ensuring
132 | your application is rendered well before the budget expires.
133 |
134 | ### Web components
135 |
136 | Headless Chrome supports web components but shadow DOM is difficult to serialize effectively.
137 | As such, [shady DOM](https://github.com/webcomponents/shadydom) (a lightweight shim for Shadow DOM)
138 | is required for web components.
139 |
140 | If you are using web components v0 (deprecated), you will need to enable Shady DOM to
141 | render correctly. In Polymer 1.x, which uses web components v0, Shady DOM is enabled by default.
142 | If you are using Shadow DOM, override this by setting the query parameter `dom=shady` when
143 | directing requests to the Rendertron service.
144 |
145 | If you are using web components v1 and either `webcomponents-lite.js` or `webcomponents-loader.js`,
146 | set the query parameter `wc-inject-shadydom=true` when directing requests to the Rendertron
147 | service. This renderer service will force the necessary polyfills to be loaded and enabled.
148 |
149 | ### Status codes
150 |
151 | Status codes from the initial requested URL are preserved. If this is a 200, or 304, you can
152 | set the HTTP status returned by the rendering service by adding a meta tag.
153 |
154 | ```html
155 | <meta name="render:status_code" content="404" />
156 | ```
157 |
158 | ## Running locally
159 |
160 | To install Rendertron and run it locally, first install Rendertron:
161 |
162 | ```bash
163 | npm install -g rendertron
164 | ```
165 |
166 | With Chrome installed on your machine run the Rendertron CLI:
167 |
168 | ```bash
169 | rendertron
170 | ```
171 |
172 | ## Installing & deploying
173 |
174 | ### Building
175 |
176 | Clone and install dependencies:
177 |
178 | ```bash
179 | git clone https://github.com/GoogleChrome/rendertron.git
180 | cd rendertron
181 | npm install
182 | npm run build
183 | ```
184 |
185 | ### Running locally
186 |
187 | With a local instance of Chrome installed, you can start the server locally:
188 |
189 | ```bash
190 | npm run start
191 | ```
192 |
193 | ### Deploying to Google Cloud Platform
194 |
195 | ```
196 | gcloud app deploy app.yaml --project <your-project-id>
197 | ```
198 |
199 | ### Deploying using Docker
200 |
201 | Rendertron no longer includes a Docker file. Instead, refer to
202 | [Puppeteer documentation](https://github.com/GoogleChrome/puppeteer/blob/master/docs/troubleshooting.md#running-puppeteer-in-docker)
203 | on how to run headless Chrome in Docker.
204 |
205 | ### Config
206 |
207 | When deploying the service, set configuration variables by including a `config.json` in the
208 | root. Available configuration options:
209 |
210 | - `timeout` _default `10000`_ - set the timeout used to render the target page.
211 | - `port` _default `3000`_ - set the port to use for running and listening the rendertron service. Note if process.env.PORT is set, it will be used instead.
212 | - `host` _default `0.0.0.0`_ - set the hostname to use for running and listening the rendertron service. Note if process.env.HOST is set, it will be used instead.
213 | - `width` _default `1000`_ - set the width (resolution) to be used for rendering the page.
214 | - `height` _default `1000`_ - set the height (resolution) to be used for rendering the page.
215 | - `reqHeaders` _default `{}`_ - set the additional HTTP headers to be sent to the target page with every request.
216 | - `cache` _default `null`_ - set to `datastore` to enable caching on Google Cloud using datastore _only use if deploying to google cloud_, `memory` to enable in-memory caching or `filesystem` to enable disk based caching
217 | - `cacheConfig` - an object array to specify caching options
218 | - `renderOnly` - restrict the endpoint to only service requests for certain domains. Specified as an array of strings. eg. `['http://render.only.this.domain']`. This is a strict prefix match, so ensure you specify the exact protocols that will be used (eg. http, https).
219 | - `closeBrowser` _default `false`_ - `true` forces the browser to close and reopen between each page render, some sites might need this to prevent URLs past the first one rendered returning null responses.
220 | - `restrictedUrlPattern` _default `null`_ - set the restrictedUrlPattern to restrict the requests matching given regex pattern.
221 |
222 | #### cacheConfig
223 |
224 | - `cacheDurationMinutes` _default `1440`_ - set an expiry time in minutes, defaults to 24 hours. Set to -1 to disable cache expiration
225 | - `cacheMaxEntries` _default `100`_ - set the maximum number of entries stored in the selected cache method. Set to `-1` to allow unlimited caching. If using the datastore caching method, setting this value over `1000` may lead to degraded performance as the query to determine the size of the cache may be too slow. If you want to allow a larger cache in `datastore` consider setting this to `-1` and managing the size of your datastore using a method like this [Deleting Entries in Bulk](https://cloud.google.com/datastore/docs/bulk-delete)
226 | - `snapshotDir` _default `/renderton`_ - **filesystem only** the directory the rendertron cache files will be stored in
227 |
228 | ##### Example
229 |
230 | An example config file specifying a memory cache, with a 2 hour expiration, and a maximum of 50 entries
231 |
232 | ```javascript
233 | {
234 | "cache": "memory",
235 | "cacheConfig": {
236 | "cacheDurationMinutes": 120,
237 | "cacheMaxEntries": 50
238 | }
239 | }
240 | ```
241 |
242 | ### Troubleshooting
243 |
244 | If you're having troubles with getting Headless Chrome to run in your
245 | environment, refer to the
246 | [troubleshooting guide](https://github.com/GoogleChrome/puppeteer/blob/master/docs/troubleshooting.md)
247 | for Puppeteer.
248 |
--------------------------------------------------------------------------------
/app.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Rendertron",
3 | "description": "Rendertron is a headless Chrome rendering solution designed to render & serialise web pages on the fly.",
4 | "keywords": ["rendertron", "render", "web", "chrome"],
5 | "website": "https://github.com/GoogleChrome/rendertron",
6 | "buildpacks": [
7 | {
8 | "url": "heroku/google-chrome"
9 | },
10 | {
11 | "url": "heroku/nodejs"
12 | }
13 | ]
14 | }
15 |
--------------------------------------------------------------------------------
/app.yaml:
--------------------------------------------------------------------------------
1 | runtime: nodejs12
2 | instance_class: F4_1G
3 | automatic_scaling:
4 | min_instances: 1
5 | env_variables:
6 | DISABLE_LEGACY_METADATA_SERVER_ENDPOINTS: "true"
7 |
--------------------------------------------------------------------------------
/bin/rendertron:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | 'use strict';
4 |
5 | process.title = 'rendertron';
6 |
7 | var semver = require('semver');
8 |
9 | if (!semver.satisfies(process.version, '>=10')) {
10 | console.log('Rendertron requires Node 10+');
11 | process.exit(1);
12 | }
13 |
14 | const Rendertron = require('../build/rendertron.js').Rendertron;
15 | const rendertron = new Rendertron();
16 | rendertron.initialize();
17 |
--------------------------------------------------------------------------------
/docs/api-reference.md:
--------------------------------------------------------------------------------
1 | ## Rendertron API Reference
2 |
3 | ### HTTP API endpoints
4 |
5 | `/render`
6 |
7 | Fetch and serialize a URL in headless Chrome.
8 |
9 | | param | type | description |
10 | | ------------ | -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
11 | | `url` | `String` | a valid URL to fetch |
12 | | `opts` | `Object` | `Renderer` config class options |
13 | | `timezoneId` | `String` | specify timezoneId from [list](https://source.chromium.org/chromium/chromium/deps/icu.git/+/faee8bc70570192d82d2978a71e2a615788597d1:source/data/misc/metaZones.txt) with a querystring appended to the requested URL. |
14 |
15 | `/screenshot`
16 |
17 | Return a screenshot of the requested URL
18 |
19 | ```javascript
20 | async screenshot(
21 | url: string,
22 | isMobile: boolean,
23 | dimensions: ViewportDimensions,
24 | options?: object,
25 | timezoneId?: string): Promise<Buffer>
26 | }
27 | ```
28 |
29 | | param | type | description |
30 | | ------------ | ------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
31 | | `url` | `String` | A valid URL to fetch |
32 | | `isMobile` | `Bool` | Specify a mobile layout with a querystring automatically appended to the requested URL. |
33 | | `dimensions` | [`ViewportDimensions`](viewport-dimensions) | `height` and `width` specifications for the rendered page |
34 | | `options` | `Object` | define screenshot params |
35 | | `timezoneId` | `String` | define timezoneId from [list](https://source.chromium.org/chromium/chromium/deps/icu.git/+/faee8bc70570192d82d2978a71e2a615788597d1:source/data/misc/metaZones.txt) |
36 |
37 | `/invalidate/`
38 |
39 | Removes the cached response for a given URL from the cache.
40 |
41 | | param | type | description |
42 | | ----- | -------- | ------------------------------------ |
43 | | `url` | `String` | A valid URL to remove from the cache |
44 |
45 | `/_ah/health`
46 |
47 | Returns HTTP 200 and text "OK", if the Rendertron server is healthy.
48 |
49 | ### Rendertron internal API
50 |
51 | #### `Renderer`
52 |
53 | Create a `puppeteer` instance to render the requested URL. Uses default `Config`
54 | class or a user-defined `Config` file.
55 |
56 | ```javascript
57 | export class Renderer {
58 | private browser: puppeteer.Browser;
59 | private config: Config;
60 |
61 | constructor(browser: puppeteer.Browser, config: Config) {
62 | this.browser = browser;
63 | this.config = config;
64 | }
65 | ```
66 |
67 | #### `Config`
68 |
69 | The `Config` class defaults can be overridden with your own settings.
70 | [More details](https://github.com/GoogleChrome/rendertron/blob/master/docs/configure.md)
71 |
72 | ```javascript
73 | public static config: Config = {
74 | cache: null,
75 | timeout: 10000,
76 | port: '3000',
77 | host: '0.0.0.0',
78 | width: 1000,
79 | height: 1000,
80 | headers: {}
81 | };
82 | ```
83 |
84 | #### `ViewportDimensions`
85 |
86 | An Object setting the width and height of the requested resource.
87 |
88 | ```javascript
89 | type ViewportDimensions = {
90 | width: number,
91 | height: number,
92 | };
93 | ```
94 |
95 | #### `Options`
96 |
97 | Specify the screenshot file type.
98 |
99 | ```javascript
100 | const screenshotOptions = Object.assign({}, options, {
101 | type: 'jpeg',
102 | encoding: 'binary',
103 | });
104 | ```
105 |
106 | `/invalidate`
107 |
108 | Invalidate all cache entries present in the configured cache (memory, filesystem or cloud datastore).
109 | (Only available if cache is configured)
110 |
111 | `/invalidate`
112 |
113 | Invalidate a cache entry from memory, filesystem or cloud datastore.
114 | (Only available if cache is configured)
115 |
116 | | param | type | description |
117 | | ----- | -------- | -------------------------- |
118 | | `url` | `String` | URL to invalidate in cache |
119 |
120 | ###
121 |
--------------------------------------------------------------------------------
/docs/configure.md:
--------------------------------------------------------------------------------
1 | # Config
2 |
3 | When deploying the service, set configuration variables by including a `config.json` in the
4 | root. Available configuration options:
5 |
6 | - `timeout` _default `10000`_ - set the timeout used to render the target page.
7 | - `port` _default `3000`_ - set the port to use for running and listening the rendertron service. Note if process.env.PORT is set, it will be used instead.
8 | - `host` _default `0.0.0.0`_ - set the hostname to use for running and listening the rendertron service. Note if process.env.HOST is set, it will be used instead.
9 | - `width` _default `1000`_ - set the width (resolution) to be used for rendering the page.
10 | - `height` _default `1000`_ - set the height (resolution) to be used for rendering the page.
11 | - `cache` _default `null`_ - set to `datastore` to enable caching on Google Cloud using datastore _only use if deploying to google cloud_, `memory` to enable in-memory caching or `filesystem` to enable disk based caching
12 | - `cacheConfig` - an object array to specify caching options
13 | - `renderOnly` - restrict the endpoint to only service requests for certain domains. Specified as an array of strings. eg. `['http://render.only.this.domain']`. This is a strict prefix match, so ensure you specify the exact protocols that will be used (eg. http, https).
14 | - `closeBrowser` _default `false`_ - `true` forces the browser to close and reopen between each page render, some sites might need this to prevent URLs past the first one rendered returning null responses.
15 |
16 | ## cacheConfig
17 |
18 | - `cacheDurationMinutes` _default `1440`_ - set an expiry time in minutes, defaults to 24 hours. Set to -1 to disable cache expiration
19 | - `cacheMaxEntries` _default `100`_ - set the maximum number of entries stored in the selected cache method. Set to `-1` to allow unlimited caching. If using the datastore caching method, setting this value over `1000` may lead to degraded performance as the query to determine the size of the cache may be too slow. If you want to allow a larger cache in `datastore` consider setting this to `-1` and managing the size of your datastore using a method like this [Deleting Entries in Bulk](https://cloud.google.com/datastore/docs/bulk-delete)
20 | - `snapshotDir` _default `/renderton`_ - **filesystem only** the directory the rendertron caches will be stored in
21 |
22 | ### Example
23 |
24 | An example config file specifying a memory cache, with a 2 hour expiration, and a maximum of 50 entries
25 |
26 | ```javascript
27 | {
28 | "cache": "memory",
29 | "cacheConfig": {
30 | "cacheDurationMinutes": 120,
31 | "cacheMaxEntries": 50
32 | }
33 | }
34 | ```
35 |
--------------------------------------------------------------------------------
/docs/deploy.md:
--------------------------------------------------------------------------------
1 | # Deploying Rendertron to Google Cloud Platform
2 |
3 | Rendertron runs a server that takes a URL and returns static HTML for the URL by using headless Chromium. This guide follows
4 | `https://github.com/GoogleChrome/rendertron#deploying-to-google-cloud-platform`
5 |
6 | - To clone the Rendertron repository from GitHub, run the following command:
7 | `git clone https://github.com/GoogleChrome/rendertron.git`
8 |
9 | - Change directories:
10 | `cd rendertron`
11 |
12 | - To install dependencies and build Rendertron on your computer, run the following command:
13 | `npm install && npm run build`
14 |
15 | - Create a new file called config.json in the rendertron directory with the following content to enable Rendertron's cache:
16 | `{ "cache": "datastore" }`
17 |
18 | - From the rendertron directory, run the following command. Replace YOUR_PROJECT_ID with your project ID that you set in Google Cloud Platform.
19 | `gcloud app deploy app.yaml --project YOUR_PROJECT_ID`
20 |
21 | - Select a region of your choice and confirm the deployment. Wait for the command to finish.
22 |
23 | - Enter YOUR_PROJECT_ID.appspot.com in your browser. Replace YOUR_PROJECT_ID with your actual project ID that you set in Google Cloud Platform. You should see Rendertron's interface with an input field and a few buttons.
24 |
25 | - When you see the Rendertron web interface, you have successfully deployed your own Rendertron instance. Take note of your project's URL (YOUR_PROJECT_ID.appspot.com) as you will need it later.
26 |
27 | # Deploying Rendertron to Heroku
28 |
29 | [Deploy to Heroku](https://dashboard.heroku.com/new?button-url=https://github.com/GoogleChrome/rendertron/tree/main&template=https://github.com/GoogleChrome/rendertron/tree/main)
30 |
31 | Setup Herokuapp and Heroku CLI
32 | `https://devcenter.heroku.com/articles/heroku-cli`
33 |
34 | First, add the Google Chrome buildpack to your project:
35 |
36 | ```
37 | $ heroku buildpacks:set https://github.com/heroku/heroku-buildpack-google-chrome.git -a <app-name>
38 | ```
39 |
40 | Next, add the `heroku/nodejs` buildpack to your project:
41 |
42 | ```
43 | $ heroku buildpacks:add --index 2 heroku/nodejs -a <app-name>
44 | ```
45 |
46 | Then, update the `package.json` entry for `engines` to specific node and npm versions. I used:
47 |
48 | ```
49 | {
50 | ...
51 | "engines": {
52 | "node": "10.15.1",
53 | "npm": "6.4.1"
54 | },
55 | ...
56 | }
57 | ```
58 |
59 | This was helpful in getting past a `node-gyp` issue during `npm install`, which Heroku will run each time you deploy.
60 |
61 | Next, enter a new script into your `package.json`:
62 |
63 | ```
64 | {
65 | "scripts": {
66 | ...,
67 | "heroku-postbuild": "npm run build"
68 | }
69 | }
70 | ```
71 |
72 | This will make sure to build rendertron into `bin/rendertron` on each deploy, in case you have any local changes.
73 |
74 | Finally, add a `Procfile` to your project with the following:
75 |
76 | ```
77 | web: node bin/rendertron
78 | ```
79 |
80 | # Deploying Rendertron in a docker container
81 |
82 | Based on Puppeteer instructions we can create a docker image that bundles a headless chrome and rendertron. We can start from node 14 base image.
83 |
84 | For more information about Chrome installation, please see the Puppeteer page: https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md#running-puppeteer-in-docker
85 |
86 | If you don't want to use rendertron default configurations you can create a config.json file. This file must be created at the project root level, in the same directory as the Dockerfile.
87 |
88 | ```
89 | {
90 | "cache": "filesystem",
91 | "cacheConfig": {
92 | "cacheDurationMinutes": 7200,
93 | "cacheMaxEntries": 1000,
94 | "snapshotDir": "/cache"
95 | }
96 | }
97 | ```
98 |
99 | Then we can define the Dockerfile like this:
100 |
101 | ```
102 | FROM node:14.11.0-stretch
103 |
104 | RUN apt-get update \
105 | && apt-get install -y wget gnupg \
106 | && wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
107 | && sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \
108 | && apt-get update \
109 | && apt-get install -y google-chrome-stable fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-freefont-ttf libxss1 \
110 | --no-install-recommends \
111 | && rm -rf /var/lib/apt/lists/*
112 |
113 | # This directory will store cached files as specified in the config.json.
114 | # If you haven't defined the cacheConfig.snapshotDir property you can remove the following line
115 | RUN mkdir /cache
116 |
117 | RUN git clone https://github.com/GoogleChrome/rendertron.git
118 |
119 | WORKDIR /rendertron
120 |
121 | RUN npm install && npm run build
122 |
123 | # If you aren't using a custom config.json file you must remove the following line
124 | ADD config.json .
125 |
126 | EXPOSE 3000
127 |
128 | CMD ["npm", "run", "start"]
129 |
130 | ```
131 |
132 | And we can build an image using the previous Dockerfile:
133 |
134 | ```
135 | docker build . -t rendertron:3.0
136 | docker run -d --log-opt max-size=100m --log-opt max-file=3 --name rendertron -p 3000:3000 rendertron:3.0
137 | ```
138 |
139 | The Rendertron API will be available at localhost:3000
140 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | ## What is Rendertron?
2 |
3 | > Rendertron is a headless Chrome rendering solution designed to render &
4 | > serialise web pages on the fly.
5 |
6 | -
7 | Built with [Puppeteer](https://github.com/GoogleChrome/puppeteer)
8 | -
9 | Easy deployment to Google Cloud
10 | -
11 | Improves SEO
12 |
13 | Rendertron is designed to enable your Progressive Web App (PWA) to serve the
14 | correct content to any bot that doesn't render or execute JavaScript. Rendertron
15 | runs as a standalone HTTP server. Rendertron renders requested pages using
16 | Headless Chrome, [auto-detecting](#auto-detecting-loading-function) when your
17 | PWA has completed loading and serializes the response back to the original
18 | request. To use Rendertron, your application configures
19 | [middleware](#middleware) to determine whether to proxy a request to Rendertron.
20 | Rendertron is compatible with all client side technologies, including
21 | [web components](#web-components).
22 |
23 | ## Demo endpoint
24 |
25 | A demo Rendertron service is available at https://render-tron.appspot.com/. It
26 | is not designed to be used as a production endpoint. You can use it, but there
27 | are no uptime guarantees.
28 |
29 | ## Learn more
30 |
31 | - [Rendertron user guide](users-guide)
32 | - [Configuring Rendertron](configure)
33 | - [Deploying Rendertron](deploy)
34 | - [Using Rendertron with your server](server-setup)
35 | - [API Reference](api-reference)
36 | - [Best practices](best_practices)
37 | - [Contributing to Rendertron](https://github.com/GoogleChrome/rendertron/blob/master/CONTRIBUTING.md)
38 |
--------------------------------------------------------------------------------
/docs/server-setup.md:
--------------------------------------------------------------------------------
1 | ## Using Rendertron with your server
2 |
3 | Rendertron provides a web server that accepts requests for rendering pages.
4 | Usually your web application already uses a web server and you need to configure your web server to redirect requests from the desired bots and crawlers to your Rendertron server.
5 |
6 | This can be done with every common web server but the steps differ between them.
7 | Here are guides for some web servers:
8 |
9 | - [Apache](./server-setup/apache)
10 | - [nginx](./server-setup/nginx)
11 | - [express.js](./server-setup/expressjs)
12 |
13 | If you have instructions for other web servers, please consider [contributing to this page](https://github.com/googlechrome/rendertron)!
14 |
--------------------------------------------------------------------------------
/docs/server-setup/apache.md:
--------------------------------------------------------------------------------
1 | ## Set up Rendertron with Apache
2 |
3 | To use Rendertron with Apache, set up a conditional URL rewrite based on the user agent.
4 | You can do this either in an `.htaccess` file, the `VirtualHost` configuration or the main configuration file.
5 |
6 | ### Prerequisites
7 |
8 | Your Apache needs to have `mod_rewrite` and `mod_proxy_http` enabled for this configuration. On Debian and Ubuntu, run these commands to activate these modules:
9 |
10 | ```
11 | sudo a2enmod rewrite proxy_http
12 | sudo service apache2 restart
13 | ```
14 |
15 | ### Basic configuration
16 |
17 | Use the following configuration to send all requests from user agents containing `bot` to Rendertron:
18 |
19 | ```
20 | RewriteEngine On
21 | RewriteCond %{HTTP_USER_AGENT} bot
22 | # Replace the PUT-YOUR-RENDERTRON-URL-HERE with the URL of your Rendertron instance
23 | # Replace YOUR-WEBAPP-ROOT-URL with the base URL of your web application (e.g. example.com)
24 | RewriteRule ^(.*)$ https://PUT-YOUR-RENDERTRON-URL-HERE/render/https://YOUR-WEBAPP-ROOT-URL$1 [P,L]
25 | ```
26 |
27 | ### Sending multiple bot user agents to Rendertron
28 |
29 | To make your Apache web server send requests from a list of bots to your Rendertron instance, use this syntax:
30 |
31 | ```
32 | RewriteEngine On
33 | RewriteCond %{HTTP_USER_AGENT} facebookexternalhit|linkedinbot|twitterbot
34 | # Replace the PUT-YOUR-RENDERTRON-URL-HERE with the URL of your Rendertron instance
35 | # Replace YOUR-WEBAPP-ROOT-URL with the base URL of your web application (e.g. example.com)
36 | RewriteRule ^(.*)$ https://PUT-YOUR-RENDERTRON-URL-HERE/render/https://YOUR-WEBAPP-ROOT-URL$1 [P,L]
37 | ```
38 |
39 | Separate the bot names with the pipe (`|`) character.
40 | This configuration is case-sensitive, so `googlebot` works while `Googlebot` doesn't.
41 |
--------------------------------------------------------------------------------
/docs/server-setup/expressjs.md:
--------------------------------------------------------------------------------
1 | ## Set up Rendertron with express.js
2 |
3 | If you use [expressjs](https://expressjs.com) you can use the [rendertron-middleware] to add Rendertron to your express.js application.
4 |
5 | ### Install rendertron-middleware
6 |
7 | Inside the root directory of your web application, run the following command:
8 |
9 | ```
10 | npm install --save rendertron-middleware
11 | ```
12 |
13 | ### Setup your express.js application to use the middleware
14 |
15 | ```javascript
16 | const express = require('express');
17 | const rendertron = require('rendertron-middleware');
18 |
19 | const app = express();
20 |
21 | app.use(
22 | rendertron.makeMiddleware({
23 | // replace this with the web address of your rendertron instance
24 | proxyUrl: 'http://PUT-YOUR-RENDERTRON-URL-HERE/render',
25 | })
26 | );
27 |
28 | app.use(express.static('files'));
29 | app.listen(8080);
30 | ```
31 |
32 | ### Configure which user agents are pre-rendered with Rendertron
33 |
34 | The middleware comes with a pre-configured [bot list](https://github.com/GoogleChrome/rendertron/blob/a1dd3ab1f054bc19e89dcdecdb71dc004f7d068e/middleware/src/middleware.ts#L24-L41).
35 |
36 | If you wish to use Rendertron for other bots, you can either _replace_ or _extend_ this list.
37 |
38 | To replace the list with your own, configure the middleware like this:
39 |
40 | ```javascript
41 | // only use Rendertron for LinkedInBot and Twitterbot
42 | const myBotList = ['linkedinbot', 'twitterbot'];
43 |
44 | app.use(
45 | rendertron.makeMiddleware({
46 | // replace the default bot list with your own:
47 | userAgentPattern: new RegExp(myBotList.join('|'), 'i'),
48 | // replace this with the web address of your rendertron instance
49 | proxyUrl: 'http://PUT-YOUR-RENDERTRON-URL-HERE/render',
50 | })
51 | );
52 | ```
53 |
54 | You can also extend the bot list to include more bots:
55 |
56 | ```javascript
57 | // add googlebot and yolobot to bot list
58 | const myBotList = rendertron.botUserAgents.concat(['googlebot', 'yolobot']);
59 |
60 | app.use(
61 | rendertron.makeMiddleware({
62 | // use the extended bot list:
63 | userAgentPattern: new RegExp(myBotList.join('|'), 'i'),
64 | // replace this with the web address of your rendertron instance
65 | proxyUrl: 'http://PUT-YOUR-RENDERTRON-URL-HERE/render',
66 | })
67 | );
68 | ```
69 |
--------------------------------------------------------------------------------
/docs/server-setup/nginx.md:
--------------------------------------------------------------------------------
1 | ## Set up Rendertron with nginx
2 |
3 | To use Rendertron with nginx, [set up nginx as a reverse proxy](https://docs.nginx.com/nginx/admin-guide/web-server/reverse-proxy/).
4 |
5 | To use Rendertron only for bots, check the `$http_user_agent`. When it looks like a bot, send the request to Rendertron; otherwise, send it to your web application directly.
6 |
7 | ### Sample configuration for a single bot
8 |
9 | To send requests from user agents containing `bot` to Rendertron, use the following configuration:
10 |
11 | ```
12 | server {
13 | listen 80;
14 | server_name example.com;
15 | # ...other configuration...
16 |
17 | # only send requests from user agents containing the word "bot" to Rendertron
18 | if ($http_user_agent ~* 'bot') {
19 | rewrite ^(.*)$ /rendertron/$1;
20 | }
21 |
22 | location /rendertron/ {
23 | proxy_set_header X-Real-IP $remote_addr;
24 | proxy_set_header X-Forwarded-For $remote_addr;
25 | # replace PUT-YOUR-RENDERTRON-URL-HERE with your rendertron server address below
26 | proxy_pass http://PUT-YOUR-RENDERTRON-URL-HERE/render/$scheme://$host:$server_port$request_uri;
27 | }
28 | }
29 | ```
30 |
31 | ### Setting up Rendertron for multiple bot user agents:
32 |
33 | To enable Rendertron for a list of (bot) user agents, you can map the `$http_user_agent` to a custom variable indicating if you consider this user agent a bot. To do so, add this to your `nginx.conf`:
34 |
35 | ```
36 | # Creates $is_bot variable and match user agents
37 | map $http_user_agent $is_bot {
38 | default 0;
39 | '~*googlebot' 1;
40 | '~*bingbot' 1;
41 | # add more lines for other user agents here
42 | }
43 | ```
44 |
45 | In your site configuration, you can use the following to send requests where `$is_bot` is 1 to Rendertron:
46 |
47 | ```
48 | server {
49 | listen 80;
50 | server_name example.com;
51 | # ...other configuration...
52 | # only send requests from user agents flagged as bots ($is_bot = 1) to Rendertron
53 | if ($is_bot = 1) {
54 | rewrite ^(.*)$ /rendertron/$1;
55 | }
56 |
57 | location /rendertron/ {
58 | proxy_set_header X-Real-IP $remote_addr;
59 | proxy_set_header X-Forwarded-For $remote_addr;
60 | # replace PUT-YOUR-RENDERTRON-URL-HERE with your rendertron server address below
61 | proxy_pass http://PUT-YOUR-RENDERTRON-URL-HERE/render/$scheme://$host:$server_port$request_uri;
62 | }
63 | }
64 | ```
65 |
--------------------------------------------------------------------------------
/middleware/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 |
3 |
4 |
5 | ## [0.1.5] 2018-12-14
6 |
7 | - Add TelegramBot as bot.
8 |
9 | ## [0.1.4] 2018-12-10
10 |
11 | - Add Whatsapp as bot.
12 |
13 | ## [0.1.3] 2018-09-18
14 |
15 | - Converted to TypeScript.
16 |
17 | ## [0.1.2] 2017-08-29
18 |
19 | - Fix bug with wc-inject-shadydom URL parameter.
20 |
21 | ## [0.1.1] 2017-08-23
22 |
23 | - Remove broken typings configuration.
24 |
25 | ## [0.1.0] 2017-08-17
26 |
27 | - Initial release.
28 |
--------------------------------------------------------------------------------
/middleware/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/middleware/README.md:
--------------------------------------------------------------------------------
1 | # rendertron-middleware
2 |
3 | [Build status](https://github.com/GoogleChrome/rendertron/actions)
4 | [npm package](https://www.npmjs.com/package/rendertron-middleware)
5 |
6 | An Express middleware for [Rendertron](https://github.com/samuelli/bot-render).
7 |
8 | Rendertron is a server which runs headless Chrome and renders web pages on the fly, which can be set up to serve pages to search engines, social networks and link rendering bots.
9 |
10 | This middleware checks the User-Agent header of incoming requests, and if it matches one of a configurable set of bots, proxies that request through Rendertron.
11 |
12 | ## Usage
13 |
14 | ```sh
15 | $ npm install --save express rendertron-middleware
16 | ```
17 |
18 | ```js
19 | const express = require('express');
20 | const rendertron = require('rendertron-middleware');
21 |
22 | const app = express();
23 |
24 | app.use(
25 | rendertron.makeMiddleware({
26 | proxyUrl: 'http://my-rendertron-instance/render',
27 | })
28 | );
29 |
30 | app.use(express.static('files'));
31 | app.listen(8080);
32 | ```
33 |
34 | ## Configuration
35 |
36 | The `makeMiddleware` function takes a configuration object with the following
37 | properties:
38 |
39 | | Property | Default | Description |
40 | | ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
41 | | `proxyUrl` | _Required_ | Base URL of your running Rendertron proxy service. |
42 | | `userAgentPattern` | A set of known bots that benefit from pre-rendering. [Full list.](https://github.com/samuelli/bot-render/blob/master/middleware/src/middleware.ts) | RegExp for matching requests by User-Agent header. |
43 | | `excludeUrlPattern` | A set of known static file extensions. [Full list.](https://github.com/samuelli/bot-render/blob/master/middleware/src/middleware.ts) | RegExp for excluding requests by the path component of the URL. |
44 | | `injectShadyDom` | `false` | Force the web components polyfills to be loaded. [Read more.](https://github.com/samuelli/bot-render#web-components) |
45 | | `timeout` | `11000` | Millisecond timeout for the proxy request to Rendertron. If exceeded, the standard response is served (i.e. `next()` is called). This is **not** the timeout for the Rendertron server itself. See also the [Rendertron timeout.](https://github.com/googlechrome/rendertron#rendering-budget-timeout) |
46 | | `allowedForwardedHosts` | `[]` | If a forwarded host header is found and matches one of the hosts in this array, then that host will be used for the request to the rendertron server instead of the actual host of the current request. This is useful if this middleware is running on a different host which is proxied behind the actual site, and the rendertron server should request the main site. **Note:** For security, because the header info is untrusted, only those hosts which you explicitly allow will be forwarded, otherwise they will be ignored. Leaving this undefined or empty (the default) will disable host forwarding. |
47 | | `forwardedHostHeader` | `"X-Forwarded-Host"` | Header used to determine the forwarded host that should be used when building the URL to be rendered. Only used if `allowedForwardedHosts` is not empty. |
48 |
--------------------------------------------------------------------------------
/middleware/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "rendertron-middleware",
3 | "version": "0.1.6",
4 | "description": "Express middleware for the rendertron service.",
5 | "engines": {
6 | "node": ">=10"
7 | },
8 | "main": "build/middleware.js",
9 | "types": "build/middleware.d.ts",
10 | "scripts": {
11 | "build": "tsc",
12 | "prepack": "npm run build",
13 | "test": "npm run build && ava build/*/*-test.js"
14 | },
15 | "files": [
16 | "build/",
17 | "!build/test/"
18 | ],
19 | "license": "Apache-2.0",
20 | "dependencies": {
21 | "request": "^2.88.2"
22 | },
23 | "devDependencies": {
24 | "@types/express": "^4.17.11",
25 | "@types/request": "^2.48.5",
26 | "@types/supertest": "^2.0.11",
27 | "ava": "^3.15.0",
28 | "express": "^4.17.1",
29 | "supertest": "^6.1.3",
30 | "typescript": "~4.2.4"
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/middleware/src/middleware.ts:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2017 Google Inc. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 |
17 | import express from 'express';
18 | import request from 'request';
19 |
20 | /**
21 | * A default set of user agent patterns for bots/crawlers that do not perform
22 | * well with pages that require JavaScript.
23 | */
24 | export const botUserAgents = [
25 | 'Baiduspider',
26 | 'bingbot',
27 | 'Embedly',
28 | 'facebookexternalhit',
29 | 'LinkedInBot',
30 | 'outbrain',
31 | 'pinterest',
32 | 'quora link preview',
33 | 'rogerbot',
34 | 'showyoubot',
35 | 'Slackbot',
36 | 'TelegramBot',
37 | 'Twitterbot',
38 | 'vkShare',
39 | 'W3C_Validator',
40 | 'WhatsApp',
41 | ];
42 |
43 | /**
44 | * A default set of file extensions for static assets that do not need to be
45 | * proxied.
46 | */
47 | const staticFileExtensions = [
48 | 'ai',
49 | 'avi',
50 | 'css',
51 | 'dat',
52 | 'dmg',
53 | 'doc',
54 | 'doc',
55 | 'exe',
56 | 'flv',
57 | 'gif',
58 | 'ico',
59 | 'iso',
60 | 'jpeg',
61 | 'jpg',
62 | 'js',
63 | 'less',
64 | 'm4a',
65 | 'm4v',
66 | 'mov',
67 | 'mp3',
68 | 'mp4',
69 | 'mpeg',
70 | 'mpg',
71 | 'pdf',
72 | 'png',
73 | 'ppt',
74 | 'psd',
75 | 'rar',
76 | 'rss',
77 | 'svg',
78 | 'swf',
79 | 'tif',
80 | 'torrent',
81 | 'ttf',
82 | 'txt',
83 | 'wav',
84 | 'wmv',
85 | 'woff',
86 | 'xls',
87 | 'xml',
88 | 'zip',
89 | ];
90 |
91 | /**
92 | * Options for makeMiddleware.
93 | */
94 | export interface Options {
95 | /**
96 | * Base URL of the Rendertron proxy service. Required.
97 | */
98 | proxyUrl: string;
99 |
100 | /**
101 | * Regular expression to match user agent to proxy. Defaults to a set of bots
102 | * that do not perform well with pages that require JavaScript.
103 | */
104 | userAgentPattern?: RegExp;
105 |
106 | /**
107 | * Regular expression used to exclude request URL paths. Defaults to a set of
108 | * typical static asset file extensions.
109 | */
110 | excludeUrlPattern?: RegExp;
111 |
112 | /**
113 | * Force web components polyfills to be loaded and enabled. Defaults to false.
114 | */
115 | injectShadyDom?: boolean;
116 |
117 | /**
118 | * Millisecond timeout for proxy requests. Defaults to 11000 milliseconds.
119 | */
120 | timeout?: number;
121 |
122 | /**
123 | * If a forwarded host header is found and matches one of the hosts in this
124 | * array, then that host will be used for the request to the rendertron server
125 | * instead of the actual host of the request.
126 | * This is useful if this middleware is running on a different host
127 | * which is proxied behind the actual site, and the rendertron server should
128 | * request the main site.
129 | */
130 | allowedForwardedHosts?: string[];
131 |
132 | /**
133 | * Header used to determine the forwarded host that should be used when
134 | * building the URL to be rendered. Only applicable if `allowedForwardedHosts`
135 | * is not empty.
136 | * Defaults to `"X-Forwarded-Host"`.
137 | */
138 | forwardedHostHeader?: string;
139 | }
140 |
141 | /**
142 | * Create a new Express middleware function that proxies requests to a
143 | * Rendertron bot rendering service.
144 | */
145 | export function makeMiddleware(options: Options): express.Handler {
146 | if (!options || !options.proxyUrl) {
147 | throw new Error('Must set options.proxyUrl.');
148 | }
149 | let proxyUrl = options.proxyUrl;
150 | if (!proxyUrl.endsWith('/')) {
151 | proxyUrl += '/';
152 | }
153 | const userAgentPattern =
154 | options.userAgentPattern || new RegExp(botUserAgents.join('|'), 'i');
155 | const excludeUrlPattern =
156 | options.excludeUrlPattern ||
157 | new RegExp(`\\.(${staticFileExtensions.join('|')})$`, 'i');
158 | const injectShadyDom = !!options.injectShadyDom;
159 | // The Rendertron service itself has a hard limit of 10 seconds to render, so
160 | // let's give a little more time than that by default.
161 | const timeout = options.timeout || 11000; // Milliseconds.
162 | const allowedForwardedHosts = options.allowedForwardedHosts || [];
163 | const forwardedHostHeader = allowedForwardedHosts.length
164 | ? options.forwardedHostHeader || 'X-Forwarded-Host'
165 | : null;
166 |
167 | return function rendertronMiddleware(req, res, next) {
168 | const ua = req.headers['user-agent'];
169 | if (
170 | ua === undefined ||
171 | !userAgentPattern.test(ua) ||
172 | excludeUrlPattern.test(req.path)
173 | ) {
174 | next();
175 | return;
176 | }
177 | const forwardedHost = forwardedHostHeader && req.get(forwardedHostHeader);
178 | const host =
179 | forwardedHost && allowedForwardedHosts.includes(forwardedHost)
180 | ? forwardedHost
181 | : req.get('host');
182 | const incomingUrl = req.protocol + '://' + host + req.originalUrl;
183 | let renderUrl = proxyUrl + encodeURIComponent(incomingUrl);
184 | if (injectShadyDom) {
185 | renderUrl += '?wc-inject-shadydom=true';
186 | }
187 | request({ url: renderUrl, timeout }, (e) => {
188 | if (e) {
189 | console.error(
190 | `[rendertron middleware] ${e.code} error fetching ${renderUrl}`
191 | );
192 | next();
193 | }
194 | }).pipe(res);
195 | };
196 | }
197 |
--------------------------------------------------------------------------------
/middleware/src/test/middleware-test.ts:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2017 Google Inc. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 |
17 | import test from 'ava';
18 | import express from 'express';
19 | import net from 'net';
20 | import supertest from 'supertest';
21 |
22 | import * as rendertron from '../middleware';
23 |
24 | /**
25 | * Start the given Express app on localhost with a random port.
26 | */
27 | async function listen(app: express.Application): Promise<string> {
28 | return new Promise((resolve: (url: string) => void) => {
29 | const server = app.listen(/* random */ 0, 'localhost', () => {
30 | resolve(`http://localhost:${(server.address() as net.AddressInfo).port}`);
31 | });
32 | });
33 | }
34 |
35 | /**
36 | * Make an Express app that uses the Rendertron middleware and returns
37 | * "fallthrough" if the middleware skipped the request (i.e. called `next`).
38 | */
39 | function makeApp(options: rendertron.Options) {
40 | return express()
41 | .use(rendertron.makeMiddleware(options))
42 | .use((_req, res) => res.end('fallthrough'));
43 | }
44 |
45 | /**
46 | * Make an Express app that takes the place of a Rendertron server instance and
47 | * always responds with "proxy <requested url>".
48 | */
49 | function makeProxy() {
50 | return express().use((req, res) => {
51 | res.end('proxy ' + decodeURIComponent(req.url.substring(1)));
52 | });
53 | }
54 |
55 | const bot = 'slackbot';
56 | const human = 'Chrome';
57 |
58 | /**
59 | * GET a URL with the given user agent.
60 | */
61 | async function get(
62 | userAgent: string,
63 | host: string,
64 | path: string,
65 | headers?: Record<string, string>
66 | ) {
67 | const t = supertest(host).get(path).set('User-Agent', userAgent);
68 | if (headers) {
69 | for (const key in headers) {
70 | t.set(key, headers[key]);
71 | }
72 | }
73 | return await t;
74 | }
75 |
76 | test('makes a middleware function', async (t) => {
77 | const m = rendertron.makeMiddleware({ proxyUrl: 'http://example.com' });
78 | t.truthy(m);
79 | });
80 |
81 | test('throws if no proxyUrl given', async (t) => {
82 | const makeMiddlewareUntyped = rendertron.makeMiddleware as (
83 | options?: unknown
84 | ) => express.Application;
85 | t.throws(() => makeMiddlewareUntyped());
86 | t.throws(() => makeMiddlewareUntyped({}));
87 | t.throws(() => makeMiddlewareUntyped({ proxyUrl: '' }));
88 | });
89 |
90 | test('proxies through given url', async (t) => {
91 | const proxyUrl = await listen(makeProxy());
92 | const appUrl = await listen(makeApp({ proxyUrl }));
93 |
94 | const res = await get(bot, appUrl, '/foo');
95 | t.is(res.status, 200);
96 | t.is(res.text, 'proxy ' + appUrl + '/foo');
97 | });
98 |
99 | test('proxyUrl can have trailing slash', async (t) => {
100 | const proxyUrl = await listen(makeProxy());
101 | // Make sure our other tests are testing the no-trailing-slash case.
102 | t.false(proxyUrl.endsWith('/'));
103 | const appUrl = await listen(makeApp({ proxyUrl: proxyUrl + '/' }));
104 |
105 | const res = await get(bot, appUrl, '/foo');
106 | t.is(res.status, 200);
107 | t.is(res.text, 'proxy ' + appUrl + '/foo');
108 | });
109 |
110 | test('adds shady dom parameter', async (t) => {
111 | const proxyUrl = await listen(makeProxy());
112 | const appUrl = await listen(makeApp({ proxyUrl, injectShadyDom: true }));
113 |
114 | const res = await get(bot, appUrl, '/foo');
115 | t.is(res.status, 200);
116 | t.is(res.text, 'proxy ' + appUrl + '/foo?wc-inject-shadydom=true');
117 | });
118 |
119 | test('excludes static file paths by default', async (t) => {
120 | const proxyUrl = await listen(makeProxy());
121 | const appUrl = await listen(makeApp({ proxyUrl }));
122 |
123 | const res = await get(bot, appUrl, '/foo.png');
124 | t.is(res.text, 'fallthrough');
125 | });
126 |
127 | test('url exclusion only matches url path component', async (t) => {
128 | const proxyUrl = await listen(makeProxy());
129 | const appUrl = await listen(makeApp({ proxyUrl }));
130 |
131 | const res = await get(bot, appUrl, '/foo.png?params');
132 | t.is(res.text, 'fallthrough');
133 | });
134 |
135 | test('excludes non-bot user agents by default', async (t) => {
136 | const proxyUrl = await listen(makeProxy());
137 | const appUrl = await listen(makeApp({ proxyUrl }));
138 |
139 | const res = await get(human, appUrl, '/foo');
140 | t.is(res.text, 'fallthrough');
141 | });
142 |
143 | test('respects custom user agent pattern', async (t) => {
144 | const proxyUrl = await listen(makeProxy());
145 | const appUrl = await listen(makeApp({ proxyUrl, userAgentPattern: /borg/ }));
146 |
147 | let res;
148 |
149 | res = await get('humon', appUrl, '/foo');
150 | t.is(res.text, 'fallthrough');
151 |
152 | res = await get('borg', appUrl, '/foo');
153 | t.is(res.text, 'proxy ' + appUrl + '/foo');
154 | });
155 |
156 | test('respects custom exclude url pattern', async (t) => {
157 | const proxyUrl = await listen(makeProxy());
158 | const appUrl = await listen(makeApp({ proxyUrl, excludeUrlPattern: /foo/ }));
159 |
160 | let res;
161 |
162 | res = await get(bot, appUrl, '/foo');
163 | t.is(res.text, 'fallthrough');
164 |
165 | res = await get(bot, appUrl, '/bar');
166 | t.is(res.text, 'proxy ' + appUrl + '/bar');
167 | });
168 |
169 | test('forwards proxy error status and body', async (t) => {
170 | // This proxy always returns an error.
171 | const proxyUrl = await listen(
172 | express().use((_req, res) => res.status(500).end('proxy error'))
173 | );
174 | const appUrl = await listen(makeApp({ proxyUrl }));
175 |
176 | const res = await get(bot, appUrl, '/bar');
177 | t.is(res.status, 500);
178 | t.is(res.text, 'proxy error');
179 | });
180 |
181 | test('falls through after timeout', async (t) => {
182 | // This proxy returns after 20ms, but our timeout is 10ms.
183 | const proxyUrl = await listen(
184 | express().use((_req, res) => {
185 | setTimeout(() => res.end('too slow'), 20);
186 | })
187 | );
188 | const appUrl = await listen(makeApp({ proxyUrl, timeout: 10 }));
189 |
190 | const res = await get(bot, appUrl, '/foo');
191 | t.is(res.text, 'fallthrough');
192 | });
193 |
194 | test('forwards request to allowed host', async (t) => {
195 | const forwardedHost = 'example.com';
196 |
197 | const proxyUrl = await listen(makeProxy());
198 | const appUrl = await listen(
199 | makeApp({
200 | proxyUrl,
201 | allowedForwardedHosts: [forwardedHost],
202 | })
203 | );
204 |
205 | const forwardedUrl = new URL(appUrl);
206 | forwardedUrl.host = forwardedHost;
207 | forwardedUrl.port = '';
208 | forwardedUrl.pathname = '/foo';
209 |
210 | const res = await get(bot, appUrl, '/foo', {
211 | 'X-Forwarded-Host': forwardedHost,
212 | });
213 | t.is(res.status, 200);
214 | t.is(res.text, 'proxy ' + forwardedUrl.href);
215 | });
216 |
217 | test('ignores forwarded host that is not allowed', async (t) => {
218 | const proxyUrl = await listen(makeProxy());
219 | const appUrl = await listen(
220 | makeApp({
221 | proxyUrl,
222 | allowedForwardedHosts: ['example.com'],
223 | })
224 | );
225 |
226 | const res = await get(bot, appUrl, '/foo', {
227 | 'X-Forwarded-Host': 'malicious.com',
228 | });
229 | t.is(res.status, 200);
230 | t.is(res.text, 'proxy ' + appUrl + '/foo');
231 | });
232 |
--------------------------------------------------------------------------------
/middleware/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "../tsconfig.json",
3 | "compilerOptions": {
4 | "rootDir": "./src",
5 | "outDir": "./build"
6 | },
7 | "include": ["src/**/*.ts"]
8 | }
9 |
--------------------------------------------------------------------------------
/nodemon.json:
--------------------------------------------------------------------------------
1 | {
2 | "ignore": ["**/*.test.ts", ".git", "node_modules"],
3 | "watch": ["src"],
4 | "exec": "npm run build && npm run start",
5 | "ext": "ts"
6 | }
7 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "rendertron",
3 | "version": "3.1.0",
4 | "description": "Renders webpages using headless Chrome for usage by bots",
5 | "license": "Apache-2.0",
6 | "repository": "https://github.com/GoogleChrome/rendertron",
7 | "engines": {
8 | "node": ">=10"
9 | },
10 | "main": "build/rendertron.js",
11 | "types": "build/rendertron.d.ts",
12 | "bin": "bin/rendertron",
13 | "scripts": {
14 | "build": "tsc",
15 | "prepack": "npm run build",
16 | "start": "node build/rendertron.js",
17 | "format": "npm run format:eslint && npm run format:prettier",
18 | "format:eslint": "npm run lint -- --fix",
19 | "format:prettier": "prettier \"**/*.{html,js,json,md,ts}\" --ignore-path .gitignore --write",
20 | "lint": "eslint \"**/*.{js,ts}\" --ignore-path .gitignore",
21 | "monitor": "nodemon",
22 | "monitor-inspect": "nodemon --inspect src/main.js",
23 | "test": "(cd test-resources && npm install) && npm run build && ava build/test/app-test.js --timeout 5s",
24 | "start-emulator": "(gcloud beta emulators datastore start --no-store-on-disk --project emulator-project --host-port localhost:8380 &) 2>&1 | grep -m1 'now running'",
25 | "test-cache": "npm run build && npm run start-emulator && $(gcloud beta emulators datastore env-init) && export GCLOUD_PROJECT=emulator-project && ava build/test/*-cache-test.js"
26 | },
27 | "files": [
28 | "bin/",
29 | "build/",
30 | "!build/test/",
31 | "app.json"
32 | ],
33 | "dependencies": {
34 | "@webcomponents/webcomponentsjs": "^2.5.0",
35 | "chrome-launcher": "^0.13.4",
36 | "chrome-remote-interface": "^0.29.0",
37 | "fs-extra": "^9.1.0",
38 | "koa": "^2.13.1",
39 | "koa-bodyparser": "^4.3.0",
40 | "koa-compress": "^5.0.1",
41 | "koa-logger": "^3.2.1",
42 | "koa-route": "^3.2.0",
43 | "koa-send": "^5.0.1",
44 | "koa-static": "^5.0.0",
45 | "performance-now": "^2.1.0",
46 | "puppeteer": "^10.0.0",
47 | "semver": "^7.3.4"
48 | },
49 | "devDependencies": {
50 | "@google-cloud/datastore": "^6.3.1",
51 | "@types/fs-extra": "^9.0.7",
52 | "@types/koa": "^2.11.6",
53 | "@types/koa-bodyparser": "^4.3.0",
54 | "@types/koa-compress": "^4.0.1",
55 | "@types/koa-logger": "^3.1.1",
56 | "@types/koa-route": "^3.2.4",
57 | "@types/koa-send": "^4.1.2",
58 | "@types/koa-static": "^4.0.1",
59 | "@types/node": "^14.14.10",
60 | "@types/puppeteer": "^5.4.3",
61 | "@types/supertest": "^2.0.10",
62 | "@typescript-eslint/parser": "^4.14.2",
63 | "@typescript-eslint/eslint-plugin": "^4.15.1",
64 | "ava": "^3.15.0",
65 | "eslint": "^7.23.0",
66 | "nodemon": "^2.0.6",
67 | "prettier": "^2.2.1",
68 | "supertest": "^6.1.3",
69 | "typescript": "4.0.5"
70 | },
71 | "prettier": {
72 | "singleQuote": true
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/src/config.ts:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2018 Google Inc. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may
5 | not
6 | * use this file except in compliance with the License. You may obtain a copy
7 | of
8 | * the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15 | * License for the specific language governing permissions and limitations
16 | under
17 | * the License.
18 | */
19 |
20 | 'use strict';
21 |
22 | import * as fse from 'fs-extra';
23 | import * as path from 'path';
24 | import * as os from 'os';
25 |
26 | const CONFIG_PATH = path.resolve(__dirname, '../config.json');
27 |
28 | export type Config = {
29 | cache: 'datastore' | 'memory' | 'filesystem' | null;
30 | cacheConfig: { [key: string]: string };
31 | timeout: number;
32 | port: string;
33 | host: string;
34 | width: number;
35 | height: number;
36 | reqHeaders: { [key: string]: string };
37 | headers: { [key: string]: string };
38 | puppeteerArgs: Array<string>;
39 | renderOnly: Array<string>;
40 | closeBrowser: boolean;
41 | restrictedUrlPattern: string | null;
42 | };
43 |
44 | export class ConfigManager {
45 | public static config: Config = {
46 | cache: null,
47 | cacheConfig: {
48 | snapshotDir: path.join(os.tmpdir(), 'rendertron'),
49 | cacheDurationMinutes: (60 * 24).toString(),
50 | cacheMaxEntries: '100',
51 | },
52 | timeout: 10000,
53 | port: '3000',
54 | host: '0.0.0.0',
55 | width: 1000,
56 | height: 1000,
57 | reqHeaders: {},
58 | headers: {},
59 | puppeteerArgs: ['--no-sandbox'],
60 | renderOnly: [],
61 | closeBrowser: false,
62 | restrictedUrlPattern: null
63 | };
64 |
65 | static async getConfiguration(): Promise<Config> {
66 | // Load config.json if it exists.
67 | if (fse.pathExistsSync(CONFIG_PATH)) {
68 | const configJson = await fse.readJson(CONFIG_PATH);
69 |
70 | // merge cacheConfig
71 | const cacheConfig = Object.assign(
72 | ConfigManager.config.cacheConfig,
73 | configJson.cacheConfig
74 | );
75 |
76 | ConfigManager.config = Object.assign(ConfigManager.config, configJson);
77 |
78 | ConfigManager.config.cacheConfig = cacheConfig;
79 | }
80 | return ConfigManager.config;
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/src/datastore-cache.ts:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2018 Google Inc. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may
5 | not
6 | * use this file except in compliance with the License. You may obtain a copy
7 | of
8 | * the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15 | * License for the specific language governing permissions and limitations
16 | under
17 | * the License.
18 | */
19 |
20 | 'use strict';
21 |
22 | import Koa from 'koa';
23 | import { Config, ConfigManager } from './config';
24 |
25 | import { Datastore } from '@google-cloud/datastore';
26 | import { entity } from '@google-cloud/datastore/build/src/entity';
27 |
/** A cached page as stored in (and read back from) the datastore. */
type CacheContent = {
  /** JSON-serialized response headers. */
  headers: string;
  /** JSON-serialized response body. */
  payload: string;
  /** Time at which the entry was written. */
  saved: Date;
  /** Time after which the entry is no longer served. */
  expires: Date;
};
34 |
/**
 * View of a query result that exposes the value stored under the
 * `Datastore.KEY` symbol, which is what `datastore.delete()` is given.
 */
type DatastoreObject = {
  [Datastore.KEY]: Record<string, unknown>;
};
38 |
/**
 * Koa cache layer backed by Google Cloud Datastore. Responses are stored as
 * `Page` entities keyed by the request URL (minus the `refreshCache`
 * parameter).
 */
export class DatastoreCache {
  private datastore: Datastore = new Datastore();
  private config: Config = ConfigManager.config;

  /**
   * Deletes the `Page` entities returned by a single query run.
   */
  async clearCache() {
    const query = this.datastore.createQuery('Page');
    const [entities] = await query.run();
    const entityKeys = entities.map(
      (item: Record<string, unknown>) =>
        (item as DatastoreObject)[Datastore.KEY]
    );
    console.log(`Removing ${entities.length} items from the cache`);
    await this.datastore.delete(entityKeys);
    // TODO(samli): check info (data[1]) and loop through pages of entities to
    // delete.
  }

  /**
   * Stores a response under `key`, evicting the entries that expire soonest
   * when the `cacheMaxEntries` limit is reached (a limit of -1 disables
   * eviction).
   *
   * @param key Datastore key of the `Page` entity to write.
   * @param headers Response headers; stored JSON-serialized.
   * @param payload Response body; stored JSON-serialized.
   */
  async cacheContent(
    // eslint-disable-next-line @typescript-eslint/ban-types
    key: object,
    headers: Record<string, string>,
    payload: Buffer
  ) {
    const now = new Date();
    const maxEntries = parseInt(this.config.cacheConfig.cacheMaxEntries);
    const durationMs =
      parseInt(this.config.cacheConfig.cacheDurationMinutes) * 60 * 1000;

    // Eviction is started first and runs concurrently with the save; it
    // handles its own errors, so only a failed save rejects this method.
    const eviction =
      maxEntries !== -1
        ? this.evictOldestEntries(maxEntries)
        : Promise.resolve();

    const pageEntity = {
      key: key,
      data: [
        { name: 'saved', value: now },
        { name: 'expires', value: new Date(now.getTime() + durationMs) },
        {
          name: 'headers',
          value: JSON.stringify(headers),
          excludeFromIndexes: true,
        },
        {
          name: 'payload',
          value: JSON.stringify(payload),
          excludeFromIndexes: true,
        },
      ],
    };
    await Promise.all([eviction, this.datastore.save(pageEntity)]);
  }

  /**
   * Makes room for one new entry: when the number of stored pages is at or
   * above `maxEntries`, deletes the pages with the earliest `expires` value.
   * Best effort: failures are logged and never propagate.
   */
  private async evictOldestEntries(maxEntries: number): Promise<void> {
    try {
      const query = this.datastore
        .createQuery('Page')
        .select('__key__')
        .order('expires');
      const [entities] = await this.datastore.runQuery(query);
      const storedKeys = (entities || []).map(
        (item: Record<string, unknown>) =>
          (item as DatastoreObject)[Datastore.KEY]
      );
      if (storedKeys.length >= maxEntries) {
        // +1 leaves space for the entry that is about to be saved.
        const toRemove = storedKeys.length - maxEntries + 1;
        console.log(`Deleting: ${toRemove}`);
        await this.datastore.delete(storedKeys.slice(0, toRemove));
      }
    } catch (err) {
      console.log(`datastore err: ${err} reported`);
    }
  }

  /**
   * Deletes the `Page` entity named `key`.
   */
  async removeEntry(key: string) {
    const datastoreKey = this.datastore.key(['Page', key]);
    await this.datastore.delete(datastoreKey);
  }

  /**
   * Looks up `key` in the datastore, unless the request carries a truthy
   * `refreshCache` query parameter, in which case `null` is returned without
   * touching the datastore.
   */
  async getCachedContent(ctx: Koa.Context, key: entity.Key) {
    if (ctx.query.refreshCache) {
      return null;
    } else {
      return await this.datastore.get(key);
    }
  }

  /**
   * Returns middleware function.
   *
   * The middleware serves a stored response when one exists and has not
   * expired (or when `cacheDurationMinutes` is -1); otherwise it runs the
   * downstream middleware and stores the result if the status is 200.
   */
  middleware() {
    return async (ctx: Koa.Context, next: () => Promise<unknown>) => {
      // Cache based on full URL. This means requests with different params are
      // cached separately (except for refreshCache parameter).
      // When refreshCache sits between two other parameters, one '&' must be
      // kept so the neighbours are not fused together.
      let cacheKey = ctx.url.replace(
        /&?refreshCache=(?:true|false)&?/i,
        (match) => (match.startsWith('&') && match.endsWith('&') ? '&' : '')
      );

      if (cacheKey.endsWith('?')) {
        cacheKey = cacheKey.slice(0, -1);
      }
      const key = this.datastore.key(['Page', cacheKey]);
      const results = await this.getCachedContent(ctx, key);
      if (results && results.length && results[0] !== undefined) {
        const content = results[0] as CacheContent;
        // Serve cached content if its not expired.
        if (
          content.expires.getTime() >= new Date().getTime() ||
          parseInt(this.config.cacheConfig.cacheDurationMinutes) === -1
        ) {
          const headers = JSON.parse(content.headers);
          ctx.set(headers);
          ctx.set('x-rendertron-cached', content.saved.toUTCString());
          try {
            let payload = JSON.parse(content.payload);
            // A Buffer serializes to { type: 'Buffer', data: [...] }; turn it
            // back into a real Buffer before sending.
            if (
              payload &&
              typeof payload === 'object' &&
              payload.type === 'Buffer'
            ) {
              payload = Buffer.from(payload);
            }
            ctx.body = payload;
            return;
          } catch (error) {
            console.log(
              'Erroring parsing cache contents, falling back to normal render'
            );
          }
        }
      }

      await next();

      if (ctx.status === 200) {
        // Not awaited, so the response is not delayed by the datastore write;
        // the catch keeps a failed write from becoming an unhandled rejection.
        this.cacheContent(key, ctx.response.headers, ctx.body).catch(
          (err: unknown) => {
            console.log(`datastore err: ${err} reported`);
          }
        );
      }
    };
  }

  /**
   * Returns the bound handler that removes a single entry from the cache.
   */
  invalidateHandler() {
    return this.handleInvalidateRequest.bind(this);
  }

  // Awaits the deletion so that 200 is only reported once it has succeeded.
  private async handleInvalidateRequest(ctx: Koa.Context, url: string) {
    await this.removeEntry(url);
    ctx.status = 200;
  }

  /**
   * Returns the bound handler that clears the cache.
   */
  clearAllCacheHandler() {
    return this.handleClearAllCacheRequest.bind(this);
  }

  // Awaits the clean-up so that 200 is only reported once it has succeeded.
  private async handleClearAllCacheRequest(ctx: Koa.Context) {
    await this.clearCache();
    ctx.status = 200;
  }
}
208 |
--------------------------------------------------------------------------------
/src/filesystem-cache.ts:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2020 Google Inc. All rights reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may
5 | not
6 | * use this file except in compliance with the License. You may obtain a copy
7 | of
8 | * the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15 | * License for the specific language governing permissions and limitations
16 | under
17 | * the License.
18 | */
19 |
20 | 'use strict';
21 |
22 | import { createHash } from 'crypto';
23 |
24 | import * as fs from 'fs';
25 | import * as path from 'path';
26 | import * as Koa from 'koa';
27 | import { Config } from './config';
28 |
/** A cache entry as returned by `FilesystemCache.getCachedContent`. */
type CacheContent = {
  /** The stored response body. */
  payload: string;
  /** The stored response description (status and headers), JSON-serialized. */
  response: string;
  /** Modification time of the cache file. */
  saved: Date;
  /** `saved` plus the configured cache duration. */
  expires: Date;
};
35 |
/**
 * Koa cache layer that stores each response as a JSON file named
 * `<md5 of the sanitized URL>.json` inside `cacheConfig.snapshotDir`.
 */
export class FilesystemCache {
  private config: Config;
  private cacheConfig: { [key: string]: string };

  constructor(config: Config) {
    this.config = config;
    this.cacheConfig = this.config.cacheConfig;
  }

  /**
   * Returns the hex MD5 digest of `s`, or `'0'` for the empty string.
   */
  hashCode = (s: string) => {
    if (s.length === 0) return '0';

    return createHash('md5').update(s).digest('hex');
  };

  /**
   * Returns the snapshot directory (creating it if necessary), joined with
   * `key` when `key` is non-empty.
   */
  getDir = (key: string) => {
    const dir = this.cacheConfig.snapshotDir;
    if (!fs.existsSync(dir)) {
      fs.mkdirSync(dir, { recursive: true });
    }

    if (key) {
      return path.join(dir, key);
    }

    return dir;
  };

  /**
   * Deletes the cache file for `key` if it exists. `key` may be given with or
   * without the `.json` suffix. Deletion errors are logged, not thrown.
   * All file work is synchronous, so the file is gone by the time this
   * method returns its promise.
   */
  async clearCache(key: string) {
    const fileName = key.endsWith('.json') ? key : key + '.json';
    const filePath = path.join(this.getDir(''), fileName);
    if (fs.existsSync(filePath)) {
      try {
        fs.unlinkSync(filePath);
        console.log(`deleting: ${filePath}`);
      } catch (err) {
        console.log(err);
      }
    }
  }

  /**
   * Returns the bound handler that empties the cache directory.
   */
  clearAllCacheHandler() {
    return this.handleClearAllCacheRequest.bind(this);
  }

  private async handleClearAllCacheRequest(ctx: Koa.Context) {
    await this.clearAllCache();
    ctx.status = 200;
  }

  /**
   * Deletes every file in the snapshot directory.
   *
   * The returned promise settles only after all deletions have finished and
   * rejects if listing the directory or deleting any file fails.
   */
  async clearAllCache(): Promise<void> {
    const dir = this.getDir('');
    const files = await fs.promises.readdir(dir);
    await Promise.all(
      files.map((file) => fs.promises.unlink(path.join(dir, file)))
    );
  }

  /**
   * Returns `{ fileName, age }` records for the files of `numCache` that still
   * exist, ordered from oldest to newest modification time.
   */
  private sortFilesByModDate(numCache: string[]) {
    const dir = this.getDir('');
    const dirsDate: { fileName: string; age: number }[] = [];
    for (const fileName of numCache) {
      const filePath = path.join(dir, fileName);
      if (fs.existsSync(filePath)) {
        dirsDate.push({ fileName, age: fs.statSync(filePath).mtime.getTime() });
      }
    }
    // Numeric difference gives a consistent comparator (0 for equal ages).
    dirsDate.sort((a, b) => a.age - b.age);
    return dirsDate;
  }

  /**
   * Writes the response held by `ctx` to `<key>.json`, first removing the
   * oldest cache files when the `cacheMaxEntries` limit is reached (a limit
   * of -1 disables eviction).
   */
  cacheContent(key: string, ctx: Koa.Context) {
    const responseHeaders = ctx.response;
    const responseBody = ctx.body;
    const request = ctx.request;
    const dir = this.getDir('');
    const targetName = key + '.json';
    const maxEntries = parseInt(this.config.cacheConfig.cacheMaxEntries);

    // check size of stored cache to see if we are over the max number of
    // allowed entries, and remove over quota, oldest first
    if (maxEntries !== -1) {
      const numCache = fs.readdirSync(dir);
      if (numCache.length >= maxEntries) {
        // +1 leaves space for the entry that is about to be written.
        const toRemove = numCache.length - maxEntries + 1;
        const oldest = this.sortFilesByModDate(numCache).slice(0, toRemove);
        for (const { fileName } of oldest) {
          // The file about to be overwritten does not need to be deleted.
          if (fileName !== targetName) {
            console.log(`max cache entries reached - removing: ${fileName}`);
            void this.clearCache(fileName);
          }
        }
      }
    }
    fs.writeFileSync(
      path.join(dir, targetName),
      JSON.stringify({ responseBody, responseHeaders, request })
    );
  }

  /**
   * Reads the cache entry for `key`.
   *
   * @returns The entry, or `null` when the request carries a truthy
   *     `refreshCache` query parameter, the file is missing or unreadable, or
   *     the stored body is empty.
   */
  getCachedContent(ctx: Koa.Context, key: string): CacheContent | null {
    if (ctx.query.refreshCache) {
      return null;
    }
    try {
      const filePath = path.join(this.getDir(''), key + '.json');
      const cacheFile = JSON.parse(fs.readFileSync(filePath, 'utf8'));
      const payload = cacheFile.responseBody;
      const response = JSON.stringify(cacheFile.responseHeaders);
      if (!payload) {
        return null;
      }
      // use modification time as the saved time; statSync needs no file
      // descriptor, so nothing is left open
      const saved = fs.statSync(filePath).mtime;
      const expires = new Date(
        saved.getTime() +
          parseInt(this.cacheConfig.cacheDurationMinutes) * 60 * 1000
      );
      return {
        saved,
        expires,
        payload,
        response,
      };
    } catch (err) {
      return null;
    }
  }

  /**
   * Returns the bound handler that removes a single entry from the cache.
   */
  invalidateHandler() {
    return this.handleInvalidateRequest.bind(this);
  }

  /**
   * Normalizes a request URL into the string that is hashed for the cache
   * file name: drops the `refreshCache` parameter, a dangling `?`, a leading
   * `/render/` and a trailing slash.
   */
  sanitizeKey(key: string) {
    // Cache based on full URL. This means requests with different params are
    // cached separately (except for refreshCache parameter).
    // When refreshCache sits between two other parameters, one '&' must be
    // kept so the neighbours are not fused together.
    let cacheKey = key.replace(/&?refreshCache=(?:true|false)&?/i, (match) =>
      match.startsWith('&') && match.endsWith('&') ? '&' : ''
    );

    if (cacheKey.endsWith('?')) {
      cacheKey = cacheKey.slice(0, -1);
    }

    // remove /render/ from key, only at the start
    if (cacheKey.startsWith('/render/')) {
      cacheKey = cacheKey.substring(8);
    }

    // remove trailing slash from key
    return cacheKey.replace(/\/$/, '');
  }

  private async handleInvalidateRequest(ctx: Koa.Context, url: string) {
    let cacheKey = this.sanitizeKey(url);

    // remove /invalidate/ from key, only at the start
    if (cacheKey.startsWith('/invalidate/')) {
      cacheKey = cacheKey.substring(12);
    }

    // key is hashed crudely
    const key = this.hashCode(cacheKey);
    await this.clearCache(key);
    ctx.status = 200;
  }

  /**
   * Returns middleware function.
   *
   * The middleware serves a stored response when one exists and has not
   * expired (or when `cacheDurationMinutes` is -1); otherwise it runs the
   * downstream middleware and stores the result if the status is 200.
   */
  middleware() {
    return async (ctx: Koa.Context, next: () => Promise<unknown>) => {
      // key is hashed crudely
      const key = this.hashCode(this.sanitizeKey(ctx.url));
      const content = this.getCachedContent(ctx, key);
      if (content) {
        // Serve cached content if its not expired.
        if (
          content.expires.getTime() >= new Date().getTime() ||
          parseInt(this.config.cacheConfig.cacheDurationMinutes) === -1
        ) {
          const response = JSON.parse(content.response);
          ctx.set(response.header);
          ctx.set('x-rendertron-cached', content.saved.toUTCString());
          ctx.status = response.status;
          let payload: string | { type?: string } = content.payload;
          try {
            payload = JSON.parse(content.payload);
          } catch (e) {
            // Not JSON (e.g. plain HTML): serve the stored value as it is.
          }
          try {
            // A Buffer serializes to { type: 'Buffer', data: [...] }; turn it
            // back into a real Buffer before sending.
            if (
              payload &&
              typeof payload === 'object' &&
              payload.type === 'Buffer'
            ) {
              ctx.body = Buffer.from(payload);
            } else {
              ctx.body = payload;
            }
            return;
          } catch (error) {
            console.log(
              'Erroring parsing cache contents, falling back to normal render'
            );
          }
        }
      }

      await next();

      if (ctx.status === 200) {
        this.cacheContent(key, ctx);
      }
    };
  }
}
278 |
--------------------------------------------------------------------------------
/src/index.html:
--------------------------------------------------------------------------------
1 |
16 |
17 |
18 |
19 |
20 |
24 |
25 | Rendertron
26 |
30 |
31 |
35 |
39 |
40 |
64 |
65 |
223 |
224 |
225 |
226 |