├── .eslintrc.json ├── .github ├── CODEOWNERS ├── dependabot.yml └── workflows │ └── main.yml ├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── app.json ├── app.yaml ├── bin └── rendertron ├── docs ├── api-reference.md ├── configure.md ├── deploy.md ├── index.md ├── server-setup.md └── server-setup │ ├── apache.md │ ├── expressjs.md │ └── nginx.md ├── middleware ├── CHANGELOG.md ├── LICENSE ├── README.md ├── package-lock.json ├── package.json ├── src │ ├── middleware.ts │ └── test │ │ └── middleware-test.ts └── tsconfig.json ├── nodemon.json ├── package-lock.json ├── package.json ├── src ├── config.ts ├── datastore-cache.ts ├── filesystem-cache.ts ├── index.html ├── memory-cache.ts ├── renderer.ts ├── rendertron.ts └── test │ ├── app-test.ts │ ├── datastore-cache-test.ts │ ├── filesystem-cache-test.ts │ └── memory-cache-test.ts ├── test-resources ├── basic-script.html ├── custom-element.html ├── explicit-render-event.html ├── http-meta-status-code-multiple.html ├── http-meta-status-code.html ├── include-base-as-directory.html ├── include-base.html ├── include-date.html ├── include-doctype.html ├── include-json-ld.html ├── include-script.html ├── inject-element-after-load.js ├── inject-element-module.js ├── inject-element.js ├── package-lock.json ├── package.json ├── request-header.html ├── restrict-test.test.html ├── script-after-load.html ├── shadow-dom-no-polyfill.html ├── shadow-dom-polyfill-all.html └── shadow-dom-polyfill-loader.html └── tsconfig.json /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "eslint:recommended", 4 | "plugin:@typescript-eslint/eslint-recommended", 5 | "plugin:@typescript-eslint/recommended" 6 | ], 7 | "parser": "@typescript-eslint/parser", 8 | "parserOptions": { 9 | "ecmaVersion": 2020, 10 | "sourceType": "module" 11 | }, 12 | "plugins": ["@typescript-eslint"], 13 | "rules": { 14 | "@typescript-eslint/explicit-module-boundary-types": "off" 15 | }, 16 | "overrides": [ 17 | { 18 | "files": "test-resources/**/*", 19 | "env": { 20 | "browser": true 21 | } 22 | } 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @AVGP 2 | /middleware/ @aomarks 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: 'npm' 4 | directory: '/' 5 | schedule: 6 | interval: 'daily' 7 | versioning-strategy: increase 8 | - package-ecosystem: 'npm' 9 | directory: '/middleware/' 10 | schedule: 11 | interval: 'daily' 12 | versioning-strategy: increase 13 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: CI 4 | 5 | # Controls when the action will run. Triggers the workflow on push or pull request 6 | # events but only for the main branch 7 | on: 8 | push: 9 | branches: [ main ] 10 | pull_request: 11 | branches: [ main ] 12 | 13 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 14 | jobs: 15 | lint: 16 | name: Lint 17 | 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | 23 | - uses: actions/setup-node@v1 24 | with: 25 | node-version: 14 26 | 27 | - run: npm ci 28 | 29 | - run: npm run lint 30 | 31 | test: 32 | name: Integration tests 33 | 34 | runs-on: ubuntu-latest 35 | 36 | strategy: 37 | matrix: 38 | node: ['10', '12', '14'] 39 | 40 | steps: 41 | - uses: actions/checkout@v2 42 | 43 | - uses: actions/setup-node@v1 44 | with: 45 | node-version: ${{ matrix.node }} 46 | 47 | - run: npm ci 48 | 49 | - run: npm test 50 | 51 | test-cache: 52 | name: e2e cache tests 53 | 54 | runs-on: ubuntu-latest 55 | 56 | strategy: 57 | matrix: 58 | node: ['10', '12', '14'] 59 | 60 | steps: 61 | - uses: actions/checkout@v2 62 | 63 | - uses: actions/setup-node@v1 64 | with: 65 | node-version: ${{ matrix.node }} 66 | 67 | - name: Install dependencies 68 | run: | 69 | npm ci 70 | export CLOUDSDK_CORE_DISABLE_PROMPTS=1 71 | sudo apt-get update && sudo apt-get install google-cloud-sdk google-cloud-sdk-datastore-emulator 72 | mkdir ./tmp 73 | echo '{"cacheConfig": { "snapshotDir": "./tmp/rendertron" } }' > ./config.json 74 | 75 | - name: Run the e2e cache tests 76 | run: npm run test-cache 77 | 78 | test-middleware: 79 | name: Middleware tests 80 | 81 | runs-on: ubuntu-latest 82 | 83 | strategy: 84 | matrix: 85 | node: ['10', '12', '14'] 86 | 87 | steps: 88 | - uses: actions/checkout@v2 89 | 90 | - uses: actions/setup-node@v1 91 | with: 92 | node-version: ${{ matrix.node }} 93 | 94 | - run: cd middleware/ 95 | 96 | - run: npm ci 97 | 98 | - run: npm run test 99 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | node_modules/ 3 | test.jpeg 4 | yarn.lock 5 | *.tgz 6 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "type": "node", 9 | "request": "launch", 10 | "name": "Launch Program", 11 | "program": "${workspaceFolder}/build/rendertron.js" 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.formatOnSave": true, 3 | "editor.formatOnType": true, 4 | "search.useIgnoreFiles": false, 5 | "search.exclude": { 6 | "node_modules/": true 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | 4 | 5 | ## [3.1.0] 2020-11-04 6 | 7 | ### Security improvements 8 | 9 | - Prevents rendering of \*.internal URLs, patching an issue with GCE. 10 | - Adds allow-listing option to restrict rendering to a given list of domains or URL patterns. 11 | 12 | ### Features 13 | 14 | - Include Heroku deploy documentation and an interactive "Click to deploy" for Heroku 15 | - Introduces an API endpoint to clear all cache 16 | - Adds timezone support 17 | - Adds optional forced browser restart between renders 18 | - Adds documentation to deploy Rendertron via Docker 19 | - Adds option to add request headers to the rendered pages in Rendertron 20 | 21 | ### Improvements 22 | 23 | - Fixes a bug in the filesystem cache 24 | - Fixes issue with the injected base tag 25 | - Updates all dependencies 26 | - Increased the required node.js version to Node.js 10+ 27 | - Updates to the FaQ 28 | 29 | ## [3.0.0] 2020-07-02 30 | 31 | - Introduces new configuration file format for the `config.json` options (see [README.md](./README.md)) 32 | - Introduces new cache providers: 33 | - In-memory cache 34 | - File system cache 35 | - Introduces API endpoint to invalidate cache for a URL 36 | - Introduces a number of new configuration options 37 | - Introduces `refreshCache` parameter to force cache update for a URL 38 | - Relaunches browser when the browser disconnects from Puppeteer 39 | - Now includes doctype in rendered output 40 | - Harmonises the configuration options for caches 41 | - Closes page after screenshot 42 | - Fixes security issue with AppEngine deployments 43 | - Fixes issue with specifying host and port 44 | 45 | ## [2.0.1] 2018-09-18 46 | 47 | - Remove testing and other files from NPM package. 48 | - Fix NPM main config. 49 | - Improved restrictions for endpoints. 50 | - Support for structured data by not stripping all script tags. 51 | 52 | ## [2.0.0] 2018-07-26 53 | 54 | - Rebuilt with Puppeteer under the hood 55 | - Rebuilt as Koa server instead of an Express server 56 | - Rebuilt using Typescript 57 | - Removed explicit rendering flag 58 | - Added support for a mobile parameter 59 | - Added more options for screenshots 60 | 61 | ## [1.1.1] 2018-01-05 62 | 63 | - Update `debug` flag to log requested URLs to render 64 | - Fix for renderComplete flag 65 | - Minor bug fixes 66 | 67 | ## [1.1.0] 2017-10-27 68 | 69 | - Initial release on NPM 70 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [How to Contribute](#how-to-contribute) 4 | - [Contributor License Agreement](#contributor-license-agreement) 5 | - [Getting Code](#getting-code) 6 | - [Code reviews](#code-reviews) 7 | - [Code Style](#code-style) 8 | - [Adding New Dependencies](#adding-new-dependencies) 9 | - [Running & Writing Tests](#running--writing-tests) 10 | 11 | 12 | # How to Contribute 13 | 14 | First of all, thank you for your interest in Rendertron! 15 | We'd love to accept your patches and contributions! 16 | 17 | ## Contributor License Agreement 18 | 19 | Contributions to this project must be accompanied by a Contributor License 20 | Agreement. You (or your employer) retain the copyright to your contribution, 21 | this simply gives us permission to use and redistribute your contributions as 22 | part of the project. Head over to to see 23 | your current agreements on file or to sign a new one. 24 | 25 | You generally only need to submit a CLA once, so if you've already submitted one 26 | (even if it was for a different project), you probably don't need to do it 27 | again. 28 | 29 | ## Getting Code 30 | 31 | 1. Clone this repository 32 | 33 | ```bash 34 | git clone https://github.com/GoogleChrome/rendertron 35 | cd rendertron 36 | ``` 37 | 38 | 2. Install dependencies 39 | 40 | ```bash 41 | npm install 42 | ``` 43 | 44 | 3. Run tests locally. For more information about tests, read [Running & Writing Tests](#running--writing-tests). 45 | 46 | ```bash 47 | npm test 48 | ``` 49 | 50 | 4. (Optional) when developing cache related stuff you will need `npm run test-cache` commands for tests. 51 | - This requires the Google Cloud SDK, follow the steps described [here](https://cloud.google.com/sdk/docs/#install_the_latest_cloud_tools_version_cloudsdk_current_version) to install 52 | - run `gcloud components install beta cloud-datastore-emulator` 53 | - if you do not have Java 8+ JRE installed, you should install it too, as the Google Cloud Datastore emulator requires it 54 | 55 | ## Code reviews 56 | 57 | All submissions, including submissions by project members, require review. We 58 | use GitHub pull requests for this purpose. Consult 59 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 60 | information on using pull requests. 61 | 62 | ## Code Style 63 | 64 | - Coding style is fully defined in [tslint.json](https://github.com/GoogleChrome/rendertron/blob/master/tslint.json) 65 | - Comments should be generally avoided. If the code would not be understood without comments, consider re-writing the code to make it self-explanatory. 66 | 67 | To run code linter, use: 68 | 69 | ```bash 70 | npm run lint 71 | ``` 72 | 73 | ## Adding New Dependencies 74 | 75 | For all dependencies (both installation and development): 76 | 77 | - **Do not add** a dependency if the desired functionality is easily implementable. 78 | - If adding a dependency, it should be well-maintained and trustworthy. 79 | 80 | A barrier for introducing new installation dependencies is especially high: 81 | 82 | - **Do not add** installation dependency unless it's critical to project success. 83 | 84 | ## Running & Writing Tests 85 | 86 | - Every feature should be accompanied by a test. 87 | - Tests should be _hermetic_. Tests should not depend on external services unless absolutely needed. 88 | - Tests should work on all three platforms: Mac, Linux and Windows. 89 | 90 | - To run all tests: 91 | 92 | ```bash 93 | npm test 94 | ``` 95 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rendertron 2 | 3 | [![CI](https://github.com/GoogleChrome/rendertron/workflows/CI/badge.svg)](https://github.com/GoogleChrome/rendertron/actions) 4 | [![NPM package](https://img.shields.io/npm/v/rendertron.svg)](https://npmjs.org/package/rendertron) 5 | 6 | 7 | ## Rendertron is deprecated 8 | 9 | Please note that this project is **deprecated**. Dynamic rendering [is not a recommended approach](https://developers.google.com/search/docs/crawling-indexing/javascript/dynamic-rendering) and [there are better approaches to rendering on the web](https://web.dev/rendering-on-the-web/). 10 | 11 | **Rendertron will not be actively maintained at this point.** 12 | 13 | > Rendertron is a headless Chrome rendering solution designed to render & serialise web pages on the fly. 14 | 15 | #### :hammer: Built with [Puppeteer](https://github.com/GoogleChrome/puppeteer) 16 | 17 | #### :cloud: Easy deployment to Google Cloud 18 | 19 | #### :mag: Improves SEO 20 | 21 | Rendertron is designed to enable your Progressive Web App (PWA) to serve the correct 22 | content to any bot that doesn't render or execute JavaScript. Rendertron runs as a 23 | standalone HTTP server. Rendertron renders requested pages using Headless Chrome, 24 | [auto-detecting](#auto-detecting-loading-function) when your PWA has completed loading 25 | and serializes the response back to the original request. To use Rendertron, your application 26 | configures [middleware](#middleware) to determine whether to proxy a request to Rendertron. 27 | Rendertron is compatible with all client side technologies, including [web components](#web-components). 28 | 29 | ## Contents 30 | 31 | - [Middleware](#middleware) 32 | - [API](#api) 33 | - [Render](#render) 34 | - [Screenshot](#screenshot) 35 | - [Invalidate cache](#invalidate-cache) 36 | - [FAQ](#faq) 37 | - [Query parameters](#query-parameters) 38 | - [Page render timing](#page-render-timing) 39 | - [Rendering budget timeout](#rendering-budget-timeout) 40 | - [Web components](#web-components) 41 | - [Status codes](#status-codes) 42 | - [Installing & deploying](#installing--deploying) 43 | - [Building](#building) 44 | - [Running locally](#running-locally) 45 | - [Deploying to Google Cloud Platform](#deploying-to-google-cloud-platform) 46 | - [Deploying using Docker](#deploying-using-docker) 47 | - [Config](#config) 48 | - [Troubleshooting](#troubleshooting) 49 | 50 | ## Middleware 51 | 52 | Once you have the service up and running, you'll need to implement the differential serving 53 | layer. This checks the user agent to determine whether prerendering is required. 54 | 55 | This is a list of middleware available to use with the Rendertron service: 56 | 57 | - [Express.js middleware](/middleware) 58 | - [Firebase functions](https://github.com/justinribeiro/pwa-firebase-functions-botrender) (Community maintained) 59 | - [ASP.net core middleware](https://github.com/galamai/AspNetCore.Rendertron) (Community maintained) 60 | - [Python (Django) middleware and decorator](https://github.com/frontendr/python-rendertron) (Community maintained) 61 | 62 | Rendertron is also compatible with [prerender.io middleware](https://prerender.io/documentation/install-middleware). 63 | Note: the user agent lists differ there. 64 | 65 | ## API 66 | 67 | ### Render 68 | 69 | ``` 70 | GET /render/ 71 | ``` 72 | 73 | The `render` endpoint will render your page and serialize your page. Options are 74 | specified as query parameters: 75 | 76 | - `mobile` defaults to `false`. Enable by passing `?mobile` to request the 77 | mobile version of your site. 78 | - `refreshCache`: Pass `refreshCache=true` to ignore potentially cached render results 79 | and treat the request as if it is not cached yet. 80 | The new render result is used to replace the previous result. 81 | 82 | ### Screenshot 83 | 84 | ``` 85 | GET /screenshot/ 86 | POST /screenshot/ 87 | ``` 88 | 89 | The `screenshot` endpoint can be used to verify that your page is rendering 90 | correctly. 91 | 92 | Both endpoints support the following query parameters: 93 | 94 | - `width` defaults to `1000` - specifies viewport width. 95 | - `height` defaults to `1000` - specifies viewport height. 96 | - `mobile` defaults to `false`. Enable by passing `?mobile` to request the 97 | mobile version of your site. 98 | - `timezoneId` - specifies rendering for timezone. 99 | 100 | Additional options are available as a JSON string in the `POST` body. See 101 | [Puppeteer documentation](https://github.com/GoogleChrome/puppeteer/blob/v1.6.0/docs/api.md#pagescreenshotoptions) 102 | for available options. You cannot specify the `type` (defaults to `jpeg`) and 103 | `encoding` (defaults to `binary`) parameters. 104 | 105 | ### Invalidate cache 106 | 107 | ``` 108 | GET /invalidate/ 109 | ``` 110 | 111 | The `invalidate` endpoint will remove cache entried for `` from the configured cache (in-memory, filesystem or cloud datastore). 112 | 113 | ## FAQ 114 | 115 | ### Query parameters 116 | 117 | When setting query parameters as part of your URL, ensure they are encoded correctly. In JS, 118 | this would be `encodeURIComponent(myURLWithParams)`. For example to specify `page=home`: 119 | 120 | ``` 121 | https://render-tron.appspot.com/render/http://my.domain/%3Fpage%3Dhome 122 | ``` 123 | 124 | ### Page render timing 125 | 126 | The service attempts to detect when a page has loaded by looking at the page load event, ensuring there 127 | are no outstanding network requests and that the page has had ample time to render. 128 | 129 | ### Rendering budget timeout 130 | 131 | There is a hard limit of 10 seconds for rendering. Ensure you don't hit this budget by ensuring 132 | your application is rendered well before the budget expires. 133 | 134 | ### Web components 135 | 136 | Headless Chrome supports web components but shadow DOM is difficult to serialize effectively. 137 | As such, [shady DOM](https://github.com/webcomponents/shadydom) (a lightweight shim for Shadow DOM) 138 | is required for web components. 139 | 140 | If you are using web components v0 (deprecated), you will need to enable Shady DOM to 141 | render correctly. In Polymer 1.x, which uses web components v0, Shady DOM is enabled by default. 142 | If you are using Shadow DOM, override this by setting the query parameter `dom=shady` when 143 | directing requests to the Rendertron service. 144 | 145 | If you are using web components v1 and either `webcomponents-lite.js` or `webcomponents-loader.js`, 146 | set the query parameter `wc-inject-shadydom=true` when directing requests to the Rendertron 147 | service. This renderer service will force the necessary polyfills to be loaded and enabled. 148 | 149 | ### Status codes 150 | 151 | Status codes from the initial requested URL are preserved. If this is a 200, or 304, you can 152 | set the HTTP status returned by the rendering service by adding a meta tag. 153 | 154 | ```html 155 | 156 | ``` 157 | 158 | ## Running locally 159 | 160 | To install Rendertron and run it locally, first install Rendertron: 161 | 162 | ```bash 163 | npm install -g rendertron 164 | ``` 165 | 166 | With Chrome installed on your machine run the Rendertron CLI: 167 | 168 | ```bash 169 | rendertron 170 | ``` 171 | 172 | ## Installing & deploying 173 | 174 | ### Building 175 | 176 | Clone and install dependencies: 177 | 178 | ```bash 179 | git clone https://github.com/GoogleChrome/rendertron.git 180 | cd rendertron 181 | npm install 182 | npm run build 183 | ``` 184 | 185 | ### Running locally 186 | 187 | With a local instance of Chrome installed, you can start the server locally: 188 | 189 | ```bash 190 | npm run start 191 | ``` 192 | 193 | ### Deploying to Google Cloud Platform 194 | 195 | ``` 196 | gcloud app deploy app.yaml --project 197 | ``` 198 | 199 | ### Deploying using Docker 200 | 201 | Rendertron no longer includes a Docker file. Instead, refer to 202 | [Puppeteer documentation](https://github.com/GoogleChrome/puppeteer/blob/master/docs/troubleshooting.md#running-puppeteer-in-docker) 203 | on how to deploy run headless Chrome in Docker. 204 | 205 | ### Config 206 | 207 | When deploying the service, set configuration variables by including a `config.json` in the 208 | root. Available configuration options: 209 | 210 | - `timeout` _default `10000`_ - set the timeout used to render the target page. 211 | - `port` _default `3000`_ - set the port to use for running and listening the rendertron service. Note if process.env.PORT is set, it will be used instead. 212 | - `host` _default `0.0.0.0`_ - set the hostname to use for running and listening the rendertron service. Note if process.env.HOST is set, it will be used instead. 213 | - `width` _default `1000`_ - set the width (resolution) to be used for rendering the page. 214 | - `height` _default `1000`_ - set the height (resolution) to be used for rendering the page. 215 | - `reqHeaders` _default `{}`_ - set the additional HTTP headers to be sent to the target page with every request. 216 | - `cache` _default `null`_ - set to `datastore` to enable caching on Google Cloud using datastore _only use if deploying to google cloud_, `memory` to enable in-memory caching or `filesystem` to enable disk based caching 217 | - `cacheConfig` - an object array to specify caching options 218 | - `renderOnly` - restrict the endpoint to only service requests for certain domains. Specified as an array of strings. eg. `['http://render.only.this.domain']`. This is a strict prefix match, so ensure you specify the exact protocols that will be used (eg. http, https). 219 | - `closeBrowser`_default `false`_ - `true` forces the browser to close and reopen between each page render, some sites might need this to prevent URLs past the first one rendered returning null responses. 220 | - `restrictedUrlPattern`_default `null`_ - set the restrictedUrlPattern to restrict the requests matching given regex pattern. 221 | 222 | #### cacheConfig 223 | 224 | - `cacheDurationMinutes` _default `1440`_ - set an expiry time in minues, defaults to 24 hours. Set to -1 to disable cache Expiration 225 | - `cacheMaxEntries` _default `100`_ - set the maximum number of entries stored in the selected cache method. Set to `-1` to allow unlimited caching. If using the datastore caching method, setting this value over `1000` may lead to degraded performance as the query to determine the size of the cache may be too slow. If you want to allow a larger cache in `datastore` consider setting this to `-1` and managing the the size of your datastore using a method like this [Deleting Entries in Bulk](https://cloud.google.com/datastore/docs/bulk-delete) 226 | - `snapshotDir` _default `/renderton`_ - **filesystem only** the directory the rendertron cache files will be stored in 227 | 228 | ##### Example 229 | 230 | An example config file specifying a memory cache, with a 2 hour expiration, and a maximum of 50 entries 231 | 232 | ```javascript 233 | { 234 | "cache": "memory", 235 | "cacheConfig": { 236 | "cacheDurationMinutes": 120, 237 | "cacheMaxEntries": 50 238 | } 239 | } 240 | ``` 241 | 242 | ### Troubleshooting 243 | 244 | If you're having troubles with getting Headless Chrome to run in your 245 | environment, refer to the 246 | [troubleshooting guide](https://github.com/GoogleChrome/puppeteer/blob/master/docs/troubleshooting.md) 247 | for Puppeteer. 248 | -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Rendertron", 3 | "description": "Rendertron is a headless Chrome rendering solution designed to render & serialise web pages on the fly.", 4 | "keywords": ["rendertron", "render", "web", "chrome"], 5 | "website": "https://github.com/GoogleChrome/rendertron", 6 | "buildpacks": [ 7 | { 8 | "url": "heroku/google-chrome" 9 | }, 10 | { 11 | "url": "heroku/nodejs" 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /app.yaml: -------------------------------------------------------------------------------- 1 | runtime: nodejs12 2 | instance_class: F4_1G 3 | automatic_scaling: 4 | min_instances: 1 5 | env_variables: 6 | DISABLE_LEGACY_METADATA_SERVER_ENDPOINTS: "true" 7 | -------------------------------------------------------------------------------- /bin/rendertron: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | 'use strict'; 4 | 5 | process.title = 'rendertron'; 6 | 7 | var semver = require('semver'); 8 | 9 | if (!semver.satisfies(process.version, '>=10')) { 10 | console.log('Rendertron requires Node 10+'); 11 | process.exit(1); 12 | } 13 | 14 | const Rendertron = require('../build/rendertron.js').Rendertron; 15 | const rendertron = new Rendertron(); 16 | rendertron.initialize(); 17 | -------------------------------------------------------------------------------- /docs/api-reference.md: -------------------------------------------------------------------------------- 1 | ## Rendertron API Reference 2 | 3 | ### HTTP API endpoints 4 | 5 | `/render` 6 | 7 | Fetch and serialize a URL in headless Chrome. 8 | 9 | | param | type | description | 10 | | ------------ | -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 11 | | `url` | `String` | a valid URL to fetch | 12 | | `opts` | `Object` | `Renderer` config class options | 13 | | `timezoneId` | `String` | specify timezoneId from [list](https://source.chromium.org/chromium/chromium/deps/icu.git/+/faee8bc70570192d82d2978a71e2a615788597d1:source/data/misc/metaZones.txt) with a querystring appended to the requested URL. | 14 | 15 | `/screenshot` 16 | 17 | Return a screenshot of the requested URL 18 | 19 | ```javascript 20 | async screenshot( 21 | url: string, 22 | isMobile: boolean, 23 | dimensions: ViewportDimensions, 24 | options?: object, 25 | timezoneId?: string): Promise 26 | } 27 | ``` 28 | 29 | | param | type | description | 30 | | ------------ | ------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 31 | | `url` | `String` | A valid URL to fetch | 32 | | `isMobile` | `Bool` | Specify a mobile layout with a querystring automatically appended to the requested URL. | 33 | | `dimensions` | [`ViewportDimensions`](viewport-dimensions) | `height` and `width` specifications for the rendered page | 34 | | `options` | `Object` | define screenshot params | 35 | | `timezoneId` | `String` | define timezoneId from [list](https://source.chromium.org/chromium/chromium/deps/icu.git/+/faee8bc70570192d82d2978a71e2a615788597d1:source/data/misc/metaZones.txt) | | 36 | 37 | `/invalidate/` 38 | 39 | Removes the cached response for a given URL from the cache. 40 | 41 | | param | type | description | 42 | | ----- | -------- | ------------------------------------ | 43 | | `url` | `String` | A valid URL to remove from the cache | 44 | 45 | `/_ah/health` 46 | 47 | Returns HTTP 200 and text "OK", if the Rendertron server is healthy. 48 | 49 | ### Rendertron internal API 50 | 51 | #### `Renderer` 52 | 53 | Create a `puppeteer` instance to render the requested URL. Uses default `Config` 54 | class or a user-defined `Config` file. 55 | 56 | ```javascript 57 | export class Renderer { 58 | private browser: puppeteer.Browser; 59 | private config: Config; 60 | 61 | constructor(browser: puppeteer.Browser, config: Config) { 62 | this.browser = browser; 63 | this.config = config; 64 | } 65 | ``` 66 | 67 | #### `Config` 68 | 69 | The `Config` class defaults can be overridden with your own settings. 70 | [More details](https://github.com/GoogleChrome/rendertron/blob/master/docs/configure.md) 71 | 72 | ```javascript 73 | public static config: Config = { 74 | cache: null, 75 | timeout: 10000, 76 | port: '3000', 77 | host: '0.0.0.0', 78 | width: 1000, 79 | height: 1000, 80 | headers: {} 81 | }; 82 | ``` 83 | 84 | #### `ViewportDimensions` 85 | 86 | An Object setting the width and height of the requested resource. 87 | 88 | ```javascript 89 | type ViewportDimensions = { 90 | width: number, 91 | height: number, 92 | }; 93 | ``` 94 | 95 | #### `Options` 96 | 97 | Specify the screenshot file type. 98 | 99 | ```javascript 100 | const screenshotOptions = Object.assign({}, options, { 101 | type: 'jpeg', 102 | encoding: 'binary', 103 | }); 104 | ``` 105 | 106 | `/invalidate` 107 | 108 | Invalidate all cache entries present in the configured cache (memory, filesystem or cloud datastore). 109 | (Only available if cache is configured) 110 | 111 | `/invalidate` 112 | 113 | Invalidate a cache entry from memory, filesystem or cloud datastore. 114 | (Only available if cache is configured) 115 | 116 | | param | type | description | 117 | | ----- | -------- | -------------------------- | 118 | | `url` | `String` | URL to invalidate in cache | 119 | 120 | ### 121 | -------------------------------------------------------------------------------- /docs/configure.md: -------------------------------------------------------------------------------- 1 | # Config 2 | 3 | When deploying the service, set configuration variables by including a `config.json` in the 4 | root. Available configuration options: 5 | 6 | - `timeout` _default `10000`_ - set the timeout used to render the target page. 7 | - `port` _default `3000`_ - set the port to use for running and listening the rendertron service. Note if process.env.PORT is set, it will be used instead. 8 | - `host` _default `0.0.0.0`_ - set the hostname to use for running and listening the rendertron service. Note if process.env.HOST is set, it will be used instead. 9 | - `width` _default `1000`_ - set the width (resolution) to be used for rendering the page. 10 | - `height` _default `1000`_ - set the height (resolution) to be used for rendering the page. 11 | - `cache` _default `null`_ - set to `datastore` to enable caching on Google Cloud using datastore _only use if deploying to google cloud_, `memory` to enable in-memory caching or `filesystem` to enable disk based caching 12 | - `cacheConfig` - an object array to specify caching options 13 | - `renderOnly` - restrict the endpoint to only service requests for certain domains. Specified as an array of strings. eg. `['http://render.only.this.domain']`. This is a strict prefix match, so ensure you specify the exact protocols that will be used (eg. http, https). 14 | - `closeBrowser`_default `false`_ - `true` forces the browser to close and reopen between each page render, some sites might need this to prevent URLs past the first one rendered returning null responses. 15 | 16 | ## cacheConfig 17 | 18 | - `cacheDurationMinutes` _default `1440`_ - set an expiry time in minues, defaults to 24 hours. Set to -1 to disable cache Expiration 19 | - `cacheMaxEntries` _default `100`_ - set the maximum number of entries stored in the selected cache method. Set to `-1` to allow unlimited caching. If using the datastore caching method, setting this value over `1000` may lead to degraded performance as the query to determine the size of the cache may be too slow. If you want to allow a larger cache in `datastore` consider setting this to `-1` and managing the the size of your datastore using a method like this [Deleting Entries in Bulk](https://cloud.google.com/datastore/docs/bulk-delete) 20 | - `snapshotDir` _default `/renderton`_ - **filesystem only** the directory the rendertron caches will be stored in 21 | 22 | ### Example 23 | 24 | An example config file specifying a memory cache, with a 2 hour expiration, and a maximum of 50 entries 25 | 26 | ```javascript 27 | { 28 | "cache": "memory", 29 | "cacheConfig": { 30 | "cacheDurationMinutes": 120, 31 | "cacheMaxEntries": 50 32 | } 33 | } 34 | ``` 35 | -------------------------------------------------------------------------------- /docs/deploy.md: -------------------------------------------------------------------------------- 1 | # Deploying Rendertron to Google Cloud Platform 2 | 3 | Rendertron runs a server that takes a URL and returns static HTML for the URL by using headless Chromium. This guide follows 4 | `https://github.com/GoogleChrome/rendertron#deploying-to-google-cloud-platform` 5 | 6 | - To clone the Rendertron repository from GitHub, run the following command: 7 | `git clone https://github.com/GoogleChrome/rendertron.git` 8 | 9 | - Change directories: 10 | `cd rendertron` 11 | 12 | - To install dependencies and build Rendertron on your computer, run the following command: 13 | `npm install && npm run build` 14 | 15 | - Create a new file called config.json in the rendertron directory with the following content to enable Rendertron's cache: 16 | `{ "datastoreCache": true }` 17 | 18 | - From the rendertron directory, run the following command. Replace YOUR_PROJECT_ID with your project ID that you set in Google Cloud Platform. 19 | `gcloud app deploy app.yaml --project YOUR_PROJECT_ID` 20 | 21 | - Select a region of your choice and confirm the deployment. Wait for the command to finish. 22 | 23 | - Enter YOUR_PROJECT_ID.appspot.com in your browser. Replace YOUR_PROJECT_ID with your actual project ID that you set in Google Cloud Platform. You should see Rendertron's interface with an input field and a few buttons. 24 | 25 | - When you see the Rendertron web interface, you have successfully deployed your own Rendertron instance. Take note of your project's URL (YOUR_PROJECT_ID.appspot.com) as you will need it later. 26 | 27 | # Deploying Rendertron to Heroku 28 | 29 | [![Deploy](https://www.herokucdn.com/deploy/button.png)](https://dashboard.heroku.com/new?button-url=https://github.com/GoogleChrome/rendertron/tree/main&template=https://github.com/GoogleChrome/rendertron/tree/main) 30 | 31 | Setup Herokuapp and Heroku CLI 32 | `https://devcenter.heroku.com/articles/heroku-cli` 33 | 34 | First, add the Google Chrome buildpack to your project: 35 | 36 | ``` 37 | $ heroku buildpacks:set https://github.com/heroku/heroku-buildpack-google-chrome.git -a 38 | ``` 39 | 40 | Next, add the `heroku/nodejs` buildpack to your project: 41 | 42 | ``` 43 | $ heroku buildpacks:add --index 2 heroku/nodejs -a 44 | ``` 45 | 46 | Then, update the `package.json` entry for `engines` to specific node and npm versions. I used: 47 | 48 | ``` 49 | { 50 | ... 51 | "engines": { 52 | "node": "10.15.1", 53 | "npm": "6.4.1" 54 | }, 55 | ... 56 | } 57 | ``` 58 | 59 | This was helpful in getting past a `node-gyp` issue during `npm install`, which Heroku will run each time you deploy. 60 | 61 | Next, enter a new script into your `package.json`: 62 | 63 | ``` 64 | { 65 | "scripts": { 66 | ..., 67 | "heroku-postbuild": "npm run build" 68 | } 69 | } 70 | ``` 71 | 72 | This will make sure to build rendertron into `bin/rendertron` on each deploy, in case you have any local changes. 73 | 74 | Finally, add a `Procfile` to your project with the following: 75 | 76 | ``` 77 | web: node bin/rendertron 78 | ``` 79 | 80 | # Deploying Rendertron in a docker container 81 | 82 | Based on Puppeteer instructions we can create a docker image that bundles a headless chrome and rendertron. We can start from node 14 base image. 83 | 84 | For more information about chrome installation please see the pupeteer page: https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md#running-puppeteer-in-docker 85 | 86 | If you don't want to use rendertron default configurations you can create a config.json file. This file must be created at the project root level, in the same directory as the Dockerfile. 87 | 88 | ``` 89 | { 90 | "cache": "filesystem", 91 | "cacheConfig": { 92 | "cacheDurationMinutes": 7200, 93 | "cacheMaxEntries": 1000, 94 | "snapshotDir": "/cache" 95 | } 96 | } 97 | ``` 98 | 99 | Then we can define the Dockerfile like this: 100 | 101 | ``` 102 | FROM node:14.11.0-stretch 103 | 104 | RUN apt-get update \ 105 | && apt-get install -y wget gnupg \ 106 | && wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ 107 | && sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \ 108 | && apt-get update \ 109 | && apt-get install -y google-chrome-stable fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-freefont-ttf libxss1 \ 110 | --no-install-recommends \ 111 | && rm -rf /var/lib/apt/lists/* 112 | 113 | # This directoty will store cached files as specified in the config.json. 114 | # If you haven't defined the cacheConfig.snapshotDir property you can remove the following line 115 | RUN mkdir /cache 116 | 117 | RUN git clone https://github.com/GoogleChrome/rendertron.git 118 | 119 | WORKDIR /rendertron 120 | 121 | RUN npm install && npm run build 122 | 123 | # If you aren't using a custom config.json file you must remove the following line 124 | ADD config.json . 125 | 126 | EXPOSE 3000 127 | 128 | CMD ["npm", "run", "start"] 129 | 130 | ``` 131 | 132 | And we can build an image using the previous Dockerfile: 133 | 134 | ``` 135 | docker build . -t rendertron:3.0 136 | docker run -d --log-opt max-size=100m --log-opt max-file=3 --name rendertron -p 3000:3000 rendertron:3.0 137 | ``` 138 | 139 | The rendertron api will be avaiable at localhost:3000 140 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ## What is Rendertron? 2 | 3 | > Rendertron is a headless Chrome rendering solution designed to render & 4 | > serialise web pages on the fly. 5 | 6 | - hammer 7 | Built with [Puppeteer](https://github.com/GoogleChrome/puppeteer) 8 | - cloud 9 | Easy deployment to Google Cloud 10 | - mag 11 | Improves SEO 12 | 13 | Rendertron is designed to enable your Progressive Web App (PWA) to serve the 14 | correct content to any bot that doesn't render or execute JavaScript. Rendertron 15 | runs as a standalone HTTP server. Rendertron renders requested pages using 16 | Headless Chrome, [auto-detecting](#auto-detecting-loading-function) when your 17 | PWA has completed loading and serializes the response back to the original 18 | request. To use Rendertron, your application configures 19 | [middleware](#middleware) to determine whether to proxy a request to Rendertron. 20 | Rendertron is compatible with all client side technologies, including 21 | [web components](#web-components). 22 | 23 | ## Demo endpoint 24 | 25 | A demo Rendertron service is available at https://render-tron.appspot.com/. It 26 | is not designed to be used as a production endpoint. You can use it, but there 27 | are no uptime guarantees. 28 | 29 | ## Learn more 30 | 31 | - [Rendertron user guide](users-guide) 32 | - [Configuring Rendertron](configure) 33 | - [Deploying Rendertron](deploy) 34 | - [Using Rendertron with your server](server-setup) 35 | - [API Reference](api-reference) 36 | - [Best practices](best_practices) 37 | - [Contributing to Rendertron](https://github.com/GoogleChrome/rendertron/blob/master/CONTRIBUTING.md) 38 | -------------------------------------------------------------------------------- /docs/server-setup.md: -------------------------------------------------------------------------------- 1 | ## Using Rendertron with your server 2 | 3 | Rendertron provides a web server that accepts request for rendering pages. 4 | Usually your web application already uses a web server and you need to configure your web server to redirect requests from the desired bots and crawlers to your Rendertron server. 5 | 6 | This can be done with every common web server but the steps differ between them. 7 | Here are guides for some web servers: 8 | 9 | - [Apache](./server-setup/apache) 10 | - [nginx](./server-setup/nginx) 11 | - [express.js](./server-setup/expressjs) 12 | 13 | If you have instructions for other web servers, please consider [contributing to this page](https://github.com/googlechrome/rendertron)! 14 | -------------------------------------------------------------------------------- /docs/server-setup/apache.md: -------------------------------------------------------------------------------- 1 | ## Set up Rendertron with Apache 2 | 3 | To use Rendertron with Apache, set up a conditional URL rewrite based on the user agent. 4 | You can do this either in an `.htaccess` file, the `VirtualHost` configuration or the main configuration file. 5 | 6 | ### Prerequisites 7 | 8 | Your Apache needs to have `mod_rewrite` and `mod_proxy_http` enabled for this configuration. On Debian and Ubuntu, run these commands to activate these modules: 9 | 10 | ``` 11 | sudo a2enmod rewrite proxy_http 12 | sudo service apache2 restart 13 | ``` 14 | 15 | ### Basic configuration 16 | 17 | Use the following configuration to send all requests from user agents containing `bot` to Rendertron: 18 | 19 | ``` 20 | RewriteEngine On 21 | RewriteCond %{HTTP_USER_AGENT} bot 22 | # Replace the PUT-YOUR-RENDERTRON-URL-HERE with the URL of your Rendertron instance 23 | # Replace YOUR-WEBAPP-ROOT-URL with the base URL of your web application (e.g. example.com) 24 | RewriteRule ^(.*)$ https://PUT-YOUR-RENDERTRON-URL-HERE/render/https://YOUR-WEBAPP-ROOT-URL$1 [P,L] 25 | ``` 26 | 27 | ### Sending multiple bot user agents to Rendertron 28 | 29 | To make your Apache web server send requests from a list of bots to your Rendertron instance, use this syntax: 30 | 31 | ``` 32 | RewriteEngine On 33 | RewriteCond %{HTTP_USER_AGENT} facebookexternalhit|linkedinbot|twitterbot 34 | # Replace the PUT-YOUR-RENDERTRON-URL-HERE with the URL of your Rendertron instance 35 | # Replace YOUR-WEBAPP-ROOT-URL with the base URL of your web application (e.g. example.com) 36 | RewriteRule ^(.*)$ https://PUT-YOUR-RENDERTRON-URL-HERE/render/https://YOUR-WEBAPP-ROOT-URL$1 [P,L] 37 | ``` 38 | 39 | Separate the bot names with the pipe (`|`) character. 40 | This configuration is case-sensitive, so `googlebot` works while `Googlebot` doesn't. 41 | -------------------------------------------------------------------------------- /docs/server-setup/expressjs.md: -------------------------------------------------------------------------------- 1 | ## Set up Rendertron with express.js 2 | 3 | If you use [expressjs](https://expressjs.com) you can use the [rendertron-middleware] to add Rendertron to your express.js application. 4 | 5 | ### Install rendertron-middleware 6 | 7 | Inside the root directory of your web application, run the following command: 8 | 9 | ``` 10 | npm install --save rendertron-middleware 11 | ``` 12 | 13 | ### Setup your express.js application to use the middleware 14 | 15 | ```javascript 16 | const express = require('express'); 17 | const rendertron = require('rendertron-middleware'); 18 | 19 | const app = express(); 20 | 21 | app.use( 22 | rendertron.makeMiddleware({ 23 | // replace this with the web address of your rendertron instance 24 | proxyUrl: 'http://PUT-YOUR-RENDERTRON-URL-HERE/render', 25 | }) 26 | ); 27 | 28 | app.use(express.static('files')); 29 | app.listen(8080); 30 | ``` 31 | 32 | ### Configure which user agents are pre-rendered with Rendertron 33 | 34 | The middleware comes with a pre-configured [bot list](https://github.com/GoogleChrome/rendertron/blob/a1dd3ab1f054bc19e89dcdecdb71dc004f7d068e/middleware/src/middleware.ts#L24-L41). 35 | 36 | If you wish to use Rendertron for other bots, you can either _replace_ or _extend_ this list. 37 | 38 | To replace the list with your own, configure the middleware like this: 39 | 40 | ```javascript 41 | // only use Rendertron for LinkedInBot and Twitterbot 42 | const myBotList = ['linkedinbot', 'twitterbot']; 43 | 44 | app.use( 45 | rendertron.makeMiddleware({ 46 | // replace the default bot list with your own: 47 | userAgentPattern: new RegExp(myBotList.join('|'), 'i'), 48 | // replace this with the web address of your rendertron instance 49 | proxyUrl: 'http://PUT-YOUR-RENDERTRON-URL-HERE/render', 50 | }) 51 | ); 52 | ``` 53 | 54 | You can also extend the bot list to include more bots: 55 | 56 | ```javascript 57 | // add googlebot and yolobot to bot list 58 | const myBotList = rendertron.botUserAgents.concat(['googlebot', 'yolobot']); 59 | 60 | app.use( 61 | rendertron.makeMiddleware({ 62 | // use the extended bot list: 63 | userAgentPattern: new RegExp(myBotList.join('|'), 'i'), 64 | // replace this with the web address of your rendertron instance 65 | proxyUrl: 'http://PUT-YOUR-RENDERTRON-URL-HERE/render', 66 | }) 67 | ); 68 | ``` 69 | -------------------------------------------------------------------------------- /docs/server-setup/nginx.md: -------------------------------------------------------------------------------- 1 | ## Set up Rendertron with nginx 2 | 3 | To use Rendertron with nginx, [set up nginx as a reverse proxy](https://docs.nginx.com/nginx/admin-guide/web-server/reverse-proxy/). 4 | 5 | To use Rendertron only for bots, check the `$http_user_agent`. When it's looking like a bot, send the request to Rendertron, otherwise send it to your web application directly. 6 | 7 | ### Sample configuration for a single bot 8 | 9 | To send requests from user agents containing `bot` to Rendertron, use the following configuration: 10 | 11 | ``` 12 | server { 13 | listen 80; 14 | server_name example.com; 15 | # ...other configuration... 16 | 17 | # only send requests from user agents containing the word "bot" to Rendertron 18 | if ($http_user_agent ~* 'bot') { 19 | rewrite ^(.*)$ /rendertron/$1; 20 | } 21 | 22 | location /rendertron/ { 23 | proxy_set_header X-Real-IP $remote_addr; 24 | proxy_set_header X-Forwarded-For $remote_addr; 25 | # replace PUT-YOUR-RENDERTRON-URL-HERE with your rendertron server address below 26 | proxy_pass http://PUT-YOUR-RENDERTRON-URL-HERE/render/$scheme://$host:$server_port$request_uri; 27 | } 28 | } 29 | ``` 30 | 31 | ### Setting up Rendertron for multiple bot user agents: 32 | 33 | To enable Rendertron for a list of (bot) user agents, you can map the `$http_user_agent` to a custom variable indicating if you consider this user agent a bot. To do so, add this to your `nginx.conf`: 34 | 35 | ``` 36 | # Creates $is_bot variable and match user agents 37 | map $http_user_agent $is_bot { 38 | default 0; 39 | '~*googlebot' 1; 40 | '~*bingbot' 1; 41 | # add more lines for other user agents here 42 | } 43 | ``` 44 | 45 | In your site configuration, you can use the following to send requests where `$is_bot` is 1 to Rendertron: 46 | 47 | ``` 48 | server { 49 | listen 80; 50 | server_name example.com; 51 | # ...other configuration... 52 | # only send requests from user agents containing the word "bot" to Rendertron 53 | if ($is_bot = 1) { 54 | rewrite ^(.*)$ /rendertron/$1; 55 | } 56 | 57 | location /rendertron/ { 58 | proxy_set_header X-Real-IP $remote_addr; 59 | proxy_set_header X-Forwarded-For $remote_addr; 60 | # replace PUT-YOUR-RENDERTRON-URL-HERE with your rendertron server address below 61 | proxy_pass http://PUT-YOUR-RENDERTRON-URL-HERE/render/$scheme://$host:$server_port$request_uri; 62 | } 63 | } 64 | ``` 65 | -------------------------------------------------------------------------------- /middleware/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | 4 | 5 | ## [0.1.5] 2018-12-14 6 | 7 | - Add TelegramBot as bot. 8 | 9 | ## [0.1.4] 2018-12-10 10 | 11 | - Add Whatsapp as bot. 12 | 13 | ## [0.1.3] 2018-09-18 14 | 15 | - Converted to TypeScript. 16 | 17 | ## [0.1.2] 2017-08-29 18 | 19 | - Fix bug with wc-inject-shadydom URL parameter. 20 | 21 | ## [0.1.1] 2017-08-23 22 | 23 | - Remove broken typings configuration. 24 | 25 | ## [0.1.0] 2017-08-17 26 | 27 | - Initial release. 28 | -------------------------------------------------------------------------------- /middleware/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /middleware/README.md: -------------------------------------------------------------------------------- 1 | # rendertron-middleware 2 | 3 | [![CI](https://github.com/GoogleChrome/rendertron/workflows/CI/badge.svg)](https://github.com/GoogleChrome/rendertron/actions) 4 | [![NPM version](http://img.shields.io/npm/v/rendertron-middleware.svg)](https://www.npmjs.com/package/rendertron-middleware) 5 | 6 | An Express middleware for [Rendertron](https://github.com/samuelli/bot-render). 7 | 8 | Rendertron is a server which runs headless Chrome and renders web pages on the fly, which can be set up to serve pages to search engines, social networks and link rendering bots. 9 | 10 | This middleware checks the User-Agent header of incoming requests, and if it matches one of a configurable set of bots, proxies that request through Rendertron. 11 | 12 | ## Usage 13 | 14 | ```sh 15 | $ npm install --save express rendertron-middleware 16 | ``` 17 | 18 | ```js 19 | const express = require('express'); 20 | const rendertron = require('rendertron-middleware'); 21 | 22 | const app = express(); 23 | 24 | app.use( 25 | rendertron.makeMiddleware({ 26 | proxyUrl: 'http://my-rendertron-instance/render', 27 | }) 28 | ); 29 | 30 | app.use(express.static('files')); 31 | app.listen(8080); 32 | ``` 33 | 34 | ## Configuration 35 | 36 | The `makeMiddleware` function takes a configuration object with the following 37 | properties: 38 | 39 | | Property | Default | Description | 40 | | ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 41 | | `proxyUrl` | _Required_ | Base URL of your running Rendertron proxy service. | 42 | | `userAgentPattern` | A set of known bots that benefit from pre-rendering. [Full list.](https://github.com/samuelli/bot-render/blob/master/middleware/src/middleware.ts) | RegExp for matching requests by User-Agent header. | 43 | | `excludeUrlPattern` | A set of known static file extensions. [Full list.](https://github.com/samuelli/bot-render/blob/master/middleware/src/middleware.ts) | RegExp for excluding requests by the path component of the URL. | 44 | | `injectShadyDom` | `false` | Force the web components polyfills to be loaded. [Read more.](https://github.com/samuelli/bot-render#web-components) | 45 | | `timeout` | `11000` | Millisecond timeout for the proxy request to Rendertron. If exceeded, the standard response is served (i.e. `next()` is called). This is **not** the timeout for the Rendertron server itself. See also the [Rendertron timeout.](https://github.com/googlechrome/rendertron#rendering-budget-timeout) | 46 | | `allowedForwardedHosts` | `[]` | If a forwarded host header is found and matches one of the hosts in this array, then that host will be used for the request to the rendertron server instead of the actual host of the current request. This is usedful if this middleware is running on a different host which is proxied behind the actual site, and the rendertron server should request the main site. **Note:** For security, because the header info is untrusted, only those hosts which you explicitly allow will be forwarded, otherwise they will be ignored. Leaving this undefined or empty (the default) will disable host forwarding. | 47 | | `forwardedHostHeader` | `"X-Forwarded-Host"` | Header used to determine the forwarded host that should be used when building the URL to be rendered. Only used if `allowedForwardedHosts` is not empty. | 48 | -------------------------------------------------------------------------------- /middleware/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rendertron-middleware", 3 | "version": "0.1.6", 4 | "description": "Express middleware for the rendertron service.", 5 | "engines": { 6 | "node": ">=10" 7 | }, 8 | "main": "build/middleware.js", 9 | "types": "build/middleware.d.ts", 10 | "scripts": { 11 | "build": "tsc", 12 | "prepack": "npm run build", 13 | "test": "npm run build && ava build/*/*-test.js" 14 | }, 15 | "files": [ 16 | "build/", 17 | "!build/test/" 18 | ], 19 | "license": "Apache-2.0", 20 | "dependencies": { 21 | "request": "^2.88.2" 22 | }, 23 | "devDependencies": { 24 | "@types/express": "^4.17.11", 25 | "@types/request": "^2.48.5", 26 | "@types/supertest": "^2.0.11", 27 | "ava": "^3.15.0", 28 | "express": "^4.17.1", 29 | "supertest": "^6.1.3", 30 | "typescript": "~4.2.4" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /middleware/src/middleware.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | import express from 'express'; 18 | import request from 'request'; 19 | 20 | /** 21 | * A default set of user agent patterns for bots/crawlers that do not perform 22 | * well with pages that require JavaScript. 23 | */ 24 | export const botUserAgents = [ 25 | 'Baiduspider', 26 | 'bingbot', 27 | 'Embedly', 28 | 'facebookexternalhit', 29 | 'LinkedInBot', 30 | 'outbrain', 31 | 'pinterest', 32 | 'quora link preview', 33 | 'rogerbot', 34 | 'showyoubot', 35 | 'Slackbot', 36 | 'TelegramBot', 37 | 'Twitterbot', 38 | 'vkShare', 39 | 'W3C_Validator', 40 | 'WhatsApp', 41 | ]; 42 | 43 | /** 44 | * A default set of file extensions for static assets that do not need to be 45 | * proxied. 46 | */ 47 | const staticFileExtensions = [ 48 | 'ai', 49 | 'avi', 50 | 'css', 51 | 'dat', 52 | 'dmg', 53 | 'doc', 54 | 'doc', 55 | 'exe', 56 | 'flv', 57 | 'gif', 58 | 'ico', 59 | 'iso', 60 | 'jpeg', 61 | 'jpg', 62 | 'js', 63 | 'less', 64 | 'm4a', 65 | 'm4v', 66 | 'mov', 67 | 'mp3', 68 | 'mp4', 69 | 'mpeg', 70 | 'mpg', 71 | 'pdf', 72 | 'png', 73 | 'ppt', 74 | 'psd', 75 | 'rar', 76 | 'rss', 77 | 'svg', 78 | 'swf', 79 | 'tif', 80 | 'torrent', 81 | 'ttf', 82 | 'txt', 83 | 'wav', 84 | 'wmv', 85 | 'woff', 86 | 'xls', 87 | 'xml', 88 | 'zip', 89 | ]; 90 | 91 | /** 92 | * Options for makeMiddleware. 93 | */ 94 | export interface Options { 95 | /** 96 | * Base URL of the Rendertron proxy service. Required. 97 | */ 98 | proxyUrl: string; 99 | 100 | /** 101 | * Regular expression to match user agent to proxy. Defaults to a set of bots 102 | * that do not perform well with pages that require JavaScript. 103 | */ 104 | userAgentPattern?: RegExp; 105 | 106 | /** 107 | * Regular expression used to exclude request URL paths. Defaults to a set of 108 | * typical static asset file extensions. 109 | */ 110 | excludeUrlPattern?: RegExp; 111 | 112 | /** 113 | * Force web components polyfills to be loaded and enabled. Defaults to false. 114 | */ 115 | injectShadyDom?: boolean; 116 | 117 | /** 118 | * Millisecond timeout for proxy requests. Defaults to 11000 milliseconds. 119 | */ 120 | timeout?: number; 121 | 122 | /** 123 | * If a forwarded host header is found and matches one of the hosts in this 124 | * array, then that host will be used for the request to the rendertron server 125 | * instead of the actual host of the request. 126 | * This is usedful if this middleware is running on a different host 127 | * which is proxied behind the actual site, and the rendertron server should 128 | * request the main site. 129 | */ 130 | allowedForwardedHosts?: string[]; 131 | 132 | /** 133 | * Header used to determine the forwarded host that should be used when 134 | * building the URL to be rendered. Only applicable if `allowedForwardedHosts` 135 | * is not empty. 136 | * Defaults to `"X-Forwarded-Host"`. 137 | */ 138 | forwardedHostHeader?: string; 139 | } 140 | 141 | /** 142 | * Create a new Express middleware function that proxies requests to a 143 | * Rendertron bot rendering service. 144 | */ 145 | export function makeMiddleware(options: Options): express.Handler { 146 | if (!options || !options.proxyUrl) { 147 | throw new Error('Must set options.proxyUrl.'); 148 | } 149 | let proxyUrl = options.proxyUrl; 150 | if (!proxyUrl.endsWith('/')) { 151 | proxyUrl += '/'; 152 | } 153 | const userAgentPattern = 154 | options.userAgentPattern || new RegExp(botUserAgents.join('|'), 'i'); 155 | const excludeUrlPattern = 156 | options.excludeUrlPattern || 157 | new RegExp(`\\.(${staticFileExtensions.join('|')})$`, 'i'); 158 | const injectShadyDom = !!options.injectShadyDom; 159 | // The Rendertron service itself has a hard limit of 10 seconds to render, so 160 | // let's give a little more time than that by default. 161 | const timeout = options.timeout || 11000; // Milliseconds. 162 | const allowedForwardedHosts = options.allowedForwardedHosts || []; 163 | const forwardedHostHeader = allowedForwardedHosts.length 164 | ? options.forwardedHostHeader || 'X-Forwarded-Host' 165 | : null; 166 | 167 | return function rendertronMiddleware(req, res, next) { 168 | const ua = req.headers['user-agent']; 169 | if ( 170 | ua === undefined || 171 | !userAgentPattern.test(ua) || 172 | excludeUrlPattern.test(req.path) 173 | ) { 174 | next(); 175 | return; 176 | } 177 | const forwardedHost = forwardedHostHeader && req.get(forwardedHostHeader); 178 | const host = 179 | forwardedHost && allowedForwardedHosts.includes(forwardedHost) 180 | ? forwardedHost 181 | : req.get('host'); 182 | const incomingUrl = req.protocol + '://' + host + req.originalUrl; 183 | let renderUrl = proxyUrl + encodeURIComponent(incomingUrl); 184 | if (injectShadyDom) { 185 | renderUrl += '?wc-inject-shadydom=true'; 186 | } 187 | request({ url: renderUrl, timeout }, (e) => { 188 | if (e) { 189 | console.error( 190 | `[rendertron middleware] ${e.code} error fetching ${renderUrl}` 191 | ); 192 | next(); 193 | } 194 | }).pipe(res); 195 | }; 196 | } 197 | -------------------------------------------------------------------------------- /middleware/src/test/middleware-test.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | import test from 'ava'; 18 | import express from 'express'; 19 | import net from 'net'; 20 | import supertest from 'supertest'; 21 | 22 | import * as rendertron from '../middleware'; 23 | 24 | /** 25 | * Start the given Express app on localhost with a random port. 26 | */ 27 | async function listen(app: express.Application): Promise { 28 | return new Promise((resolve: (url: string) => void) => { 29 | const server = app.listen(/* random */ 0, 'localhost', () => { 30 | resolve(`http://localhost:${(server.address() as net.AddressInfo).port}`); 31 | }); 32 | }); 33 | } 34 | 35 | /** 36 | * Make an Express app that uses the Rendertron middleware and returns 37 | * "fallthrough" if the middleware skipped the request (i.e. called `next`). 38 | */ 39 | function makeApp(options: rendertron.Options) { 40 | return express() 41 | .use(rendertron.makeMiddleware(options)) 42 | .use((_req, res) => res.end('fallthrough')); 43 | } 44 | 45 | /** 46 | * Make an Express app that takes the place of a Rendertron server instance and 47 | * always responds with "proxy ". 48 | */ 49 | function makeProxy() { 50 | return express().use((req, res) => { 51 | res.end('proxy ' + decodeURIComponent(req.url.substring(1))); 52 | }); 53 | } 54 | 55 | const bot = 'slackbot'; 56 | const human = 'Chrome'; 57 | 58 | /** 59 | * GET a URL with the given user agent. 60 | */ 61 | async function get( 62 | userAgent: string, 63 | host: string, 64 | path: string, 65 | headers?: Record 66 | ) { 67 | const t = supertest(host).get(path).set('User-Agent', userAgent); 68 | if (headers) { 69 | for (const key in headers) { 70 | t.set(key, headers[key]); 71 | } 72 | } 73 | return await t; 74 | } 75 | 76 | test('makes a middleware function', async (t) => { 77 | const m = rendertron.makeMiddleware({ proxyUrl: 'http://example.com' }); 78 | t.truthy(m); 79 | }); 80 | 81 | test('throws if no proxyUrl given', async (t) => { 82 | const makeMiddlewareUntyped = rendertron.makeMiddleware as ( 83 | options?: unknown 84 | ) => express.Application; 85 | t.throws(() => makeMiddlewareUntyped()); 86 | t.throws(() => makeMiddlewareUntyped({})); 87 | t.throws(() => makeMiddlewareUntyped({ proxyUrl: '' })); 88 | }); 89 | 90 | test('proxies through given url', async (t) => { 91 | const proxyUrl = await listen(makeProxy()); 92 | const appUrl = await listen(makeApp({ proxyUrl })); 93 | 94 | const res = await get(bot, appUrl, '/foo'); 95 | t.is(res.status, 200); 96 | t.is(res.text, 'proxy ' + appUrl + '/foo'); 97 | }); 98 | 99 | test('proxyUrl can have trailing slash', async (t) => { 100 | const proxyUrl = await listen(makeProxy()); 101 | // Make sure our other tests are testing the no-trailing-slash case. 102 | t.false(proxyUrl.endsWith('/')); 103 | const appUrl = await listen(makeApp({ proxyUrl: proxyUrl + '/' })); 104 | 105 | const res = await get(bot, appUrl, '/foo'); 106 | t.is(res.status, 200); 107 | t.is(res.text, 'proxy ' + appUrl + '/foo'); 108 | }); 109 | 110 | test('adds shady dom parameter', async (t) => { 111 | const proxyUrl = await listen(makeProxy()); 112 | const appUrl = await listen(makeApp({ proxyUrl, injectShadyDom: true })); 113 | 114 | const res = await get(bot, appUrl, '/foo'); 115 | t.is(res.status, 200); 116 | t.is(res.text, 'proxy ' + appUrl + '/foo?wc-inject-shadydom=true'); 117 | }); 118 | 119 | test('excludes static file paths by default', async (t) => { 120 | const proxyUrl = await listen(makeProxy()); 121 | const appUrl = await listen(makeApp({ proxyUrl })); 122 | 123 | const res = await get(bot, appUrl, '/foo.png'); 124 | t.is(res.text, 'fallthrough'); 125 | }); 126 | 127 | test('url exclusion only matches url path component', async (t) => { 128 | const proxyUrl = await listen(makeProxy()); 129 | const appUrl = await listen(makeApp({ proxyUrl })); 130 | 131 | const res = await get(bot, appUrl, '/foo.png?params'); 132 | t.is(res.text, 'fallthrough'); 133 | }); 134 | 135 | test('excludes non-bot user agents by default', async (t) => { 136 | const proxyUrl = await listen(makeProxy()); 137 | const appUrl = await listen(makeApp({ proxyUrl })); 138 | 139 | const res = await get(human, appUrl, '/foo'); 140 | t.is(res.text, 'fallthrough'); 141 | }); 142 | 143 | test('respects custom user agent pattern', async (t) => { 144 | const proxyUrl = await listen(makeProxy()); 145 | const appUrl = await listen(makeApp({ proxyUrl, userAgentPattern: /borg/ })); 146 | 147 | let res; 148 | 149 | res = await get('humon', appUrl, '/foo'); 150 | t.is(res.text, 'fallthrough'); 151 | 152 | res = await get('borg', appUrl, '/foo'); 153 | t.is(res.text, 'proxy ' + appUrl + '/foo'); 154 | }); 155 | 156 | test('respects custom exclude url pattern', async (t) => { 157 | const proxyUrl = await listen(makeProxy()); 158 | const appUrl = await listen(makeApp({ proxyUrl, excludeUrlPattern: /foo/ })); 159 | 160 | let res; 161 | 162 | res = await get(bot, appUrl, '/foo'); 163 | t.is(res.text, 'fallthrough'); 164 | 165 | res = await get(bot, appUrl, '/bar'); 166 | t.is(res.text, 'proxy ' + appUrl + '/bar'); 167 | }); 168 | 169 | test('forwards proxy error status and body', async (t) => { 170 | // This proxy always returns an error. 171 | const proxyUrl = await listen( 172 | express().use((_req, res) => res.status(500).end('proxy error')) 173 | ); 174 | const appUrl = await listen(makeApp({ proxyUrl })); 175 | 176 | const res = await get(bot, appUrl, '/bar'); 177 | t.is(res.status, 500); 178 | t.is(res.text, 'proxy error'); 179 | }); 180 | 181 | test('falls through after timeout', async (t) => { 182 | // This proxy returns after 20ms, but our timeout is 10ms. 183 | const proxyUrl = await listen( 184 | express().use((_req, res) => { 185 | setTimeout(() => res.end('too slow'), 20); 186 | }) 187 | ); 188 | const appUrl = await listen(makeApp({ proxyUrl, timeout: 10 })); 189 | 190 | const res = await get(bot, appUrl, '/foo'); 191 | t.is(res.text, 'fallthrough'); 192 | }); 193 | 194 | test('forwards request to allowed host', async (t) => { 195 | const forwardedHost = 'example.com'; 196 | 197 | const proxyUrl = await listen(makeProxy()); 198 | const appUrl = await listen( 199 | makeApp({ 200 | proxyUrl, 201 | allowedForwardedHosts: [forwardedHost], 202 | }) 203 | ); 204 | 205 | const forwardedUrl = new URL(appUrl); 206 | forwardedUrl.host = forwardedHost; 207 | forwardedUrl.port = ''; 208 | forwardedUrl.pathname = '/foo'; 209 | 210 | const res = await get(bot, appUrl, '/foo', { 211 | 'X-Forwarded-Host': forwardedHost, 212 | }); 213 | t.is(res.status, 200); 214 | t.is(res.text, 'proxy ' + forwardedUrl.href); 215 | }); 216 | 217 | test('ignores forwarded host that is not allowed', async (t) => { 218 | const proxyUrl = await listen(makeProxy()); 219 | const appUrl = await listen( 220 | makeApp({ 221 | proxyUrl, 222 | allowedForwardedHosts: ['example.com'], 223 | }) 224 | ); 225 | 226 | const res = await get(bot, appUrl, '/foo', { 227 | 'X-Forwarded-Host': 'malicious.com', 228 | }); 229 | t.is(res.status, 200); 230 | t.is(res.text, 'proxy ' + appUrl + '/foo'); 231 | }); 232 | -------------------------------------------------------------------------------- /middleware/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../tsconfig.json", 3 | "compilerOptions": { 4 | "rootDir": "./src", 5 | "outDir": "./build" 6 | }, 7 | "include": ["src/**/*.ts"] 8 | } 9 | -------------------------------------------------------------------------------- /nodemon.json: -------------------------------------------------------------------------------- 1 | { 2 | "ignore": ["**/*.test.ts", ".git", "node_modules"], 3 | "watch": ["src"], 4 | "exec": "npm run build && npm run start", 5 | "ext": "ts" 6 | } 7 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rendertron", 3 | "version": "3.1.0", 4 | "description": "Renders webpages using headless Chrome for usage by bots", 5 | "license": "Apache-2.0", 6 | "repository": "https://github.com/GoogleChrome/rendertron", 7 | "engines": { 8 | "node": ">=10" 9 | }, 10 | "main": "build/rendertron.js", 11 | "types": "build/rendertron.d.ts", 12 | "bin": "bin/rendertron", 13 | "scripts": { 14 | "build": "tsc", 15 | "prepack": "npm run build", 16 | "start": "node build/rendertron.js", 17 | "format": "npm run format:eslint && npm run format:prettier", 18 | "format:eslint": "npm run lint -- --fix", 19 | "format:prettier": "prettier \"**/*.{html,js,json,md,ts}\" --ignore-path .gitignore --write", 20 | "lint": "eslint \"**/*.{js,ts}\" --ignore-path .gitignore", 21 | "monitor": "nodemon", 22 | "monitor-inspect": "nodemon --inspect src/main.js", 23 | "test": "(cd test-resources && npm install) && npm run build && ava build/test/app-test.js --timeout 5s", 24 | "start-emulator": "(gcloud beta emulators datastore start --no-store-on-disk --project emulator-project --host-port localhost:8380 &) 2>&1 | grep -m1 'now running'", 25 | "test-cache": "npm run build && npm run start-emulator && $(gcloud beta emulators datastore env-init) && export GCLOUD_PROJECT=emulator-project && ava build/test/*-cache-test.js" 26 | }, 27 | "files": [ 28 | "bin/", 29 | "build/", 30 | "!build/test/", 31 | "app.json" 32 | ], 33 | "dependencies": { 34 | "@webcomponents/webcomponentsjs": "^2.5.0", 35 | "chrome-launcher": "^0.13.4", 36 | "chrome-remote-interface": "^0.29.0", 37 | "fs-extra": "^9.1.0", 38 | "koa": "^2.13.1", 39 | "koa-bodyparser": "^4.3.0", 40 | "koa-compress": "^5.0.1", 41 | "koa-logger": "^3.2.1", 42 | "koa-route": "^3.2.0", 43 | "koa-send": "^5.0.1", 44 | "koa-static": "^5.0.0", 45 | "performance-now": "^2.1.0", 46 | "puppeteer": "^10.0.0", 47 | "semver": "^7.3.4" 48 | }, 49 | "devDependencies": { 50 | "@google-cloud/datastore": "^6.3.1", 51 | "@types/fs-extra": "^9.0.7", 52 | "@types/koa": "^2.11.6", 53 | "@types/koa-bodyparser": "^4.3.0", 54 | "@types/koa-compress": "^4.0.1", 55 | "@types/koa-logger": "^3.1.1", 56 | "@types/koa-route": "^3.2.4", 57 | "@types/koa-send": "^4.1.2", 58 | "@types/koa-static": "^4.0.1", 59 | "@types/node": "^14.14.10", 60 | "@types/puppeteer": "^5.4.3", 61 | "@types/supertest": "^2.0.10", 62 | "@typescript-eslint/parser": "^4.14.2", 63 | "@typescript-eslint/eslint-plugin": "^4.15.1", 64 | "ava": "^3.15.0", 65 | "eslint": "^7.23.0", 66 | "nodemon": "^2.0.6", 67 | "prettier": "^2.2.1", 68 | "supertest": "^6.1.3", 69 | "typescript": "4.0.5" 70 | }, 71 | "prettier": { 72 | "singleQuote": true 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/config.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2018 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | not 6 | * use this file except in compliance with the License. You may obtain a copy 7 | of 8 | * the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 15 | * License for the specific language governing permissions and limitations 16 | under 17 | * the License. 18 | */ 19 | 20 | 'use strict'; 21 | 22 | import * as fse from 'fs-extra'; 23 | import * as path from 'path'; 24 | import * as os from 'os'; 25 | 26 | const CONFIG_PATH = path.resolve(__dirname, '../config.json'); 27 | 28 | export type Config = { 29 | cache: 'datastore' | 'memory' | 'filesystem' | null; 30 | cacheConfig: { [key: string]: string }; 31 | timeout: number; 32 | port: string; 33 | host: string; 34 | width: number; 35 | height: number; 36 | reqHeaders: { [key: string]: string }; 37 | headers: { [key: string]: string }; 38 | puppeteerArgs: Array; 39 | renderOnly: Array; 40 | closeBrowser: boolean; 41 | restrictedUrlPattern: string | null; 42 | }; 43 | 44 | export class ConfigManager { 45 | public static config: Config = { 46 | cache: null, 47 | cacheConfig: { 48 | snapshotDir: path.join(os.tmpdir(), 'rendertron'), 49 | cacheDurationMinutes: (60 * 24).toString(), 50 | cacheMaxEntries: '100', 51 | }, 52 | timeout: 10000, 53 | port: '3000', 54 | host: '0.0.0.0', 55 | width: 1000, 56 | height: 1000, 57 | reqHeaders: {}, 58 | headers: {}, 59 | puppeteerArgs: ['--no-sandbox'], 60 | renderOnly: [], 61 | closeBrowser: false, 62 | restrictedUrlPattern: null 63 | }; 64 | 65 | static async getConfiguration(): Promise { 66 | // Load config.json if it exists. 67 | if (fse.pathExistsSync(CONFIG_PATH)) { 68 | const configJson = await fse.readJson(CONFIG_PATH); 69 | 70 | // merge cacheConfig 71 | const cacheConfig = Object.assign( 72 | ConfigManager.config.cacheConfig, 73 | configJson.cacheConfig 74 | ); 75 | 76 | ConfigManager.config = Object.assign(ConfigManager.config, configJson); 77 | 78 | ConfigManager.config.cacheConfig = cacheConfig; 79 | } 80 | return ConfigManager.config; 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/datastore-cache.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2018 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | not 6 | * use this file except in compliance with the License. You may obtain a copy 7 | of 8 | * the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 15 | * License for the specific language governing permissions and limitations 16 | under 17 | * the License. 18 | */ 19 | 20 | 'use strict'; 21 | 22 | import Koa from 'koa'; 23 | import { Config, ConfigManager } from './config'; 24 | 25 | import { Datastore } from '@google-cloud/datastore'; 26 | import { entity } from '@google-cloud/datastore/build/src/entity'; 27 | 28 | type CacheContent = { 29 | saved: Date; 30 | expires: Date; 31 | headers: string; 32 | payload: string; 33 | }; 34 | 35 | type DatastoreObject = { 36 | [Datastore.KEY]: Record; 37 | }; 38 | 39 | export class DatastoreCache { 40 | private datastore: Datastore = new Datastore(); 41 | private config: Config = ConfigManager.config; 42 | 43 | async clearCache() { 44 | const query = this.datastore.createQuery('Page'); 45 | const data = await query.run(); 46 | const entities = data[0]; 47 | const entityKeys = entities.map( 48 | (entity: Record) => 49 | (entity as DatastoreObject)[Datastore.KEY] 50 | ); 51 | console.log(`Removing ${entities.length} items from the cache`); 52 | await this.datastore.delete(entityKeys); 53 | // TODO(samli): check info (data[1]) and loop through pages of entities to 54 | // delete. 55 | } 56 | 57 | async cacheContent( 58 | // eslint-disable-next-line @typescript-eslint/ban-types 59 | key: object, 60 | headers: Record, 61 | payload: Buffer 62 | ) { 63 | const now = new Date(); 64 | // query datastore to see if we are over the max number of allowed entries, and max entries isn't disabled with a value of -1 and remove over quota, removes oldest first 65 | if (parseInt(this.config.cacheConfig.cacheMaxEntries) !== -1) { 66 | const query = this.datastore 67 | .createQuery('Page') 68 | .select('__key__') 69 | .order('expires'); 70 | // eslint-disable-next-line @typescript-eslint/no-this-alias 71 | const self = this; 72 | this.datastore.runQuery(query, function (err, entities) { 73 | if (err) { 74 | console.log(`datastore err: ${err} reported`); 75 | } 76 | const dataStoreCache = (entities || []).map( 77 | (entity: Record) => 78 | (entity as DatastoreObject)[Datastore.KEY] 79 | ); 80 | if ( 81 | dataStoreCache.length >= 82 | parseInt(self.config.cacheConfig.cacheMaxEntries) 83 | ) { 84 | const toRemove = 85 | dataStoreCache.length - 86 | parseInt(self.config.cacheConfig.cacheMaxEntries) + 87 | 1; 88 | const toDelete = dataStoreCache.slice(0, toRemove); 89 | console.log(`Deleting: ${toRemove}`); 90 | self.datastore.delete(toDelete); 91 | } 92 | }); 93 | } 94 | const entity = { 95 | key: key, 96 | data: [ 97 | { name: 'saved', value: now }, 98 | { 99 | name: 'expires', 100 | value: new Date( 101 | now.getTime() + 102 | parseInt(this.config.cacheConfig.cacheDurationMinutes) * 60 * 1000 103 | ), 104 | }, 105 | { 106 | name: 'headers', 107 | value: JSON.stringify(headers), 108 | excludeFromIndexes: true, 109 | }, 110 | { 111 | name: 'payload', 112 | value: JSON.stringify(payload), 113 | excludeFromIndexes: true, 114 | }, 115 | ], 116 | }; 117 | await this.datastore.save(entity); 118 | } 119 | 120 | async removeEntry(key: string) { 121 | const datastoreKey = this.datastore.key(['Page', key]); 122 | await this.datastore.delete(datastoreKey); 123 | } 124 | 125 | async getCachedContent(ctx: Koa.Context, key: entity.Key) { 126 | if (ctx.query.refreshCache) { 127 | return null; 128 | } else { 129 | return await this.datastore.get(key); 130 | } 131 | } 132 | 133 | /** 134 | * Returns middleware function. 135 | */ 136 | middleware() { 137 | const cacheContent = this.cacheContent.bind(this); 138 | 139 | return async function ( 140 | this: DatastoreCache, 141 | ctx: Koa.Context, 142 | next: () => Promise 143 | ) { 144 | // Cache based on full URL. This means requests with different params are 145 | // cached separately (except for refreshCache parameter) 146 | let cacheKey = ctx.url.replace(/&?refreshCache=(?:true|false)&?/i, ''); 147 | 148 | if (cacheKey.charAt(cacheKey.length - 1) === '?') { 149 | cacheKey = cacheKey.slice(0, -1); 150 | } 151 | const key = this.datastore.key(['Page', cacheKey]); 152 | const results = await this.getCachedContent(ctx, key); 153 | if (results && results.length && results[0] !== undefined) { 154 | const content = results[0] as CacheContent; 155 | // Serve cached content if its not expired. 156 | if ( 157 | content.expires.getTime() >= new Date().getTime() || 158 | parseInt(this.config.cacheConfig.cacheDurationMinutes) === -1 159 | ) { 160 | const headers = JSON.parse(content.headers); 161 | ctx.set(headers); 162 | ctx.set('x-rendertron-cached', content.saved.toUTCString()); 163 | try { 164 | let payload = JSON.parse(content.payload); 165 | if ( 166 | payload && 167 | typeof payload === 'object' && 168 | payload.type === 'Buffer' 169 | ) { 170 | payload = Buffer.from(payload); 171 | } 172 | ctx.body = payload; 173 | return; 174 | } catch (error) { 175 | console.log( 176 | 'Erroring parsing cache contents, falling back to normal render' 177 | ); 178 | } 179 | } 180 | } 181 | 182 | await next(); 183 | 184 | if (ctx.status === 200) { 185 | cacheContent(key, ctx.response.headers, ctx.body); 186 | } 187 | }.bind(this); 188 | } 189 | 190 | invalidateHandler() { 191 | return this.handleInvalidateRequest.bind(this); 192 | } 193 | 194 | private async handleInvalidateRequest(ctx: Koa.Context, url: string) { 195 | this.removeEntry(url); 196 | ctx.status = 200; 197 | } 198 | 199 | clearAllCacheHandler() { 200 | return this.handleClearAllCacheRequest.bind(this); 201 | } 202 | 203 | private async handleClearAllCacheRequest(ctx: Koa.Context) { 204 | this.clearCache(); 205 | ctx.status = 200; 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /src/filesystem-cache.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | not 6 | * use this file except in compliance with the License. You may obtain a copy 7 | of 8 | * the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 15 | * License for the specific language governing permissions and limitations 16 | under 17 | * the License. 18 | */ 19 | 20 | 'use strict'; 21 | 22 | import { createHash } from 'crypto'; 23 | 24 | import * as fs from 'fs'; 25 | import * as path from 'path'; 26 | import * as Koa from 'koa'; 27 | import { Config } from './config'; 28 | 29 | type CacheContent = { 30 | saved: Date; 31 | expires: Date; 32 | response: string; 33 | payload: string; 34 | }; 35 | 36 | export class FilesystemCache { 37 | private config: Config; 38 | private cacheConfig: { [key: string]: string }; 39 | 40 | constructor(config: Config) { 41 | this.config = config; 42 | this.cacheConfig = this.config.cacheConfig; 43 | } 44 | 45 | hashCode = (s: string) => { 46 | const hash = 0; 47 | if (s.length === 0) return hash.toString(); 48 | 49 | return createHash('md5').update(s).digest('hex'); 50 | }; 51 | 52 | getDir = (key: string) => { 53 | const dir = this.cacheConfig.snapshotDir; 54 | if (!fs.existsSync(dir)) { 55 | fs.mkdirSync(dir, { recursive: true }); 56 | } 57 | 58 | if (key) { 59 | return path.join(dir, key); 60 | } 61 | 62 | return dir; 63 | }; 64 | 65 | async clearCache(key: string) { 66 | let cleanKey = key; 67 | if (!cleanKey.endsWith('.json')) { 68 | cleanKey += '.json'; 69 | } 70 | if (fs.existsSync(path.join(this.getDir(''), cleanKey))) { 71 | try { 72 | fs.unlinkSync(path.join(this.getDir(''), cleanKey)); 73 | console.log(`deleting: ${path.join(this.getDir(''), cleanKey)}`); 74 | } catch (err) { 75 | console.log(err); 76 | } 77 | } 78 | } 79 | 80 | clearAllCacheHandler() { 81 | return this.handleClearAllCacheRequest.bind(this); 82 | } 83 | 84 | private async handleClearAllCacheRequest(ctx: Koa.Context) { 85 | await this.clearAllCache(); 86 | ctx.status = 200; 87 | } 88 | 89 | async clearAllCache() { 90 | return new Promise((resolve) => { 91 | fs.readdir(this.getDir(''), (err, files) => { 92 | if (err) throw err; 93 | for (const file of files) { 94 | fs.unlink(path.join(this.getDir(''), file), (err) => { 95 | if (err) throw err; 96 | }); 97 | } 98 | resolve(); 99 | }); 100 | }); 101 | } 102 | 103 | private sortFilesByModDate(numCache: string[]) { 104 | const dirsDate = []; 105 | for (let i = 0; i < numCache.length; i++) { 106 | if (fs.existsSync(path.join(this.getDir(''), numCache[i]))) { 107 | const stats = fs.statSync(path.join(this.getDir(''), numCache[i])); 108 | const mtime = stats.mtime; 109 | dirsDate.push({ fileName: numCache[i], age: mtime.getTime() }); 110 | } 111 | } 112 | dirsDate.sort((a, b) => (a.age > b.age ? 1 : -1)); 113 | return dirsDate; 114 | } 115 | 116 | cacheContent(key: string, ctx: Koa.Context) { 117 | const responseHeaders = ctx.response; 118 | const responseBody = ctx.body; 119 | const request = ctx.request; 120 | // check size of stored cache to see if we are over the max number of allowed entries, and max entries isn't disabled with a value of -1 and remove over quota, removes oldest first 121 | if (parseInt(this.config.cacheConfig.cacheMaxEntries) !== -1) { 122 | const numCache = fs.readdirSync(this.getDir('')); 123 | if ( 124 | numCache.length >= parseInt(this.config.cacheConfig.cacheMaxEntries) 125 | ) { 126 | const toRemove = 127 | numCache.length - 128 | parseInt(this.config.cacheConfig.cacheMaxEntries) + 129 | 1; 130 | let dirsDate = this.sortFilesByModDate(numCache); 131 | dirsDate = dirsDate.slice(0, toRemove); 132 | dirsDate.forEach((rmDir) => { 133 | if (rmDir.fileName !== key + '.json') { 134 | console.log( 135 | `max cache entries reached - removing: ${rmDir.fileName}` 136 | ); 137 | this.clearCache(rmDir.fileName); 138 | } 139 | }); 140 | } 141 | } 142 | fs.writeFileSync( 143 | path.join(this.getDir(''), key + '.json'), 144 | JSON.stringify({ responseBody, responseHeaders, request }) 145 | ); 146 | } 147 | 148 | getCachedContent(ctx: Koa.Context, key: string): CacheContent | null { 149 | if (ctx.query.refreshCache) { 150 | return null; 151 | } else { 152 | try { 153 | const cacheFile = JSON.parse( 154 | fs.readFileSync(path.join(this.getDir(''), key + '.json'), 'utf8') 155 | ); 156 | const payload = cacheFile.responseBody; 157 | const response = JSON.stringify(cacheFile.responseHeaders); 158 | if (!payload) { 159 | return null; 160 | } 161 | const fd = fs.openSync(path.join(this.getDir(''), key + '.json'), 'r'); 162 | const stats = fs.fstatSync(fd); 163 | // use modification time as the saved time 164 | const saved = stats.mtime; 165 | const expires = new Date( 166 | saved.getTime() + 167 | parseInt(this.cacheConfig.cacheDurationMinutes) * 60 * 1000 168 | ); 169 | return { 170 | saved, 171 | expires, 172 | payload, 173 | response, 174 | }; 175 | } catch (err) { 176 | return null; 177 | } 178 | } 179 | } 180 | invalidateHandler() { 181 | return this.handleInvalidateRequest.bind(this); 182 | } 183 | 184 | sanitizeKey(key: string) { 185 | // Cache based on full URL. This means requests with different params are 186 | // cached separately (except for refreshCache parameter 187 | let cacheKey = key.replace(/&?refreshCache=(?:true|false)&?/i, ''); 188 | 189 | if (cacheKey.charAt(cacheKey.length - 1) === '?') { 190 | cacheKey = cacheKey.slice(0, -1); 191 | } 192 | 193 | // remove /render/ from key, only at the start 194 | if (cacheKey.startsWith('/render/')) { 195 | cacheKey = cacheKey.substring(8); 196 | } 197 | 198 | // remove trailing slash from key 199 | cacheKey = cacheKey.replace(/\/$/, ''); 200 | return cacheKey 201 | } 202 | 203 | private async handleInvalidateRequest(ctx: Koa.Context, url: string) { 204 | let cacheKey = this.sanitizeKey(url); 205 | 206 | // remove /invalidate/ from key, only at the start 207 | if (cacheKey.startsWith('/invalidate/')) { 208 | cacheKey = cacheKey.substring(12); 209 | } 210 | 211 | // key is hashed crudely 212 | const key = this.hashCode(cacheKey); 213 | this.clearCache(key); 214 | ctx.status = 200; 215 | } 216 | 217 | 218 | 219 | /** 220 | * Returns middleware function. 221 | */ 222 | middleware() { 223 | const cacheContent = this.cacheContent.bind(this); 224 | 225 | return async function ( 226 | this: FilesystemCache, 227 | ctx: Koa.Context, 228 | next: () => Promise 229 | ) { 230 | 231 | const cacheKey = this.sanitizeKey(ctx.url); 232 | // key is hashed crudely 233 | const key = this.hashCode(cacheKey); 234 | const content = await this.getCachedContent(ctx, key); 235 | if (content) { 236 | // Serve cached content if its not expired. 237 | if ( 238 | content.expires.getTime() >= new Date().getTime() || 239 | parseInt(this.config.cacheConfig.cacheDurationMinutes) === -1 240 | ) { 241 | const response = JSON.parse(content.response); 242 | ctx.set(response.header); 243 | ctx.set('x-rendertron-cached', content.saved.toUTCString()); 244 | ctx.status = response.status; 245 | let payload: string | { type?: string } = content.payload; 246 | try { 247 | payload = JSON.parse(content.payload); 248 | } catch (e) { 249 | // swallow this. 250 | } 251 | try { 252 | if ( 253 | payload && 254 | typeof payload === 'object' && 255 | payload.type === 'Buffer' 256 | ) { 257 | ctx.body = Buffer.from(payload); 258 | } else { 259 | ctx.body = payload; 260 | } 261 | return; 262 | } catch (error) { 263 | console.log( 264 | 'Erroring parsing cache contents, falling back to normal render' 265 | ); 266 | } 267 | } 268 | } 269 | 270 | await next(); 271 | 272 | if (ctx.status === 200) { 273 | cacheContent(key, ctx); 274 | } 275 | }.bind(this); 276 | } 277 | } 278 | -------------------------------------------------------------------------------- /src/index.html: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | 19 | 20 | 24 | 25 | Rendertron 26 | 30 | 31 | 35 | 39 | 40 | 64 | 65 | 223 | 224 | 225 | 226 |

Rendertron

227 | 228 | 238 | 239 |
240 | 244 | 248 | 252 | 253 | 257 | 258 |
259 | 260 | 307 | 308 | 309 | -------------------------------------------------------------------------------- /src/memory-cache.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | not 6 | * use this file except in compliance with the License. You may obtain a copy 7 | of 8 | * the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 15 | * License for the specific language governing permissions and limitations 16 | under 17 | * the License. 18 | */ 19 | 20 | 'use strict'; 21 | 22 | import Koa from 'koa'; 23 | import { Config, ConfigManager } from './config'; 24 | 25 | type CacheEntry = { 26 | saved: Date; 27 | expires: Date; 28 | headers: string; 29 | payload: string; 30 | }; 31 | 32 | // implements a cache that uses the "least-recently used" strategy to clear unused elements. 33 | export class MemoryCache { 34 | private store: Map = new Map(); 35 | private config: Config = ConfigManager.config; 36 | 37 | async clearCache() { 38 | this.store.clear(); 39 | } 40 | 41 | cacheContent( 42 | key: string, 43 | headers: { [key: string]: string }, 44 | payload: Buffer 45 | ) { 46 | // if the cache gets too big, we evict the least recently used entry (i.e. the first value in the map) 47 | if ( 48 | this.store.size >= parseInt(this.config.cacheConfig.cacheMaxEntries) && 49 | parseInt(this.config.cacheConfig.cacheMaxEntries) !== -1 50 | ) { 51 | const keyToDelete = this.store.keys().next().value; 52 | this.store.delete(keyToDelete); 53 | } 54 | 55 | const now = new Date(); 56 | this.store.set(key, { 57 | saved: new Date(), 58 | expires: new Date( 59 | now.getTime() + 60 | parseInt(this.config.cacheConfig.cacheDurationMinutes) * 60 * 1000 61 | ), 62 | headers: JSON.stringify(headers), 63 | payload: JSON.stringify(payload), 64 | }); 65 | } 66 | 67 | getCachedContent(ctx: Koa.Context, key: string) { 68 | const now = new Date(); 69 | if (ctx.query.refreshCache) { 70 | return null; 71 | } 72 | let entry = this.store.get(key); 73 | // we need to re-insert this key to mark it as "most recently read", will remove the cache if expired 74 | if (entry) { 75 | // if the cache is expired, delete and recreate 76 | if ( 77 | entry.expires.getTime() <= now.getTime() && 78 | parseInt(this.config.cacheConfig.cacheDurationMinutes) !== -1 79 | ) { 80 | this.store.delete(key); 81 | entry = undefined; 82 | } else { 83 | this.store.delete(key); 84 | this.store.set(key, entry); 85 | } 86 | } 87 | return entry; 88 | } 89 | 90 | removeEntry(key: string) { 91 | this.store.delete(key); 92 | } 93 | 94 | sanitizeKey(key: string) { 95 | // Cache based on full URL. This means requests with different params are 96 | // cached separately (except for refreshCache parameter 97 | let cacheKey = key.replace(/&?refreshCache=(?:true|false)&?/i, ''); 98 | 99 | if (cacheKey.charAt(cacheKey.length - 1) === '?') { 100 | cacheKey = cacheKey.slice(0, -1); 101 | } 102 | 103 | // remove /render/ from key, only at the start 104 | if (cacheKey.startsWith('/render/')) { 105 | cacheKey = cacheKey.substring(8); 106 | } 107 | 108 | // remove trailing slash from key 109 | cacheKey = cacheKey.replace(/\/$/, ''); 110 | return cacheKey 111 | } 112 | 113 | middleware() { 114 | return this.handleRequest.bind(this); 115 | } 116 | 117 | invalidateHandler() { 118 | return this.handleInvalidateRequest.bind(this); 119 | } 120 | 121 | private async handleInvalidateRequest(ctx: Koa.Context, url: string) { 122 | this.removeEntry(url); 123 | ctx.status = 200; 124 | } 125 | 126 | private async handleRequest(ctx: Koa.Context, next: () => Promise) { 127 | // Cache based on full URL. This means requests with different params are 128 | // cached separately. 129 | 130 | const cacheKey = this.sanitizeKey(ctx.url); 131 | const cachedContent = this.getCachedContent(ctx, cacheKey); 132 | if (cachedContent) { 133 | const headers = JSON.parse(cachedContent.headers); 134 | ctx.set(headers); 135 | ctx.set('x-rendertron-cached', cachedContent.saved.toUTCString()); 136 | try { 137 | let payload = JSON.parse(cachedContent.payload); 138 | if ( 139 | payload && 140 | typeof payload === 'object' && 141 | payload.type === 'Buffer' 142 | ) { 143 | payload = Buffer.from(payload); 144 | } 145 | ctx.body = payload; 146 | return; 147 | } catch (error) { 148 | console.log( 149 | 'Erroring parsing cache contents, falling back to normal render' 150 | ); 151 | } 152 | } 153 | 154 | await next(); 155 | 156 | if (ctx.status === 200) { 157 | this.cacheContent(cacheKey, ctx.response.headers, ctx.body); 158 | } 159 | } 160 | 161 | clearAllCacheHandler() { 162 | return this.handleClearAllCacheRequest.bind(this); 163 | } 164 | 165 | private async handleClearAllCacheRequest(ctx: Koa.Context) { 166 | this.clearCache(); 167 | ctx.status = 200; 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /src/renderer.ts: -------------------------------------------------------------------------------- 1 | import puppeteer, { ScreenshotOptions } from 'puppeteer'; 2 | import url from 'url'; 3 | import { dirname } from 'path'; 4 | 5 | import { Config } from './config'; 6 | 7 | type SerializedResponse = { 8 | status: number; 9 | customHeaders: Map; 10 | content: string; 11 | }; 12 | 13 | type ViewportDimensions = { 14 | width: number; 15 | height: number; 16 | }; 17 | 18 | const MOBILE_USERAGENT = 19 | 'Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Mobile Safari/537.36'; 20 | 21 | /** 22 | * Wraps Puppeteer's interface to Headless Chrome to expose high level rendering 23 | * APIs that are able to handle web components and PWAs. 24 | */ 25 | export class Renderer { 26 | private browser: puppeteer.Browser; 27 | private config: Config; 28 | 29 | constructor(browser: puppeteer.Browser, config: Config) { 30 | this.browser = browser; 31 | this.config = config; 32 | } 33 | 34 | private restrictRequest(requestUrl: string): boolean { 35 | const parsedUrl = url.parse(requestUrl); 36 | 37 | if (parsedUrl.hostname && parsedUrl.hostname.match(/\.internal$/)) { 38 | return true; 39 | } 40 | 41 | if (this.config.restrictedUrlPattern && requestUrl.match(new RegExp(this.config.restrictedUrlPattern))) { 42 | return true; 43 | } 44 | 45 | return false; 46 | } 47 | 48 | async serialize( 49 | requestUrl: string, 50 | isMobile: boolean, 51 | timezoneId?: string 52 | ): Promise { 53 | /** 54 | * Executed on the page after the page has loaded. Strips script and 55 | * import tags to prevent further loading of resources. 56 | */ 57 | function stripPage() { 58 | // Strip only script tags that contain JavaScript (either no type attribute or one that contains "javascript") 59 | const elements = document.querySelectorAll( 60 | 'script:not([type]), script[type*="javascript"], script[type="module"], link[rel=import]' 61 | ); 62 | for (const e of Array.from(elements)) { 63 | e.remove(); 64 | } 65 | } 66 | 67 | /** 68 | * Injects a tag which allows other resources to load. This 69 | * has no effect on serialised output, but allows it to verify render 70 | * quality. 71 | */ 72 | function injectBaseHref(origin: string, directory: string) { 73 | const bases = document.head.querySelectorAll('base'); 74 | if (bases.length) { 75 | // Patch existing if it is relative. 76 | const existingBase = bases[0].getAttribute('href') || ''; 77 | if (existingBase.startsWith('/')) { 78 | // check if is only "/" if so add the origin only 79 | if (existingBase === '/') { 80 | bases[0].setAttribute('href', origin); 81 | } else { 82 | bases[0].setAttribute('href', origin + existingBase); 83 | } 84 | } 85 | } else { 86 | // Only inject if it doesn't already exist. 87 | const base = document.createElement('base'); 88 | // Base url is the current directory 89 | base.setAttribute('href', origin + directory); 90 | document.head.insertAdjacentElement('afterbegin', base); 91 | } 92 | } 93 | 94 | const page = await this.browser.newPage(); 95 | 96 | // Page may reload when setting isMobile 97 | // https://github.com/GoogleChrome/puppeteer/blob/v1.10.0/docs/api.md#pagesetviewportviewport 98 | await page.setViewport({ 99 | width: this.config.width, 100 | height: this.config.height, 101 | isMobile, 102 | }); 103 | 104 | if (isMobile) { 105 | page.setUserAgent(MOBILE_USERAGENT); 106 | } 107 | 108 | if (timezoneId) { 109 | try { 110 | await page.emulateTimezone(timezoneId); 111 | } catch (e) { 112 | if (e.message.includes('Invalid timezone')) { 113 | return { 114 | status: 400, 115 | customHeaders: new Map(), 116 | content: 'Invalid timezone id', 117 | }; 118 | } 119 | } 120 | } 121 | 122 | await page.setExtraHTTPHeaders(this.config.reqHeaders); 123 | 124 | page.evaluateOnNewDocument('customElements.forcePolyfill = true'); 125 | page.evaluateOnNewDocument('ShadyDOM = {force: true}'); 126 | page.evaluateOnNewDocument('ShadyCSS = {shimcssproperties: true}'); 127 | 128 | await page.setRequestInterception(true); 129 | 130 | page.on('request', (interceptedRequest: puppeteer.HTTPRequest) => { 131 | if (this.restrictRequest(interceptedRequest.url())) { 132 | interceptedRequest.abort(); 133 | } else { 134 | interceptedRequest.continue(); 135 | } 136 | }); 137 | 138 | let response: puppeteer.HTTPResponse | null = null; 139 | // Capture main frame response. This is used in the case that rendering 140 | // times out, which results in puppeteer throwing an error. This allows us 141 | // to return a partial response for what was able to be rendered in that 142 | // time frame. 143 | page.on('response', (r: puppeteer.HTTPResponse) => { 144 | if (!response) { 145 | response = r; 146 | } 147 | }); 148 | 149 | try { 150 | // Navigate to page. Wait until there are no oustanding network requests. 151 | response = await page.goto(requestUrl, { 152 | timeout: this.config.timeout, 153 | waitUntil: 'networkidle0', 154 | }); 155 | } catch (e) { 156 | console.error(e); 157 | } 158 | 159 | if (!response) { 160 | console.error('response does not exist'); 161 | // This should only occur when the page is about:blank. See 162 | // https://github.com/GoogleChrome/puppeteer/blob/v1.5.0/docs/api.md#pagegotourl-options. 163 | await page.close(); 164 | if (this.config.closeBrowser) { 165 | await this.browser.close(); 166 | } 167 | return { status: 400, customHeaders: new Map(), content: '' }; 168 | } 169 | 170 | // Disable access to compute metadata. See 171 | // https://cloud.google.com/compute/docs/storing-retrieving-metadata. 172 | if (response.headers()['metadata-flavor'] === 'Google') { 173 | await page.close(); 174 | if (this.config.closeBrowser) { 175 | await this.browser.close(); 176 | } 177 | return { status: 403, customHeaders: new Map(), content: '' }; 178 | } 179 | 180 | // Set status to the initial server's response code. Check for a tag which overrides the status 182 | // code. 183 | let statusCode = response.status(); 184 | const newStatusCode = await page 185 | .$eval('meta[name="render:status_code"]', (element) => 186 | parseInt(element.getAttribute('content') || '') 187 | ) 188 | .catch(() => undefined); 189 | // On a repeat visit to the same origin, browser cache is enabled, so we may 190 | // encounter a 304 Not Modified. Instead we'll treat this as a 200 OK. 191 | if (statusCode === 304) { 192 | statusCode = 200; 193 | } 194 | // Original status codes which aren't 200 always return with that status 195 | // code, regardless of meta tags. 196 | if (statusCode === 200 && newStatusCode) { 197 | statusCode = newStatusCode; 198 | } 199 | 200 | // Check for tag to allow a custom header in the response 201 | // to the crawlers. 202 | const customHeaders = await page 203 | .$eval('meta[name="render:header"]', (element) => { 204 | const result = new Map(); 205 | const header = element.getAttribute('content'); 206 | if (header) { 207 | const i = header.indexOf(':'); 208 | if (i !== -1) { 209 | result.set( 210 | header.substr(0, i).trim(), 211 | header.substring(i + 1).trim() 212 | ); 213 | } 214 | } 215 | return JSON.stringify([...result]); 216 | }) 217 | .catch(() => undefined); 218 | 219 | // Remove script & import tags. 220 | await page.evaluate(stripPage); 221 | // Inject tag with the origin of the request (ie. no path). 222 | const parsedUrl = url.parse(requestUrl); 223 | await page.evaluate( 224 | injectBaseHref, 225 | `${parsedUrl.protocol}//${parsedUrl.host}`, 226 | `${dirname(parsedUrl.pathname || '')}` 227 | ); 228 | 229 | // Serialize page. 230 | const result = (await page.content()) as string; 231 | 232 | await page.close(); 233 | if (this.config.closeBrowser) { 234 | await this.browser.close(); 235 | } 236 | return { 237 | status: statusCode, 238 | customHeaders: customHeaders 239 | ? new Map(JSON.parse(customHeaders)) 240 | : new Map(), 241 | content: result, 242 | }; 243 | } 244 | 245 | async screenshot( 246 | url: string, 247 | isMobile: boolean, 248 | dimensions: ViewportDimensions, 249 | options?: ScreenshotOptions, 250 | timezoneId?: string 251 | ): Promise { 252 | const page = await this.browser.newPage(); 253 | 254 | // Page may reload when setting isMobile 255 | // https://github.com/GoogleChrome/puppeteer/blob/v1.10.0/docs/api.md#pagesetviewportviewport 256 | await page.setViewport({ 257 | width: dimensions.width, 258 | height: dimensions.height, 259 | isMobile, 260 | }); 261 | 262 | if (isMobile) { 263 | page.setUserAgent(MOBILE_USERAGENT); 264 | } 265 | 266 | await page.setRequestInterception(true); 267 | 268 | page.addListener('request', (interceptedRequest: puppeteer.HTTPRequest) => { 269 | if (this.restrictRequest(interceptedRequest.url())) { 270 | interceptedRequest.abort(); 271 | } else { 272 | interceptedRequest.continue(); 273 | } 274 | }); 275 | 276 | if (timezoneId) { 277 | await page.emulateTimezone(timezoneId); 278 | } 279 | 280 | let response: puppeteer.HTTPResponse | null = null; 281 | 282 | try { 283 | // Navigate to page. Wait until there are no oustanding network requests. 284 | response = await page.goto(url, { 285 | timeout: this.config.timeout, 286 | waitUntil: 'networkidle0', 287 | }); 288 | } catch (e) { 289 | console.error(e); 290 | } 291 | 292 | if (!response) { 293 | await page.close(); 294 | if (this.config.closeBrowser) { 295 | await this.browser.close(); 296 | } 297 | throw new ScreenshotError('NoResponse'); 298 | } 299 | 300 | // Disable access to compute metadata. See 301 | // https://cloud.google.com/compute/docs/storing-retrieving-metadata. 302 | if (response.headers()['metadata-flavor'] === 'Google') { 303 | await page.close(); 304 | if (this.config.closeBrowser) { 305 | await this.browser.close(); 306 | } 307 | throw new ScreenshotError('Forbidden'); 308 | } 309 | 310 | // Must be jpeg & binary format. 311 | const screenshotOptions: ScreenshotOptions = { 312 | type: options?.type || 'jpeg', 313 | encoding: options?.encoding || 'binary', 314 | }; 315 | // Screenshot returns a buffer based on specified encoding above. 316 | // https://github.com/GoogleChrome/puppeteer/blob/v1.8.0/docs/api.md#pagescreenshotoptions 317 | const buffer = (await page.screenshot(screenshotOptions)) as Buffer; 318 | await page.close(); 319 | if (this.config.closeBrowser) { 320 | await this.browser.close(); 321 | } 322 | return buffer; 323 | } 324 | } 325 | 326 | type ErrorType = 'Forbidden' | 'NoResponse'; 327 | 328 | export class ScreenshotError extends Error { 329 | type: ErrorType; 330 | 331 | constructor(type: ErrorType) { 332 | super(type); 333 | 334 | this.name = this.constructor.name; 335 | 336 | this.type = type; 337 | } 338 | } 339 | -------------------------------------------------------------------------------- /src/rendertron.ts: -------------------------------------------------------------------------------- 1 | import Koa from 'koa'; 2 | import bodyParser from 'koa-bodyparser'; 3 | import koaCompress from 'koa-compress'; 4 | import route from 'koa-route'; 5 | import koaSend from 'koa-send'; 6 | import koaLogger from 'koa-logger'; 7 | import path from 'path'; 8 | import puppeteer from 'puppeteer'; 9 | import url from 'url'; 10 | 11 | import { Renderer, ScreenshotError } from './renderer'; 12 | import { Config, ConfigManager } from './config'; 13 | 14 | /** 15 | * Rendertron rendering service. This runs the server which routes rendering 16 | * requests through to the renderer. 17 | */ 18 | export class Rendertron { 19 | app: Koa = new Koa(); 20 | private config: Config = ConfigManager.config; 21 | private renderer: Renderer | undefined; 22 | private port = process.env.PORT || null; 23 | private host = process.env.HOST || null; 24 | 25 | async createRenderer(config: Config) { 26 | const browser = await puppeteer.launch({ args: config.puppeteerArgs }); 27 | 28 | browser.on('disconnected', () => { 29 | this.createRenderer(config); 30 | }); 31 | 32 | this.renderer = new Renderer(browser, config); 33 | } 34 | 35 | async initialize(config?: Config) { 36 | // Load config 37 | this.config = config || (await ConfigManager.getConfiguration()); 38 | 39 | this.port = this.port || this.config.port; 40 | this.host = this.host || this.config.host; 41 | 42 | await this.createRenderer(this.config); 43 | 44 | this.app.use(koaLogger()); 45 | 46 | this.app.use(koaCompress()); 47 | 48 | this.app.use(bodyParser()); 49 | 50 | this.app.use( 51 | route.get('/', async (ctx: Koa.Context) => { 52 | await koaSend(ctx, 'index.html', { 53 | root: path.resolve(__dirname, '../src'), 54 | }); 55 | }) 56 | ); 57 | this.app.use( 58 | route.get('/_ah/health', (ctx: Koa.Context) => (ctx.body = 'OK')) 59 | ); 60 | 61 | // Optionally enable cache for rendering requests. 62 | if (this.config.cache === 'datastore') { 63 | const { DatastoreCache } = await import('./datastore-cache'); 64 | const datastoreCache = new DatastoreCache(); 65 | this.app.use( 66 | route.get('/invalidate/:url(.*)', datastoreCache.invalidateHandler()) 67 | ); 68 | this.app.use( 69 | route.get('/invalidate/', datastoreCache.clearAllCacheHandler()) 70 | ); 71 | this.app.use(datastoreCache.middleware()); 72 | } else if (this.config.cache === 'memory') { 73 | const { MemoryCache } = await import('./memory-cache'); 74 | const memoryCache = new MemoryCache(); 75 | this.app.use( 76 | route.get('/invalidate/:url(.*)', memoryCache.invalidateHandler()) 77 | ); 78 | this.app.use( 79 | route.get('/invalidate/', memoryCache.clearAllCacheHandler()) 80 | ); 81 | this.app.use(memoryCache.middleware()); 82 | } else if (this.config.cache === 'filesystem') { 83 | const { FilesystemCache } = await import('./filesystem-cache'); 84 | const filesystemCache = new FilesystemCache(this.config); 85 | this.app.use( 86 | route.get('/invalidate/:url(.*)', filesystemCache.invalidateHandler()) 87 | ); 88 | this.app.use( 89 | route.get('/invalidate/', filesystemCache.clearAllCacheHandler()) 90 | ); 91 | this.app.use(new FilesystemCache(this.config).middleware()); 92 | } 93 | 94 | this.app.use( 95 | route.get('/render/:url(.*)', this.handleRenderRequest.bind(this)) 96 | ); 97 | this.app.use( 98 | route.get('/screenshot/:url(.*)', this.handleScreenshotRequest.bind(this)) 99 | ); 100 | this.app.use( 101 | route.post( 102 | '/screenshot/:url(.*)', 103 | this.handleScreenshotRequest.bind(this) 104 | ) 105 | ); 106 | 107 | return this.app.listen(+this.port, this.host, () => { 108 | console.log(`Listening on port ${this.port}`); 109 | }); 110 | } 111 | 112 | /** 113 | * Checks whether or not the URL is valid. For example, we don't want to allow 114 | * the requester to read the file system via Chrome. 115 | */ 116 | restricted(href: string): boolean { 117 | const parsedUrl = url.parse(href); 118 | const protocol = parsedUrl.protocol || ''; 119 | 120 | if (!protocol.match(/^https?/)) { 121 | return true; 122 | } 123 | 124 | if (parsedUrl.hostname && parsedUrl.hostname.match(/\.internal$/)) { 125 | return true; 126 | } 127 | 128 | if (!this.config.renderOnly.length) { 129 | return false; 130 | } 131 | 132 | for (let i = 0; i < this.config.renderOnly.length; i++) { 133 | if (href.startsWith(this.config.renderOnly[i])) { 134 | return false; 135 | } 136 | } 137 | 138 | return true; 139 | } 140 | 141 | async handleRenderRequest(ctx: Koa.Context, url: string) { 142 | if (!this.renderer) { 143 | throw new Error('No renderer initalized yet.'); 144 | } 145 | 146 | if (this.restricted(url)) { 147 | ctx.status = 403; 148 | return; 149 | } 150 | 151 | const mobileVersion = 'mobile' in ctx.query ? true : false; 152 | 153 | const serialized = await this.renderer.serialize( 154 | url, 155 | mobileVersion, 156 | ctx.query.timezoneId 157 | ); 158 | 159 | for (const key in this.config.headers) { 160 | ctx.set(key, this.config.headers[key]); 161 | } 162 | 163 | // Mark the response as coming from Rendertron. 164 | ctx.set('x-renderer', 'rendertron'); 165 | // Add custom headers to the response like 'Location' 166 | serialized.customHeaders.forEach((value: string, key: string) => 167 | ctx.set(key, value) 168 | ); 169 | ctx.status = serialized.status; 170 | ctx.body = serialized.content; 171 | } 172 | 173 | async handleScreenshotRequest(ctx: Koa.Context, url: string) { 174 | if (!this.renderer) { 175 | throw new Error('No renderer initalized yet.'); 176 | } 177 | 178 | if (this.restricted(url)) { 179 | ctx.status = 403; 180 | return; 181 | } 182 | 183 | const dimensions = { 184 | width: Number(ctx.query['width']) || this.config.width, 185 | height: Number(ctx.query['height']) || this.config.height, 186 | }; 187 | 188 | const mobileVersion = 'mobile' in ctx.query ? true : false; 189 | 190 | try { 191 | const img = await this.renderer.screenshot( 192 | url, 193 | mobileVersion, 194 | dimensions, 195 | ctx.query.timezoneId 196 | ); 197 | 198 | for (const key in this.config.headers) { 199 | ctx.set(key, this.config.headers[key]); 200 | } 201 | 202 | ctx.set('Content-Type', 'image/jpeg'); 203 | ctx.set('Content-Length', img.length.toString()); 204 | ctx.body = img; 205 | } catch (error) { 206 | const err = error as ScreenshotError; 207 | ctx.status = err.type === 'Forbidden' ? 403 : 500; 208 | } 209 | } 210 | } 211 | 212 | async function logUncaughtError(error: Error) { 213 | console.error('Uncaught exception'); 214 | console.error(error); 215 | process.exit(1); 216 | } 217 | 218 | // The type for the unhandleRejection handler is set to contain Promise, 219 | // so we disable that linter rule for the next line 220 | // eslint-disable-next-line @typescript-eslint/no-unused-vars, @typescript-eslint/no-explicit-any 221 | async function logUnhandledRejection(reason: unknown, _: Promise) { 222 | console.error('Unhandled rejection'); 223 | console.error(reason); 224 | process.exit(1); 225 | } 226 | 227 | // Start rendertron if not running inside tests. 228 | if (!module.parent) { 229 | const rendertron = new Rendertron(); 230 | rendertron.initialize(); 231 | 232 | process.on('uncaughtException', logUncaughtError); 233 | process.on('unhandledRejection', logUnhandledRejection); 234 | } 235 | -------------------------------------------------------------------------------- /src/test/app-test.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2018 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | import test, { ExecutionContext } from 'ava'; 18 | import Koa from 'koa'; 19 | import koaStatic from 'koa-static'; 20 | import path from 'path'; 21 | import request from 'supertest'; 22 | import fs from 'fs'; 23 | import os from 'os'; 24 | 25 | import { Rendertron } from '../rendertron'; 26 | 27 | const app = new Koa(); 28 | app.use(koaStatic(path.resolve(__dirname, '../../test-resources'))); 29 | 30 | const testBase = 'http://localhost:1234/'; 31 | 32 | const rendertron = new Rendertron(); 33 | 34 | let server: request.SuperTest; 35 | 36 | test.before(async () => { 37 | server = request(await rendertron.initialize()); 38 | await app.listen(1234); 39 | }); 40 | 41 | test('health check responds correctly', async (t: ExecutionContext) => { 42 | const res = await server.get('/_ah/health'); 43 | t.is(res.status, 200); 44 | }); 45 | 46 | test('renders basic script', async (t: ExecutionContext) => { 47 | const res = await server.get(`/render/${testBase}basic-script.html`); 48 | t.is(res.status, 200); 49 | t.true(res.text.indexOf('document-title') !== -1); 50 | t.is(res.header['x-renderer'], 'rendertron'); 51 | }); 52 | 53 | test('renders script after page load event', async (t: ExecutionContext) => { 54 | const res = await server.get(`/render/${testBase}script-after-load.html`); 55 | t.is(res.status, 200); 56 | t.true(res.text.indexOf('injectedElement') !== -1); 57 | }); 58 | 59 | test('renders HTML docType declaration', async (t: ExecutionContext) => { 60 | const res = await server.get(`/render/${testBase}include-doctype.html`); 61 | t.is(res.status, 200); 62 | t.true(res.text.indexOf('') !== -1); 63 | }); 64 | 65 | test('sets the correct base URL for a subfolder', async (t: ExecutionContext) => { 66 | const res = await server.get(`/render/${testBase}subfolder/index.html`); 67 | const matches = res.text.match(''); 68 | const baseUrl = matches ? matches[1] : ''; 69 | t.is(baseUrl, `${testBase}subfolder`); 70 | }); 71 | 72 | test('sets the correct base URL for the root folder', async (t: ExecutionContext) => { 73 | const res = await server.get(`/render/${testBase}basic-script.html`); 74 | const matches = res.text.match(''); 75 | const baseUrl = matches ? matches[1] : ''; 76 | t.is(baseUrl, `${testBase}`); 77 | }); 78 | 79 | test('sets the correct base URL for an already defined base as /', async (t: ExecutionContext) => { 80 | const res = await server.get(`/render/${testBase}include-base.html`); 81 | const matches = res.text.match(''); 82 | const baseUrl = matches ? matches[1] : ''; 83 | t.is(baseUrl, `${testBase.slice(0, -1)}`); 84 | }); 85 | 86 | test('sets the correct base URL for an already defined base as directory', async (t: ExecutionContext) => { 87 | const res = await server.get( 88 | `/render/${testBase}include-base-as-directory.html` 89 | ); 90 | const matches = res.text.match(''); 91 | const baseUrl = matches ? matches[1] : ''; 92 | t.is(baseUrl, `${testBase}dir1`); 93 | }); 94 | 95 | // This test is failing as the polyfills (shady polyfill & scoping shim) are not 96 | // yet injected properly. 97 | test.failing( 98 | 'renders shadow DOM - no polyfill', 99 | async (t: ExecutionContext) => { 100 | const res = await server.get( 101 | `/render/${testBase}shadow-dom-no-polyfill.html?wc-inject-shadydom=true` 102 | ); 103 | t.is(res.status, 200); 104 | t.true(res.text.indexOf('shadow-root-text') !== -1); 105 | } 106 | ); 107 | 108 | test('renders shadow DOM - polyfill loader', async (t: ExecutionContext) => { 109 | const res = await server.get( 110 | `/render/${testBase}shadow-dom-polyfill-loader.html?wc-inject-shadydom=true` 111 | ); 112 | t.is(res.status, 200); 113 | t.true(res.text.indexOf('shadow-root-text') !== -1); 114 | }); 115 | 116 | test('renders shadow DOM - polyfill loader - different flag', async (t: ExecutionContext) => { 117 | const res = await server.get( 118 | `/render/${testBase}shadow-dom-polyfill-loader.html?wc-inject-shadydom` 119 | ); 120 | t.is(res.status, 200); 121 | t.true(res.text.indexOf('shadow-root-text') !== -1); 122 | }); 123 | 124 | test('renders shadow DOM - webcomponents-lite.js polyfill', async (t: ExecutionContext) => { 125 | const res = await server.get( 126 | `/render/${testBase}shadow-dom-polyfill-all.html?wc-inject-shadydom=true` 127 | ); 128 | t.is(res.status, 200); 129 | t.true(res.text.indexOf('shadow-root-text') !== -1); 130 | }); 131 | 132 | test('script tags and link[rel=import] tags are stripped', async (t: ExecutionContext) => { 133 | const res = await server.get( 134 | `/render/${testBase}include-script.html?wc-inject-polyfills=true` 135 | ); 136 | t.is(res.status, 200); 137 | t.false(res.text.indexOf('script src') !== -1); 138 | t.true(res.text.indexOf('injectedElement') !== -1); 139 | t.false(res.text.indexOf('link rel') !== -1); 140 | // TODO: Fix the webcomponent behaviour in newer chrome releases 141 | //t.true(res.text.indexOf('element-text') !== -1); 142 | }); 143 | 144 | test('script tags for JSON-LD are not stripped', async (t: ExecutionContext) => { 145 | const res = await server.get(`/render/${testBase}include-json-ld.html`); 146 | t.is(res.status, 200); 147 | t.false(res.text.indexOf('script src') !== -1); 148 | t.true(res.text.indexOf('application/ld+json') !== -1); 149 | t.false(res.text.indexOf('javascript') !== -1); 150 | }); 151 | 152 | test('server status code should be forwarded', async (t: ExecutionContext) => { 153 | const res = await server.get(`/render/${testBase}404`); 154 | t.is(res.status, 404); 155 | }); 156 | 157 | test('http status code should be able to be set via a meta tag', async (t: ExecutionContext) => { 158 | const testFile = 'http-meta-status-code.html'; 159 | const res = await server.get( 160 | `/render/${testBase}${testFile}?wc-inject-shadydom=true` 161 | ); 162 | t.is(res.status, 400); 163 | }); 164 | 165 | test('http status codes need to be respected from top to bottom', async (t: ExecutionContext) => { 166 | const testFile = 'http-meta-status-code-multiple.html'; 167 | const res = await server.get( 168 | `/render/${testBase}${testFile}?wc-inject-shadydom=true` 169 | ); 170 | t.is(res.status, 401); 171 | }); 172 | 173 | test('screenshot is an image', async (t: ExecutionContext) => { 174 | const res = await server.post(`/screenshot/${testBase}basic-script.html`); 175 | t.is(res.status, 200); 176 | t.is(res.header['content-type'], 'image/jpeg'); 177 | t.true(res.body.length > 300); 178 | t.is(res.body.length, parseInt(res.header['content-length'])); 179 | }); 180 | 181 | test('screenshot accepts options', async (t: ExecutionContext) => { 182 | const res = await server 183 | .post(`/screenshot/${testBase}basic-script.html`) 184 | .send({ 185 | clip: { x: 100, y: 100, width: 100, height: 100 }, 186 | path: 'test.jpeg', 187 | }); 188 | t.is(res.status, 200); 189 | t.is(res.header['content-type'], 'image/jpeg'); 190 | t.true(res.body.length > 300); 191 | t.is(res.body.length, parseInt(res.header['content-length'])); 192 | }); 193 | 194 | test('invalid url fails', async (t: ExecutionContext) => { 195 | const res = await server.get(`/render/abc`); 196 | t.is(res.status, 403); 197 | }); 198 | 199 | test('unknown url fails', async (t: ExecutionContext) => { 200 | const res = await server.get(`/render/http://unknown.blah.com`); 201 | t.is(res.status, 400); 202 | }); 203 | 204 | test('file url fails', async (t: ExecutionContext) => { 205 | const res = await server.get(`/render/file:///dev/fd/0`); 206 | t.is(res.status, 403); 207 | }); 208 | 209 | test('file url fails for screenshot', async (t: ExecutionContext) => { 210 | const res = await server.get(`/screenshot/file:///dev/fd/0`); 211 | t.is(res.status, 403); 212 | }); 213 | 214 | test('appengine internal url fails', async (t: ExecutionContext) => { 215 | const res = await server.get( 216 | `/render/http://metadata.google.internal/computeMetadata/v1beta1/instance/service-accounts/default/token` 217 | ); 218 | t.is(res.status, 403); 219 | }); 220 | 221 | test('appengine internal url fails for screenshot', async (t: ExecutionContext) => { 222 | const res = await server.get( 223 | `/screenshot/http://metadata.google.internal/computeMetadata/v1beta1/instance/service-accounts/default/token` 224 | ); 225 | t.is(res.status, 403); 226 | }); 227 | 228 | test.failing( 229 | 'explicit render event ends early', 230 | async (t: ExecutionContext) => { 231 | const res = await server.get( 232 | `/render/${testBase}explicit-render-event.html` 233 | ); 234 | t.is(res.status, 200); 235 | t.true(res.text.indexOf('async loaded') !== -1); 236 | } 237 | ); 238 | 239 | test('whitelist ensures other urls do not get rendered', async (t: ExecutionContext) => { 240 | const mockConfig = { 241 | cache: 'memory' as const, 242 | cacheConfig: { 243 | cacheDurationMinutes: '120', 244 | cacheMaxEntries: '50', 245 | }, 246 | timeout: 10000, 247 | port: '3000', 248 | host: '0.0.0.0', 249 | width: 1000, 250 | height: 1000, 251 | reqHeaders: {}, 252 | headers: {}, 253 | puppeteerArgs: ['--no-sandbox'], 254 | renderOnly: [testBase], 255 | closeBrowser: false, 256 | restrictedUrlPattern: null, 257 | }; 258 | const server = request(await new Rendertron().initialize(mockConfig)); 259 | 260 | let res = await server.get(`/render/${testBase}basic-script.html`); 261 | t.is(res.status, 200); 262 | 263 | res = await server.get(`/render/http://anotherDomain.com`); 264 | t.is(res.status, 403); 265 | }); 266 | 267 | test('unknown url fails safely on screenshot', async (t: ExecutionContext) => { 268 | const res = await server.get(`/render/http://unknown.blah.com`); 269 | t.is(res.status, 400); 270 | }); 271 | 272 | test('endpont for invalidating memory cache works if configured', async (t: ExecutionContext) => { 273 | const mockConfig = { 274 | cache: 'memory' as const, 275 | cacheConfig: { 276 | cacheDurationMinutes: '120', 277 | cacheMaxEntries: '50', 278 | }, 279 | timeout: 10000, 280 | port: '3000', 281 | host: '0.0.0.0', 282 | width: 1000, 283 | height: 1000, 284 | reqHeaders: {}, 285 | headers: {}, 286 | puppeteerArgs: ['--no-sandbox'], 287 | renderOnly: [], 288 | closeBrowser: false, 289 | restrictedUrlPattern: null, 290 | }; 291 | const cached_server = request(await new Rendertron().initialize(mockConfig)); 292 | const test_url = `${testBase}basic-script.html`; 293 | await app.listen(1235); 294 | // Make a request which is not in cache 295 | let res = await cached_server.get(`/render/${test_url}`); 296 | t.is(res.status, 200); 297 | t.true(res.text.indexOf('document-title') !== -1); 298 | t.is(res.header['x-renderer'], 'rendertron'); 299 | t.true(res.header['x-rendertron-cached'] == null); 300 | 301 | // Ensure that it is cached 302 | res = await cached_server.get(`/render/${test_url}`); 303 | t.is(res.status, 200); 304 | t.true(res.text.indexOf('document-title') !== -1); 305 | t.is(res.header['x-renderer'], 'rendertron'); 306 | t.true(res.header['x-rendertron-cached'] != null); 307 | 308 | // Invalidate cache and ensure it is not cached 309 | res = await cached_server.get(`/invalidate/${test_url}`); 310 | res = await cached_server.get(`/render/${test_url}`); 311 | t.is(res.status, 200); 312 | t.true(res.text.indexOf('document-title') !== -1); 313 | t.is(res.header['x-renderer'], 'rendertron'); 314 | t.true(res.header['x-rendertron-cached'] == null); 315 | }); 316 | 317 | test('endpont for invalidating filesystem cache works if configured', async (t: ExecutionContext) => { 318 | const mock_config = { 319 | cache: 'filesystem' as const, 320 | cacheConfig: { 321 | cacheDurationMinutes: '120', 322 | cacheMaxEntries: '50', 323 | snapshotDir: path.join(os.tmpdir(), 'rendertron-test-cache'), 324 | }, 325 | timeout: 10000, 326 | port: '3000', 327 | host: '0.0.0.0', 328 | width: 1000, 329 | height: 1000, 330 | reqHeaders: {}, 331 | headers: {}, 332 | puppeteerArgs: ['--no-sandbox'], 333 | renderOnly: [], 334 | closeBrowser: false, 335 | restrictedUrlPattern: null, 336 | }; 337 | const cached_server = request(await new Rendertron().initialize(mock_config)); 338 | const test_url = `/render/${testBase}basic-script.html`; 339 | await app.listen(1236); 340 | // Make a request which is not in cache 341 | let res = await cached_server.get(test_url); 342 | t.is(res.status, 200); 343 | t.true(res.text.indexOf('document-title') !== -1); 344 | t.is(res.header['x-renderer'], 'rendertron'); 345 | t.true(res.header['x-rendertron-cached'] == null); 346 | 347 | // Ensure that it is cached 348 | res = await cached_server.get(test_url); 349 | t.is(res.status, 200); 350 | t.true(res.text.indexOf('document-title') !== -1); 351 | t.is(res.header['x-renderer'], 'rendertron'); 352 | t.true(res.header['x-rendertron-cached'] != null); 353 | 354 | // Invalidate cache and ensure it is not cached 355 | res = await cached_server.get(`/invalidate/${testBase}basic-script.html`); 356 | res = await cached_server.get(test_url); 357 | t.is(res.status, 200); 358 | t.true(res.text.indexOf('document-title') !== -1); 359 | t.is(res.header['x-renderer'], 'rendertron'); 360 | t.true(res.header['x-rendertron-cached'] == null); 361 | 362 | // cleanup cache to prevent future tests failing 363 | res = await cached_server.get(`/invalidate/${testBase}basic-script.html`); 364 | fs.rmdirSync(path.join(os.tmpdir(), 'rendertron-test-cache')); 365 | }); 366 | 367 | test('http header should be set via config', async (t: ExecutionContext) => { 368 | const mock_config = { 369 | cache: 'memory' as const, 370 | cacheConfig: { 371 | cacheDurationMinutes: '120', 372 | cacheMaxEntries: '50', 373 | }, 374 | timeout: 10000, 375 | port: '3000', 376 | host: '0.0.0.0', 377 | width: 1000, 378 | height: 1000, 379 | reqHeaders: { 380 | Referer: 'http://example.com/', 381 | }, 382 | headers: {}, 383 | puppeteerArgs: ['--no-sandbox'], 384 | renderOnly: [], 385 | closeBrowser: false, 386 | restrictedUrlPattern: null, 387 | }; 388 | server = request(await rendertron.initialize(mock_config)); 389 | await app.listen(1237); 390 | const res = await server.get(`/render/${testBase}request-header.html`); 391 | t.is(res.status, 200); 392 | t.true(res.text.indexOf('http://example.com/') !== -1); 393 | }); 394 | 395 | test.serial( 396 | 'endpoint for invalidating all memory cache works if configured', 397 | async (t: ExecutionContext) => { 398 | const mock_config = { 399 | cache: 'memory' as const, 400 | cacheConfig: { 401 | cacheDurationMinutes: '120', 402 | cacheMaxEntries: '50', 403 | }, 404 | timeout: 10000, 405 | port: '3000', 406 | host: '0.0.0.0', 407 | width: 1000, 408 | height: 1000, 409 | reqHeaders: { 410 | Referer: 'http://example.com/', 411 | }, 412 | headers: {}, 413 | puppeteerArgs: ['--no-sandbox'], 414 | renderOnly: [], 415 | closeBrowser: false, 416 | restrictedUrlPattern: null, 417 | }; 418 | const cached_server = request( 419 | await new Rendertron().initialize(mock_config) 420 | ); 421 | const test_url = `/render/${testBase}basic-script.html`; 422 | await app.listen(1238); 423 | // Make a request which is not in cache 424 | let res = await cached_server.get(test_url); 425 | t.is(res.status, 200); 426 | t.true(res.text.indexOf('document-title') !== -1); 427 | t.is(res.header['x-renderer'], 'rendertron'); 428 | t.true(res.header['x-rendertron-cached'] == null); 429 | 430 | // Ensure that it is cached 431 | res = await cached_server.get(test_url); 432 | t.is(res.status, 200); 433 | t.true(res.text.indexOf('document-title') !== -1); 434 | t.is(res.header['x-renderer'], 'rendertron'); 435 | t.true(res.header['x-rendertron-cached'] != null); 436 | 437 | // Invalidate cache and ensure it is not cached 438 | res = await cached_server.get(`/invalidate`); 439 | res = await cached_server.get(test_url); 440 | t.is(res.status, 200); 441 | t.true(res.text.indexOf('document-title') !== -1); 442 | t.is(res.header['x-renderer'], 'rendertron'); 443 | t.true(res.header['x-rendertron-cached'] == null); 444 | } 445 | ); 446 | 447 | test.serial( 448 | 'endpoint for invalidating all filesystem cache works if configured', 449 | async (t: ExecutionContext) => { 450 | const mock_config = { 451 | cache: 'filesystem' as const, 452 | cacheConfig: { 453 | cacheDurationMinutes: '120', 454 | cacheMaxEntries: '50', 455 | snapshotDir: path.join(os.tmpdir(), 'rendertron-test-cache'), 456 | }, 457 | timeout: 10000, 458 | port: '3000', 459 | host: '0.0.0.0', 460 | width: 1000, 461 | height: 1000, 462 | headers: {}, 463 | reqHeaders: { 464 | Referer: 'http://example.com/', 465 | }, 466 | puppeteerArgs: ['--no-sandbox'], 467 | renderOnly: [], 468 | closeBrowser: false, 469 | restrictedUrlPattern: null, 470 | }; 471 | const cached_server = request( 472 | await new Rendertron().initialize(mock_config) 473 | ); 474 | const test_url = `/render/${testBase}basic-script.html`; 475 | await app.listen(1239); 476 | // Make a request which is not in cache 477 | let res = await cached_server.get(test_url); 478 | t.is(res.status, 200); 479 | t.true(res.text.indexOf('document-title') !== -1); 480 | t.is(res.header['x-renderer'], 'rendertron'); 481 | t.true(res.header['x-rendertron-cached'] == null); 482 | 483 | // Ensure that it is cached 484 | res = await cached_server.get(test_url); 485 | t.is(res.status, 200); 486 | t.true(res.text.indexOf('document-title') !== -1); 487 | t.is(res.header['x-renderer'], 'rendertron'); 488 | t.true(res.header['x-rendertron-cached'] != null); 489 | 490 | // Invalidate cache and ensure it is not cached 491 | res = await cached_server.get(`/invalidate`); 492 | res = await cached_server.get(test_url); 493 | t.is(res.status, 200); 494 | t.true(res.text.indexOf('document-title') !== -1); 495 | t.is(res.header['x-renderer'], 'rendertron'); 496 | t.true(res.header['x-rendertron-cached'] == null); 497 | 498 | await cached_server.get(`/invalidate`); 499 | // cleanup cache to prevent future tests failing 500 | await cached_server.get(`/invalidate/`); 501 | fs.rmdirSync(path.join(os.tmpdir(), 'rendertron-test-cache')); 502 | } 503 | ); 504 | 505 | test('unknown timezone fails', async (t) => { 506 | const res = await server.get( 507 | `/render/${testBase}include-date.html?timezoneId=invalid/timezone` 508 | ); 509 | t.is(res.status, 400); 510 | }); 511 | 512 | test('known timezone applies', async (t) => { 513 | // Atlantic/Reykjavik is a timezone where GMT+0 is all-year round without Daylight Saving Time 514 | const res = await server.get( 515 | `/render/${testBase}include-date.html?timezoneId=Atlantic/Reykjavik` 516 | ); 517 | t.is(res.status, 200); 518 | t.true(res.text.indexOf('00:00:00') !== -1); 519 | 520 | const res2 = await server.get( 521 | `/render/${testBase}include-date.html?timezoneId=Australia/Perth` 522 | ); 523 | t.is(res2.status, 200); 524 | // Australia/Perth is a timezone where GMT+8 is all-year round without Daylight Saving Time 525 | t.true(res2.text.indexOf('08:00:00') !== -1); 526 | }); 527 | 528 | test('urls mathing pattern are restricted', async (t) => { 529 | const mock_config = { 530 | cache: 'filesystem' as const, 531 | cacheConfig: { 532 | cacheDurationMinutes: '120', 533 | cacheMaxEntries: '50', 534 | snapshotDir: path.join(os.tmpdir(), 'rendertron-test-cache'), 535 | }, 536 | timeout: 10000, 537 | port: '3000', 538 | host: '0.0.0.0', 539 | width: 1000, 540 | height: 1000, 541 | headers: {}, 542 | reqHeaders: { 543 | Referer: 'http://example.com/', 544 | }, 545 | puppeteerArgs: ['--no-sandbox'], 546 | renderOnly: [], 547 | closeBrowser: false, 548 | restrictedUrlPattern: '.*(\\.test.html)($|\\?)', 549 | }; 550 | const cached_server = request( 551 | await new Rendertron().initialize(mock_config) 552 | ); 553 | await app.listen(1240); 554 | // Make a restriced request 555 | let res = await cached_server.get(`/render/${testBase}restrict-test.test.html`); 556 | t.is(res.status, 400); 557 | t.is(res.header['x-renderer'], 'rendertron'); 558 | 559 | res = await cached_server.get(`/render/${testBase}restrict-test.test.html?hello=world`); 560 | t.is(res.status, 400); 561 | t.is(res.header['x-renderer'], 'rendertron'); 562 | 563 | // Non restricted calls should pass through 564 | res = await cached_server.get(`/render/${testBase}basic-script.html`); 565 | t.is(res.status, 200); 566 | t.true(res.text.indexOf('document-title') !== -1); 567 | t.is(res.header['x-renderer'], 'rendertron'); 568 | t.true(res.header['x-rendertron-cached'] == null); 569 | 570 | await cached_server.get(`/invalidate`); 571 | // cleanup cache to prevent future tests failing 572 | await cached_server.get(`/invalidate/`); 573 | fs.rmdirSync(path.join(os.tmpdir(), 'rendertron-test-cache')); 574 | }); 575 | 576 | -------------------------------------------------------------------------------- /src/test/datastore-cache-test.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2018 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | 'use strict'; 18 | 19 | import test, { ExecutionContext } from 'ava'; 20 | import Koa from 'koa'; 21 | import koaCompress from 'koa-compress'; 22 | import request from 'supertest'; 23 | import route from 'koa-route'; 24 | 25 | import { DatastoreCache } from '../datastore-cache'; 26 | 27 | const app = new Koa(); 28 | const server = request(app.listen()); 29 | const cache = new DatastoreCache(); 30 | 31 | app.use(route.get('/compressed', koaCompress())); 32 | 33 | app.use(cache.middleware()); 34 | 35 | let handlerCalledCount = 0; 36 | 37 | test.before(async () => { 38 | await cache.clearCache(); 39 | }); 40 | 41 | app.use( 42 | route.get('/', (ctx: Koa.Context) => { 43 | handlerCalledCount++; 44 | ctx.body = `Called ${handlerCalledCount} times`; 45 | }) 46 | ); 47 | 48 | const promiseTimeout = function (timeout: number) { 49 | return new Promise((resolve) => { 50 | setTimeout(resolve, timeout); 51 | }); 52 | }; 53 | 54 | test('caches content and serves same content on cache hit', async (t: ExecutionContext) => { 55 | let res = await server.get('/?basictest'); 56 | const previousCount = handlerCalledCount; 57 | t.is(res.status, 200); 58 | t.is(res.text, 'Called ' + previousCount + ' times'); 59 | 60 | // Workaround for race condition with writing to datastore. 61 | await promiseTimeout(2000); 62 | 63 | res = await server.get('/?basictest'); 64 | t.is(res.status, 200); 65 | t.is(res.text, 'Called ' + previousCount + ' times'); 66 | t.truthy(res.header['x-rendertron-cached']); 67 | t.true(new Date(res.header['x-rendertron-cached']) <= new Date()); 68 | 69 | res = await server.get('/?basictest'); 70 | t.is(res.status, 200); 71 | t.is(res.text, 'Called ' + previousCount + ' times'); 72 | 73 | res = await server.get('/?basictest2'); 74 | t.is(res.status, 200); 75 | t.is(res.text, 'Called ' + (previousCount + 1) + ' times'); 76 | }); 77 | 78 | app.use( 79 | route.get('/set-header', (ctx: Koa.Context) => { 80 | ctx.set('my-header', 'header-value'); 81 | ctx.body = 'set-header-payload'; 82 | }) 83 | ); 84 | 85 | test('caches headers', async (t: ExecutionContext) => { 86 | let res = await server.get('/set-header'); 87 | t.is(res.status, 200); 88 | t.is(res.header['my-header'], 'header-value'); 89 | t.is(res.text, 'set-header-payload'); 90 | 91 | // Workaround for race condition with writing to datastore. 92 | await promiseTimeout(500); 93 | 94 | res = await server.get('/set-header'); 95 | t.is(res.status, 200); 96 | t.is(res.header['my-header'], 'header-value'); 97 | t.is(res.text, 'set-header-payload'); 98 | }); 99 | 100 | app.use( 101 | route.get('/compressed', (ctx: Koa.Context) => { 102 | ctx.set('Content-Type', 'text/html'); 103 | ctx.body = new Array(1025).join('x'); 104 | }) 105 | ); 106 | 107 | test('compression preserved', async (t: ExecutionContext) => { 108 | const expectedBody = new Array(1025).join('x'); 109 | let res = await server 110 | .get('/compressed') 111 | .set('Accept-Encoding', 'gzip, deflate'); 112 | t.is(res.status, 200); 113 | t.is(res.header['content-encoding'], 'gzip'); 114 | t.is(res.text, expectedBody); 115 | 116 | // Workaround for race condition with writing to datastore. 117 | await promiseTimeout(500); 118 | 119 | res = await server.get('/compressed').set('Accept-Encoding', 'gzip, deflate'); 120 | t.is(res.status, 200); 121 | t.is(res.header['content-encoding'], 'gzip'); 122 | t.is(res.text, expectedBody); 123 | }); 124 | 125 | let statusCallCount = 0; 126 | app.use( 127 | route.get('/status/:status', (ctx: Koa.Context, status: string) => { 128 | // Every second call sends a different status. 129 | if (statusCallCount % 2 === 0) { 130 | ctx.status = Number(status); 131 | } else { 132 | ctx.status = 401; 133 | } 134 | statusCallCount++; 135 | }) 136 | ); 137 | 138 | test('original status is preserved', async (t: ExecutionContext) => { 139 | let res = await server.get('/status/400'); 140 | t.is(res.status, 400); 141 | 142 | // Non 200 status code should not be cached. 143 | res = await server.get('/status/400'); 144 | t.is(res.status, 401); 145 | }); 146 | 147 | test('refreshCache refreshes cache', async (t: ExecutionContext) => { 148 | let content = 'content'; 149 | app.use( 150 | route.get('/refreshTest', (ctx: Koa.Context) => { 151 | ctx.body = content; 152 | }) 153 | ); 154 | 155 | let res = await server.get('/refreshTest'); 156 | t.is(res.status, 200); 157 | t.is(res.text, 'content'); 158 | 159 | // Workaround for race condition with writing to datastore. 160 | await promiseTimeout(500); 161 | 162 | res = await server.get('/refreshTest'); 163 | t.truthy(res.header['x-rendertron-cached']); 164 | t.is(res.text, 'content'); 165 | 166 | content = 'updated content'; 167 | 168 | res = await server.get('/refreshTest?refreshCache=true'); 169 | t.is(res.status, 200); 170 | t.is(res.text, 'updated content'); 171 | t.is(res.header['x-rendertron-cached'], undefined); 172 | }); 173 | 174 | test.serial( 175 | 'clear all datastore cache entries', 176 | async (t: ExecutionContext) => { 177 | app.use( 178 | route.get('/clear-all-cache', (ctx: Koa.Context) => { 179 | ctx.body = 'Foo'; 180 | }) 181 | ); 182 | 183 | await server.get('/clear-all-cache?cachedResult1'); 184 | await server.get('/clear-all-cache?cachedResult2'); 185 | 186 | // Workaround for race condition with writing to datastore. 187 | await promiseTimeout(500); 188 | 189 | let res = await server.get('/clear-all-cache?cachedResult1'); 190 | t.is(res.status, 200); 191 | t.truthy(res.header['x-rendertron-cached']); 192 | t.true(new Date(res.header['x-rendertron-cached']) <= new Date()); 193 | res = await server.get('/clear-all-cache?cachedResult2'); 194 | t.is(res.status, 200); 195 | t.truthy(res.header['x-rendertron-cached']); 196 | t.true(new Date(res.header['x-rendertron-cached']) <= new Date()); 197 | 198 | await cache.clearCache(); 199 | 200 | res = await server.get('/clear-all-cache?cachedResult1'); 201 | t.is(res.status, 200); 202 | t.falsy(res.header['x-rendertron-cached']); 203 | t.false(new Date(res.header['x-rendertron-cached']) <= new Date()); 204 | res = await server.get('/clear-all-cache?cachedResult2'); 205 | t.is(res.status, 200); 206 | t.falsy(res.header['x-rendertron-cached']); 207 | t.false(new Date(res.header['x-rendertron-cached']) <= new Date()); 208 | } 209 | ); 210 | -------------------------------------------------------------------------------- /src/test/filesystem-cache-test.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | 'use strict'; 18 | 19 | import Koa from 'koa'; 20 | import koaCompress from 'koa-compress'; 21 | import request from 'supertest'; 22 | import route from 'koa-route'; 23 | 24 | import { FilesystemCache } from '../filesystem-cache'; 25 | import { ConfigManager } from '../config'; 26 | import test, { ExecutionContext } from 'ava'; 27 | 28 | const config = ConfigManager.config; 29 | const app = new Koa(); 30 | const server = request(app.listen()); 31 | const cache = new FilesystemCache(config); 32 | 33 | app.use(route.get('/compressed', koaCompress())); 34 | 35 | app.use(cache.middleware()); 36 | 37 | let handlerCalledCount = 0; 38 | 39 | test.before(async () => { 40 | await cache.clearAllCache(); 41 | }); 42 | 43 | app.use( 44 | route.get('/', (ctx: Koa.Context) => { 45 | handlerCalledCount++; 46 | ctx.body = `Called ${handlerCalledCount} times`; 47 | }) 48 | ); 49 | 50 | const promiseTimeout = function (timeout: number) { 51 | return new Promise((resolve) => { 52 | setTimeout(resolve, timeout); 53 | }); 54 | }; 55 | 56 | test('caches content and serves same content on cache hit', async (t: ExecutionContext) => { 57 | const previousCount = handlerCalledCount; 58 | let res = await server.get('/?basictest'); 59 | t.is(res.status, 200); 60 | t.is(res.text, 'Called ' + (previousCount + 1) + ' times'); 61 | 62 | // Workaround for race condition with writing to datastore. 63 | await promiseTimeout(2000); 64 | 65 | res = await server.get('/?basictest'); 66 | t.is(res.status, 200); 67 | t.is(res.text, 'Called ' + (previousCount + 1) + ' times'); 68 | t.truthy(res.header['x-rendertron-cached']); 69 | t.true(new Date(res.header['x-rendertron-cached']) <= new Date()); 70 | 71 | res = await server.get('/?basictest'); 72 | t.is(res.status, 200); 73 | t.is(res.text, 'Called ' + (previousCount + 1) + ' times'); 74 | }); 75 | 76 | app.use( 77 | route.get('/set-header', (ctx: Koa.Context) => { 78 | ctx.set('my-header', 'header-value'); 79 | ctx.body = 'set-header-payload'; 80 | }) 81 | ); 82 | 83 | test('caches headers', async (t: ExecutionContext) => { 84 | let res = await server.get('/set-header'); 85 | t.is(res.status, 200); 86 | t.is(res.header['my-header'], 'header-value'); 87 | t.is(res.text, 'set-header-payload'); 88 | 89 | // Workaround for race condition with writing to datastore. 90 | await promiseTimeout(500); 91 | 92 | res = await server.get('/set-header'); 93 | t.is(res.status, 200); 94 | t.is(res.header['my-header'], 'header-value'); 95 | t.is(res.text, 'set-header-payload'); 96 | }); 97 | 98 | app.use( 99 | route.get('/compressed', (ctx: Koa.Context) => { 100 | ctx.set('Content-Type', 'text/html'); 101 | ctx.body = new Array(1025).join('x'); 102 | }) 103 | ); 104 | 105 | test('compression preserved', async (t: ExecutionContext) => { 106 | const expectedBody = new Array(1025).join('x'); 107 | let res = await server 108 | .get('/compressed') 109 | .set('Accept-Encoding', 'gzip, deflate'); 110 | t.is(res.status, 200); 111 | t.is(res.header['content-encoding'], 'gzip'); 112 | t.is(res.text, expectedBody); 113 | 114 | // Workaround for race condition with writing to datastore. 115 | await promiseTimeout(500); 116 | 117 | res = await server.get('/compressed').set('Accept-Encoding', 'gzip, deflate'); 118 | t.is(res.status, 200); 119 | t.is(res.header['content-encoding'], 'gzip'); 120 | t.is(res.text, expectedBody); 121 | }); 122 | 123 | let statusCallCount = 0; 124 | app.use( 125 | route.get('/status/:status', (ctx: Koa.Context, status: string) => { 126 | // Every second call sends a different status. 127 | if (statusCallCount % 2 === 0) { 128 | ctx.status = Number(status); 129 | } else { 130 | ctx.status = 401; 131 | } 132 | statusCallCount++; 133 | }) 134 | ); 135 | 136 | test('original status is preserved', async (t: ExecutionContext) => { 137 | let res = await server.get('/status/400'); 138 | t.is(res.status, 400); 139 | 140 | // Non 200 status code should not be cached. 141 | res = await server.get('/status/400'); 142 | t.is(res.status, 401); 143 | }); 144 | 145 | test('cache entry can be removed', async (t: ExecutionContext) => { 146 | let res = await server.get('/?cacheremovetest'); 147 | t.is(res.status, 200); 148 | t.falsy(res.header['x-rendertron-cached']); 149 | t.false(new Date(res.header['x-rendertron-cached']) <= new Date()); 150 | 151 | res = await server.get('/?cacheremovetest'); 152 | t.is(res.status, 200); 153 | t.truthy(res.header['x-rendertron-cached']); 154 | t.true(new Date(res.header['x-rendertron-cached']) <= new Date()); 155 | const key = cache.hashCode('/?cacheremovetest'); 156 | cache.clearCache(key); 157 | res = await server.get('/?cacheremovetest'); 158 | t.is(res.status, 200); 159 | t.falsy(res.header['x-rendertron-cached']); 160 | t.false(new Date(res.header['x-rendertron-cached']) <= new Date()); 161 | 162 | res = await server.get('/?cacheremovetest'); 163 | t.is(res.status, 200); 164 | t.truthy(res.header['x-rendertron-cached']); 165 | t.true(new Date(res.header['x-rendertron-cached']) <= new Date()); 166 | }); 167 | 168 | test('refreshCache refreshes cache', async (t: ExecutionContext) => { 169 | let content = 'content'; 170 | app.use( 171 | route.get('/refreshTest', (ctx: Koa.Context) => { 172 | ctx.body = content; 173 | }) 174 | ); 175 | 176 | let res = await server.get('/refreshTest'); 177 | t.is(res.status, 200); 178 | t.is(res.text, 'content'); 179 | 180 | // Workaround for race condition with writing to datastore. 181 | await promiseTimeout(500); 182 | 183 | res = await server.get('/refreshTest'); 184 | t.truthy(res.header['x-rendertron-cached']); 185 | t.is(res.text, 'content'); 186 | 187 | content = 'updated content'; 188 | 189 | res = await server.get('/refreshTest?refreshCache=true'); 190 | t.is(res.status, 200); 191 | t.is(res.text, 'updated content'); 192 | t.is(res.header['x-rendertron-cached'], undefined); 193 | }); 194 | 195 | test.serial( 196 | 'clear all filesystem cache entries', 197 | async (t: ExecutionContext) => { 198 | app.use( 199 | route.get('/clear-all-cache', (ctx: Koa.Context) => { 200 | ctx.body = 'Foo'; 201 | }) 202 | ); 203 | 204 | await server.get('/clear-all-cache?cachedResult1'); 205 | await server.get('/clear-all-cache?cachedResult2'); 206 | 207 | let res = await server.get('/clear-all-cache?cachedResult1'); 208 | t.is(res.status, 200); 209 | t.truthy(res.header['x-rendertron-cached']); 210 | t.true(new Date(res.header['x-rendertron-cached']) <= new Date()); 211 | res = await server.get('/clear-all-cache?cachedResult2'); 212 | t.is(res.status, 200); 213 | t.truthy(res.header['x-rendertron-cached']); 214 | t.true(new Date(res.header['x-rendertron-cached']) <= new Date()); 215 | 216 | cache.clearAllCache(); 217 | await promiseTimeout(500); 218 | res = await server.get('/clear-all-cache?cachedResult1'); 219 | t.is(res.status, 200); 220 | t.falsy(res.header['x-rendertron-cached']); 221 | t.false(new Date(res.header['x-rendertron-cached']) <= new Date()); 222 | res = await server.get('/clear-all-cache?cachedResult2'); 223 | t.is(res.status, 200); 224 | t.falsy(res.header['x-rendertron-cached']); 225 | t.false(new Date(res.header['x-rendertron-cached']) <= new Date()); 226 | } 227 | ); 228 | -------------------------------------------------------------------------------- /src/test/memory-cache-test.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | 'use strict'; 18 | 19 | import Koa from 'koa'; 20 | import koaCompress from 'koa-compress'; 21 | import request from 'supertest'; 22 | import route from 'koa-route'; 23 | 24 | import { MemoryCache } from '../memory-cache'; 25 | import test, { ExecutionContext } from 'ava'; 26 | 27 | const app = new Koa(); 28 | const server = request(app.listen()); 29 | const cache = new MemoryCache(); 30 | 31 | app.use(route.get('/compressed', koaCompress())); 32 | 33 | app.use(cache.middleware()); 34 | 35 | let handlerCalledCount = 0; 36 | 37 | test.before(async () => { 38 | handlerCalledCount = 0; 39 | await cache.clearCache(); 40 | }); 41 | 42 | app.use( 43 | route.get('/', (ctx: Koa.Context) => { 44 | handlerCalledCount++; 45 | ctx.body = `Called ${handlerCalledCount} times`; 46 | }) 47 | ); 48 | 49 | const promiseTimeout = function (timeout: number) { 50 | return new Promise((resolve) => { 51 | setTimeout(resolve, timeout); 52 | }); 53 | }; 54 | 55 | test('caches content and serves same content on cache hit', async (t: ExecutionContext) => { 56 | const previousCount = handlerCalledCount; 57 | let res = await server.get('/?basictest'); 58 | t.is(res.status, 200); 59 | t.is(res.text, 'Called ' + (previousCount + 1) + ' times'); 60 | 61 | // Workaround for race condition with writing to datastore. 62 | await promiseTimeout(500); 63 | 64 | res = await server.get('/?basictest'); 65 | t.is(res.status, 200); 66 | t.is(res.text, 'Called ' + (previousCount + 1) + ' times'); 67 | t.truthy(res.header['x-rendertron-cached']); 68 | t.true(new Date(res.header['x-rendertron-cached']) <= new Date()); 69 | 70 | res = await server.get('/?basictest'); 71 | t.is(res.status, 200); 72 | t.is(res.text, 'Called ' + (previousCount + 1) + ' times'); 73 | }); 74 | 75 | app.use( 76 | route.get('/set-header', (ctx: Koa.Context) => { 77 | ctx.set('my-header', 'header-value'); 78 | ctx.body = 'set-header-payload'; 79 | }) 80 | ); 81 | 82 | test('caches headers', async (t: ExecutionContext) => { 83 | let res = await server.get('/set-header'); 84 | t.is(res.status, 200); 85 | t.is(res.header['my-header'], 'header-value'); 86 | t.is(res.text, 'set-header-payload'); 87 | 88 | // Workaround for race condition with writing to datastore. 89 | await promiseTimeout(500); 90 | 91 | res = await server.get('/set-header'); 92 | t.is(res.status, 200); 93 | t.is(res.header['my-header'], 'header-value'); 94 | t.is(res.text, 'set-header-payload'); 95 | }); 96 | 97 | app.use( 98 | route.get('/compressed', (ctx: Koa.Context) => { 99 | ctx.set('Content-Type', 'text/html'); 100 | ctx.body = new Array(1025).join('x'); 101 | }) 102 | ); 103 | 104 | test('compression preserved', async (t: ExecutionContext) => { 105 | const expectedBody = new Array(1025).join('x'); 106 | let res = await server 107 | .get('/compressed') 108 | .set('Accept-Encoding', 'gzip, deflate'); 109 | t.is(res.status, 200); 110 | t.is(res.header['content-encoding'], 'gzip'); 111 | t.is(res.text, expectedBody); 112 | 113 | // Workaround for race condition with writing to datastore. 114 | await promiseTimeout(500); 115 | 116 | res = await server.get('/compressed').set('Accept-Encoding', 'gzip, deflate'); 117 | t.is(res.status, 200); 118 | t.is(res.header['content-encoding'], 'gzip'); 119 | t.is(res.text, expectedBody); 120 | }); 121 | 122 | let statusCallCount = 0; 123 | app.use( 124 | route.get('/status/:status', (ctx: Koa.Context, status: string) => { 125 | // Every second call sends a different status. 126 | if (statusCallCount % 2 === 0) { 127 | ctx.status = Number(status); 128 | } else { 129 | ctx.status = 401; 130 | } 131 | statusCallCount++; 132 | }) 133 | ); 134 | 135 | test('original status is preserved', async (t: ExecutionContext) => { 136 | let res = await server.get('/status/400'); 137 | t.is(res.status, 400); 138 | 139 | // Non 200 status code should not be cached. 140 | res = await server.get('/status/400'); 141 | t.is(res.status, 401); 142 | }); 143 | 144 | test('cache entry can be removed', async (t: ExecutionContext) => { 145 | let counter = 0; 146 | app.use( 147 | route.get('/removalTest', (ctx: Koa.Context) => { 148 | ctx.body = `Counter: ${++counter}`; 149 | }) 150 | ); 151 | 152 | let res = await server.get('/?cacheremovetest'); 153 | t.is(res.status, 200); 154 | t.falsy(res.header['x-rendertron-cached']); 155 | t.false(new Date(res.header['x-rendertron-cached']) <= new Date()); 156 | 157 | res = await server.get('/?cacheremovetest'); 158 | 159 | t.is(res.status, 200); 160 | t.truthy(res.header['x-rendertron-cached']); 161 | t.true(new Date(res.header['x-rendertron-cached']) <= new Date()); 162 | 163 | cache.removeEntry('/?cacheremovetest'); 164 | res = await server.get('/?cacheremovetest'); 165 | t.is(res.status, 200); 166 | t.falsy(res.header['x-rendertron-cached']); 167 | t.false(new Date(res.header['x-rendertron-cached']) <= new Date()); 168 | 169 | res = await server.get('/?cacheremovetest'); 170 | t.is(res.status, 200); 171 | t.truthy(res.header['x-rendertron-cached']); 172 | t.true(new Date(res.header['x-rendertron-cached']) <= new Date()); 173 | }); 174 | 175 | test('refreshCache refreshes cache', async (t: ExecutionContext) => { 176 | let content = 'content'; 177 | app.use( 178 | route.get('/refreshTest', (ctx: Koa.Context) => { 179 | ctx.body = content; 180 | }) 181 | ); 182 | 183 | let res = await server.get('/refreshTest'); 184 | t.is(res.status, 200); 185 | t.is(res.text, 'content'); 186 | 187 | // Workaround for race condition with writing to datastore. 188 | await promiseTimeout(500); 189 | 190 | res = await server.get('/refreshTest'); 191 | t.truthy(res.header['x-rendertron-cached']); 192 | t.is(res.text, 'content'); 193 | 194 | content = 'updated content'; 195 | 196 | res = await server.get('/refreshTest?refreshCache=true'); 197 | t.is(res.status, 200); 198 | t.is(res.text, 'updated content'); 199 | t.is(res.header['x-rendertron-cached'], undefined); 200 | }); 201 | 202 | test.serial('clear all memory cache entries', async (t: ExecutionContext) => { 203 | app.use( 204 | route.get('/clear-all-cache', (ctx: Koa.Context) => { 205 | ctx.body = 'Foo'; 206 | }) 207 | ); 208 | 209 | await server.get('/clear-all-cache?cachedResult1'); 210 | await server.get('/clear-all-cache?cachedResult2'); 211 | 212 | let res = await server.get('/clear-all-cache?cachedResult1'); 213 | t.is(res.status, 200); 214 | t.truthy(res.header['x-rendertron-cached']); 215 | t.true(new Date(res.header['x-rendertron-cached']) <= new Date()); 216 | res = await server.get('/clear-all-cache?cachedResult2'); 217 | t.is(res.status, 200); 218 | t.truthy(res.header['x-rendertron-cached']); 219 | t.true(new Date(res.header['x-rendertron-cached']) <= new Date()); 220 | 221 | cache.clearCache(); 222 | 223 | res = await server.get('/clear-all-cache?cachedResult1'); 224 | t.is(res.status, 200); 225 | t.falsy(res.header['x-rendertron-cached']); 226 | t.false(new Date(res.header['x-rendertron-cached']) <= new Date()); 227 | res = await server.get('/clear-all-cache?cachedResult2'); 228 | t.is(res.status, 200); 229 | t.falsy(res.header['x-rendertron-cached']); 230 | t.false(new Date(res.header['x-rendertron-cached']) <= new Date()); 231 | }); 232 | -------------------------------------------------------------------------------- /test-resources/basic-script.html: -------------------------------------------------------------------------------- 1 | 16 | 21 | -------------------------------------------------------------------------------- /test-resources/custom-element.html: -------------------------------------------------------------------------------- 1 | 16 | 26 | -------------------------------------------------------------------------------- /test-resources/explicit-render-event.html: -------------------------------------------------------------------------------- 1 | 16 | 19 | 28 | -------------------------------------------------------------------------------- /test-resources/http-meta-status-code-multiple.html: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /test-resources/http-meta-status-code.html: -------------------------------------------------------------------------------- 1 | 16 | 17 | -------------------------------------------------------------------------------- /test-resources/include-base-as-directory.html: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /test-resources/include-base.html: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /test-resources/include-date.html: -------------------------------------------------------------------------------- 1 | 16 | 17 | 23 | -------------------------------------------------------------------------------- /test-resources/include-doctype.html: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /test-resources/include-json-ld.html: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | 35 | 38 | 41 | 44 | 47 | 48 | -------------------------------------------------------------------------------- /test-resources/include-script.html: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /test-resources/inject-element-after-load.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | window.onload = function () { 18 | var element = document.createElement('div'); 19 | element.textContent = 'injected' + 'Element'; 20 | document.body.appendChild(element); 21 | }; 22 | -------------------------------------------------------------------------------- /test-resources/inject-element-module.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | var element = document.createElement('div'); 18 | element.textContent = 'injectedElement by module script'; 19 | document.body.appendChild(element); 20 | -------------------------------------------------------------------------------- /test-resources/inject-element.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | 17 | var element = document.createElement('div'); 18 | element.textContent = 'injectedElement'; 19 | document.body.appendChild(element); 20 | -------------------------------------------------------------------------------- /test-resources/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "requires": true, 3 | "lockfileVersion": 1, 4 | "dependencies": { 5 | "@webcomponents/webcomponentsjs": { 6 | "version": "2.4.0", 7 | "resolved": "https://registry.npmjs.org/@webcomponents/webcomponentsjs/-/webcomponentsjs-2.4.0.tgz", 8 | "integrity": "sha512-kEClEz2nu9/i6SvyBJTV4pCc6CyCzMhK7zEeJ6QhiJoulBp4YZ06Zfj2E2HUXfWfHJIjtKriJYMtfhettKEjEg==" 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /test-resources/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "@webcomponents/webcomponentsjs": "^2.4.0" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /test-resources/request-header.html: -------------------------------------------------------------------------------- 1 | 16 | 21 | -------------------------------------------------------------------------------- /test-resources/restrict-test.test.html: -------------------------------------------------------------------------------- 1 | 16 | 21 | -------------------------------------------------------------------------------- /test-resources/script-after-load.html: -------------------------------------------------------------------------------- 1 | 16 | 23 | -------------------------------------------------------------------------------- /test-resources/shadow-dom-no-polyfill.html: -------------------------------------------------------------------------------- 1 | 16 | 17 | 28 | -------------------------------------------------------------------------------- /test-resources/shadow-dom-polyfill-all.html: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | 29 | -------------------------------------------------------------------------------- /test-resources/shadow-dom-polyfill-loader.html: -------------------------------------------------------------------------------- 1 | 16 | 17 | 18 | 29 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2017", 4 | "module": "commonjs", 5 | "esModuleInterop": true, 6 | "strict": true, 7 | "noUnusedLocals": true, 8 | "noUnusedParameters": true, 9 | "pretty": true, 10 | "declaration": true, 11 | "sourceMap": true, 12 | "lib": ["es2017", "esnext.asynciterable", "dom"], 13 | "rootDir": "./src", 14 | "outDir": "./build", 15 | "skipLibCheck": true 16 | }, 17 | "include": ["./src/**/*.ts"] 18 | } 19 | --------------------------------------------------------------------------------