├── .gitignore ├── spec ├── support │ └── jasmine.json ├── utilSpec.js ├── originRequestSpec.js └── viewerRequestSpec.js ├── package.json ├── validate.js ├── Makefile ├── LICENSE ├── .github └── workflows │ └── node.js.yml ├── lib ├── util.js ├── OriginRequestInterface.js └── ViewerRequestInterface.js ├── create-invalidation.js ├── serverless.yml ├── deploy.js ├── handler.js └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # package directories 2 | node_modules 3 | jspm_packages 4 | 5 | # Serverless directories 6 | .serverless -------------------------------------------------------------------------------- /spec/support/jasmine.json: -------------------------------------------------------------------------------- 1 | { 2 | "spec_dir": "spec", 3 | "spec_files": [ 4 | "**/*[sS]pec.js" 5 | ], 6 | "helpers": [ 7 | "helpers/**/*.js" 8 | ], 9 | "stopSpecOnExpectationFailure": false, 10 | "random": false 11 | } 12 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "devDependencies": { 3 | "aws-sdk": "^2.1120.0", 4 | "jasmine": "^4.1.0", 5 | "nock": "^13.5.4", 6 | "serverless": "3.39.0" 7 | }, 8 | "dependencies": { 9 | "prerendercloud": "1.48.0" 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /validate.js: -------------------------------------------------------------------------------- 1 | function monkeyPatchPrerenderCloud(lib, options) { 2 | const origSet = lib.set; 3 | lib.set = function(key, val) { 4 | options[key] = val; 5 | origSet.apply(undefined, arguments); 6 | }; 7 | } 8 | const prerendercloud = require("prerendercloud"); 9 | const options = {}; 10 | monkeyPatchPrerenderCloud(prerendercloud, options); 11 | 12 | const handler = require("./handler"); 13 | handler.resetPrerenderCloud(); 14 | 15 | if (!options["prerenderToken"]) { 16 | console.log( 17 | "warning, prerenderToken was not set, requests will be rate limited" 18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: destroy deploy invalidate listinvalidations test destroy 2 | 3 | destroy: 4 | ./node_modules/.bin/serverless remove 5 | 6 | deploy: 7 | node ./validate.js 8 | ./node_modules/.bin/serverless deploy 9 | CLOUDFRONT_DISTRIBUTION_ID="${CLOUDFRONT_DISTRIBUTION_ID}" node deploy.js 10 | CLOUDFRONT_DISTRIBUTION_ID="${CLOUDFRONT_DISTRIBUTION_ID}" node create-invalidation.js 11 | 12 | invalidate: 13 | CLOUDFRONT_DISTRIBUTION_ID="${CLOUDFRONT_DISTRIBUTION_ID}" node create-invalidation.js 14 | 15 | listinvalidations: 16 | aws cloudfront list-invalidations --distribution-id "" | head 17 | 18 | test: 19 | DEBUG=prerendercloud PRERENDER_SERVICE_URL="https://service.prerender.cloud" ./node_modules/jasmine/bin/jasmine.js 20 | -------------------------------------------------------------------------------- /spec/utilSpec.js: -------------------------------------------------------------------------------- 1 | const util = require("../lib/util"); 2 | 3 | describe("util", function() { 4 | describe("isHtml", function() { 5 | it("detects no extension", function() { 6 | expect(util.isHtml("/")).toBe(true); 7 | }); 8 | it("detects html", function() { 9 | expect(util.isHtml("index.html")).toBe(true); 10 | }); 11 | it("detects htm", function() { 12 | expect(util.isHtml("index.htm")).toBe(true); 13 | }); 14 | it("detects double dot html", function() { 15 | expect(util.isHtml("index.bak.html")).toBe(true); 16 | }); 17 | it("does not detect js", function() { 18 | expect(util.isHtml("index.js")).toBe(false); 19 | }); 20 | it("handles miscellaneous dots", function() { 21 | expect( 22 | util.isHtml( 23 | "categories/1234;lat=-999999.8888888;lng=12341234.13371337;location=SanFrancisco" 24 | ) 25 | ).toBe(true); 26 | }); 27 | }); 28 | }); 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jonathan Otto 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/node.js.yml: -------------------------------------------------------------------------------- 1 | # This workflow will do a clean installation of node dependencies, cache/restore them, build the source code and run tests across different versions of node 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions 3 | 4 | name: Node.js CI 5 | 6 | on: 7 | push: 8 | branches: [master] 9 | pull_request: 10 | branches: [master] 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | node-version: [20.x] 18 | # See supported Node.js release schedule at https://nodejs.org/en/about/releases/ 19 | steps: 20 | - uses: actions/checkout@v4 21 | - uses: actions/setup-node@v4 22 | with: 23 | node-version: ${{ matrix.node-version }} 24 | # cache: "npm" 25 | # cache-dependency-path: "package-lock.json" 26 | - uses: actions/cache@v4 27 | with: 28 | path: "**/node_modules" 29 | key: ${{ runner.os }}-modules-${{ hashFiles('**/package-lock.json') }} 30 | # - name: Debugging with ssh 31 | # uses: lhotari/action-upterm@v1 32 | - run: npm install 33 | - run: make test 34 | -------------------------------------------------------------------------------- /lib/util.js: -------------------------------------------------------------------------------- 1 | const url = require("url"); 2 | const USER_AGENT_PLACEHOLDER = "prerendercloud-lambda-edge-original-user-agent"; 3 | 4 | const getHeader = (cloudFrontRequest, name) => 5 | cloudFrontRequest.headers[name] && 6 | cloudFrontRequest.headers[name][0] && 7 | cloudFrontRequest.headers[name][0].value; 8 | 9 | const toBase64 = str => Buffer.from(str).toString("base64"); 10 | const fromBase64 = str => Buffer.from(str, "base64").toString("utf8"); 11 | const createUri = (uri, shouldPrerender, host) => 12 | "/" + toBase64(JSON.stringify({ uri, shouldPrerender, host })); 13 | 14 | const parseUriField = uri => { 15 | // uri has leading slash 16 | return JSON.parse(fromBase64(uri.slice(1))); 17 | }; 18 | 19 | const isHtml = require("prerendercloud").util.urlPathIsHtml; 20 | 21 | // this function exists in the npm lib: prerendercloud 22 | // but must also exist here since we use it in our 404->/index.html 23 | // functionality when the file has no extension or .html extension 24 | const pathIsBlacklisted = (blacklistedPaths, cloudfrontUri) => { 25 | const paths = blacklistedPaths; 26 | 27 | if (paths && Array.isArray(paths)) { 28 | return paths.some(path => { 29 | if (path === cloudfrontUri) return true; 30 | 31 | if (path.endsWith("*")) { 32 | const starIndex = path.indexOf("*"); 33 | const pathSlice = path.slice(0, starIndex); 34 | 35 | if (cloudfrontUri.startsWith(pathSlice)) return true; 36 | } 37 | 38 | return false; 39 | }); 40 | } 41 | 42 | return false; 43 | }; 44 | 45 | const shouldRewriteToIndexHtml = (req, cachedOptions, uri) => { 46 | return ( 47 | isHtml(uri) && 48 | (!cachedOptions.blacklistPaths || 49 | (cachedOptions.blacklistPaths && 50 | !pathIsBlacklisted(cachedOptions.blacklistPaths(req), uri))) 51 | ); 52 | }; 53 | 54 | module.exports = { 55 | USER_AGENT_PLACEHOLDER, 56 | toBase64, 57 | fromBase64, 58 | createUri, 59 | getHeader, 60 | parseUriField, 61 | isHtml, 62 | shouldRewriteToIndexHtml 63 | }; 64 | -------------------------------------------------------------------------------- /create-invalidation.js: -------------------------------------------------------------------------------- 1 | // This script is meant to be run from your own laptop, build environment, 2 | // or some separate process (as opposed to the Lambda@Edge function) 3 | // It expects CLOUDFRONT_DISTRIBUTION_ID env var 4 | // and since it uses the aws-sdk lib, it assumes your AWS keys are in either: 5 | // * in the file: ~/.aws/credentials 6 | // * or in the env vars: AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY 7 | // see https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/setting-credentials-node.html 8 | 9 | if (!process.env["CLOUDFRONT_DISTRIBUTION_ID"]) { 10 | throw new Error("CLOUDFRONT_DISTRIBUTION_ID env var must be set"); 11 | } 12 | 13 | CLOUDFRONT_DISTRIBUTION_ID = process.env["CLOUDFRONT_DISTRIBUTION_ID"]; 14 | 15 | const AWS = require("aws-sdk"); 16 | const cloudfront = new AWS.CloudFront(); 17 | const util = require("./lib/util"); 18 | 19 | function createCloudfrontInvalidation(items = []) { 20 | return cloudfront 21 | .createInvalidation({ 22 | DistributionId: CLOUDFRONT_DISTRIBUTION_ID, 23 | InvalidationBatch: { 24 | Paths: { Quantity: items.length, Items: items }, 25 | CallerReference: new Date().toISOString() 26 | } 27 | }) 28 | .promise() 29 | .then(console.log); 30 | } 31 | 32 | // e.g. invalidatePaths(["/docs", "/pricing"]) 33 | function invalidatePaths(paths) { 34 | // map the paths into the custom format this library uses: "/" + toBase64(JSON.stringify({ uri, shouldPrerender })); 35 | // before: ["/docs", "/pricing"] 36 | // after: ["eyJ1cmkiOiIvZG9jcyIsInNob3VsZFByZXJlbmRlciI6dHJ1ZX0=", "/eyJ1cmkiOiIvcHJpY2luZyIsInNob3VsZFByZXJlbmRlciI6dHJ1ZX0="] 37 | const cloudFrontUrls = paths.map(path => util.createUri(path, true)); 38 | 39 | return createCloudfrontInvalidation(cloudFrontUrls); 40 | } 41 | 42 | function invalidateEverything() { 43 | return createCloudfrontInvalidation(["/*"]); 44 | } 45 | 46 | invalidateEverything(); 47 | 48 | // This script calls `invalidateEverything` to invalidate all possible paths on your 49 | // CloudFront distribution. If instead you want to invalidate a distinct set of paths, 50 | // use invalidatePaths. 51 | // invalidatePaths(["/docs", "/pricing"]); 52 | -------------------------------------------------------------------------------- /serverless.yml: -------------------------------------------------------------------------------- 1 | # see limits on Lambda@Edge http://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/cloudfront-limits.html#limits-lambda-at-edge 2 | 3 | service: Lambda-Edge-Prerendercloud 4 | 5 | # You can pin your service to only deploy with a specific Serverless version 6 | # Check out our docs for more details 7 | # frameworkVersion: "=X.X.X" 8 | 9 | provider: 10 | name: aws 11 | runtime: nodejs20.x 12 | region: us-east-1 13 | memorySize: 128 14 | role: LambdaEdgeRole 15 | # you can overwrite defaults here 16 | # stage: dev 17 | # you could define service wide environment variables here 18 | # but lambda@edge doesn't support them and will throw error if you try 19 | # environment: 20 | # variable1: value1 21 | 22 | # you can add packaging information here 23 | package: 24 | # include: 25 | # - include-me.js 26 | # - include-me-dir/** 27 | excludeDevDependencies: true 28 | exclude: 29 | - spec/** 30 | - package-lock.json 31 | - yarn.lock 32 | - README.md 33 | - deploy.js 34 | - create-invalidation.js 35 | - LICENSE 36 | - Makefile 37 | 38 | functions: 39 | viewerRequest: 40 | handler: handler.viewerRequest 41 | timeout: 5 42 | originRequest: 43 | handler: handler.originRequest 44 | timeout: 30 45 | originResponse: 46 | handler: handler.originResponse 47 | timeout: 5 48 | 49 | # I created a Lambda@Edge function via the wizard in Lambda Console 50 | # and then copied the role and pasted it here 51 | resources: 52 | Resources: 53 | LambdaEdgeRole: 54 | Type: AWS::IAM::Role 55 | Properties: 56 | AssumeRolePolicyDocument: 57 | Statement: 58 | - Effect: Allow 59 | Principal: 60 | Service: 61 | - edgelambda.amazonaws.com 62 | - lambda.amazonaws.com 63 | Action: 64 | - sts:AssumeRole 65 | Policies: 66 | - PolicyName: LambdaEdgeExecutionRole 67 | PolicyDocument: 68 | Version: "2012-10-17" 69 | Statement: 70 | - Effect: Allow 71 | Action: 72 | - logs:CreateLogGroup 73 | - logs:CreateLogStream 74 | - logs:PutLogEvents 75 | Resource: "arn:aws:logs:*:*:*" 76 | -------------------------------------------------------------------------------- /deploy.js: -------------------------------------------------------------------------------- 1 | if (!process.env["CLOUDFRONT_DISTRIBUTION_ID"]) { 2 | throw new Error("CLOUDFRONT_DISTRIBUTION_ID env var must be set"); 3 | } 4 | 5 | CLOUDFRONT_DISTRIBUTION_ID = process.env["CLOUDFRONT_DISTRIBUTION_ID"]; 6 | 7 | const lambdaMappings = [ 8 | { 9 | FunctionName: "Lambda-Edge-Prerendercloud-dev-viewerRequest", 10 | EventType: "viewer-request" 11 | }, 12 | { 13 | FunctionName: "Lambda-Edge-Prerendercloud-dev-originRequest", 14 | EventType: "origin-request" 15 | }, 16 | { 17 | FunctionName: "Lambda-Edge-Prerendercloud-dev-originResponse", 18 | EventType: "origin-response" 19 | } 20 | ]; 21 | 22 | const AWS = require("aws-sdk"); 23 | AWS.config.region = "us-east-1"; 24 | 25 | const lambda = new AWS.Lambda(); 26 | const cloudfront = new AWS.CloudFront(); 27 | 28 | const getLastPageOfVersions = (lambdaMapping, Marker) => 29 | lambda 30 | .listVersionsByFunction({ 31 | FunctionName: lambdaMapping.FunctionName, 32 | MaxItems: 1000, // there's a bug that causes this to return 50 no matter what https://github.com/aws/aws-sdk-js/issues/1118 33 | Marker 34 | }) 35 | .promise() 36 | .then(res => { 37 | if (res.NextMarker) 38 | return getLastPageOfVersions(lambdaMapping, res.NextMarker); 39 | 40 | return res; 41 | }); 42 | 43 | const getLatestVersion = lambdaMapping => 44 | getLastPageOfVersions(lambdaMapping) 45 | .then( 46 | res => 47 | res.Versions.reduce((prev, curr) => ( 48 | isNaN(curr.Version) 49 | || parseInt(prev.Version) > parseInt(curr.Version) ? prev : curr 50 | )) 51 | ) 52 | .then(latest => ({ 53 | EventType: lambdaMapping.EventType, 54 | LambdaFunctionARN: latest.FunctionArn 55 | })); 56 | 57 | const updateCloudFront = (cloudFrontId, lambdaMappings) => 58 | cloudfront.getDistributionConfig({ Id: cloudFrontId }).promise().then(res => { 59 | console.log( 60 | "before", 61 | res.DistributionConfig.DefaultCacheBehavior.LambdaFunctionAssociations 62 | .Items 63 | ); 64 | res.DistributionConfig.DefaultCacheBehavior.LambdaFunctionAssociations = { 65 | Quantity: lambdaMappings.length, 66 | Items: lambdaMappings 67 | }; 68 | console.log( 69 | "after", 70 | res.DistributionConfig.DefaultCacheBehavior.LambdaFunctionAssociations 71 | .Items 72 | ); 73 | 74 | const IfMatch = res.ETag; 75 | delete res.ETag; 76 | const Id = cloudFrontId; 77 | 78 | return cloudfront 79 | .updateDistribution(Object.assign(res, { Id, IfMatch })) 80 | .promise(); 81 | }); 82 | 83 | return Promise.all( 84 | lambdaMappings.map(lambdaMapping => getLatestVersion(lambdaMapping)) 85 | ) 86 | .then(lambdaMappings => 87 | updateCloudFront(CLOUDFRONT_DISTRIBUTION_ID, lambdaMappings) 88 | ) 89 | .catch(err => { 90 | console.log( 91 | "\n\n------Error while associating Lambda functions with CloudFront------\n\n" 92 | ); 93 | console.error(err); 94 | console.log("\n\n"); 95 | }) 96 | .then(res => { 97 | console.log("\n\n"); 98 | // console.log(res); 99 | console.log( 100 | "\n\nSuccessfully associated Lambda functions with CloudFront\n\n" 101 | ); 102 | }); 103 | -------------------------------------------------------------------------------- /lib/OriginRequestInterface.js: -------------------------------------------------------------------------------- 1 | const util = require("./util"); 2 | 3 | module.exports = class OriginRequestInterface { 4 | static create(cachedOptions, cloudFrontRequest, callback) { 5 | const ori = new this(cloudFrontRequest, callback); 6 | const { req, shouldPrerender } = ori.createReq(); 7 | const next = ori.createNext(req, cachedOptions); 8 | const res = ori.createRes(next); 9 | 10 | return { req, res, next, shouldPrerender }; 11 | } 12 | constructor(cloudFrontRequest, callback) { 13 | this.cloudFrontRequest = cloudFrontRequest; 14 | this.callback = callback; 15 | this.headers = {}; 16 | } 17 | 18 | createReq() { 19 | console.log("about to parse URI", this.cloudFrontRequest.uri); 20 | const { uri, shouldPrerender, host } = util.parseUriField( 21 | this.cloudFrontRequest.uri 22 | ); 23 | 24 | this.originalUri = uri; 25 | 26 | console.log("parsed URI", { uri, shouldPrerender, host }); 27 | 28 | const req = { 29 | connection: { encrypted: true }, 30 | method: this.cloudFrontRequest.method, 31 | originalUrl: uri, 32 | url: uri, 33 | headers: { 34 | host: host, 35 | "user-agent": util.getHeader( 36 | this.cloudFrontRequest, 37 | util.USER_AGENT_PLACEHOLDER 38 | ), 39 | "accept-encoding": util.getHeader( 40 | this.cloudFrontRequest, 41 | "accept-encoding" 42 | ) 43 | } 44 | }; 45 | 46 | return { req, shouldPrerender }; 47 | } 48 | createRes(next) { 49 | const res = { 50 | // the vary package in prerendercloud needs getHeader and setHeader 51 | getHeader: key => { 52 | return this.headers[key]; 53 | }, 54 | setHeader: (key, val) => { 55 | this.headers[key] = val; 56 | }, 57 | end: body => { 58 | const res = { 59 | status: this.status, 60 | statusDescription: "OK", 61 | headers: this.headers, 62 | body: body 63 | }; 64 | 65 | if ( 66 | this.originalHeaders["content-encoding"] && 67 | this.originalHeaders["content-encoding"].match(/gzip/) 68 | ) { 69 | res.body = res.body.toString("base64"); 70 | res.bodyEncoding = "base64"; 71 | } 72 | 73 | if (body && body.length >= 1048000) { 74 | console.log("bailing out because size is over 1mb"); 75 | return next(); 76 | } 77 | 78 | return this.callback(null, res); 79 | }, 80 | writeHead: (_status, _headers) => { 81 | const mergedHeaders = Object.assign({}, _headers, this.headers); 82 | 83 | this.status = `${_status}`; 84 | this.originalHeaders = Object.assign({}, mergedHeaders); 85 | this.headers = Object.keys(mergedHeaders).reduce((memo, headerKey) => { 86 | return Object.assign(memo, { 87 | [headerKey.toLowerCase()]: [ 88 | { 89 | key: headerKey, 90 | value: mergedHeaders[headerKey] 91 | } 92 | ] 93 | }); 94 | }, {}); 95 | } 96 | }; 97 | 98 | return res; 99 | } 100 | createNext(req, cachedOptions) { 101 | // this flow will be called for prerender.cloud LAST_RESORT_TIMEOUT 102 | // aka "prerendercloud middleware SKIPPED: server error: Request timed out" 103 | // it will merely fall back to non-prerendered content 104 | return () => { 105 | delete this.cloudFrontRequest.headers[util.USER_AGENT_PLACEHOLDER]; 106 | 107 | if (util.shouldRewriteToIndexHtml(req, cachedOptions, this.originalUri)) { 108 | console.log("OriginRequestInterface.next", { 109 | rewriteToIndexHtml: true 110 | }); 111 | this.cloudFrontRequest.uri = "/index.html"; 112 | } else { 113 | console.log("OriginRequestInterface.next", { 114 | rewriteToIndexHtml: false 115 | }); 116 | this.cloudFrontRequest.uri = this.originalUri; 117 | } 118 | 119 | // console.log("OriginRequest calling next", this.cloudFrontRequest); 120 | this.callback(null, this.cloudFrontRequest); 121 | }; 122 | } 123 | }; 124 | -------------------------------------------------------------------------------- /lib/ViewerRequestInterface.js: -------------------------------------------------------------------------------- 1 | const util = require("./util"); 2 | 3 | module.exports = class ViewerRequestInterface { 4 | static create(cachedOptions, cloudFrontRequest, callback) { 5 | const vri = new this(cloudFrontRequest, callback); 6 | 7 | const req = vri.createReq(); 8 | const res = vri.createRes(); 9 | const next = vri.createNext(req, cachedOptions); 10 | 11 | return { req, res, next }; 12 | } 13 | constructor(cloudFrontRequest, callback) { 14 | this.cloudFrontRequest = cloudFrontRequest; 15 | const querystring = this.cloudFrontRequest.querystring 16 | ? `?${this.cloudFrontRequest.querystring}` 17 | : ""; 18 | this.originalUrl = this.cloudFrontRequest.uri + querystring; 19 | this.callback = callback; 20 | this.headers = {}; 21 | } 22 | 23 | createReq() { 24 | const req = { 25 | method: this.cloudFrontRequest.method, 26 | originalUrl: this.originalUrl, 27 | url: this.originalUrl, 28 | headers: { 29 | host: util.getHeader(this.cloudFrontRequest, "host"), 30 | "user-agent": util.getHeader(this.cloudFrontRequest, "user-agent") 31 | } 32 | }; 33 | 34 | return req; 35 | } 36 | createRes() { 37 | const res = { 38 | // the vary package in prerendercloud needs getHeader and setHeader 39 | getHeader: key => { 40 | return this.headers[key]; 41 | }, 42 | setHeader: (key, val) => { 43 | this.headers[key] = val; 44 | }, 45 | writeHead(_status, _headers) {} 46 | }; 47 | 48 | res.end = body => { 49 | // since the user-agent header will be overwritten with CloudFront 50 | // we use this to hint at the real one, but: 51 | // 1. it will not affect the cache-key 52 | // 2. prerender.cloud will only see it once (after that, the req will be cached in CloudFront) 53 | // 3. we don't need this for anything other than the potential for user stats/analytics in prerender.cloud 54 | // (i.e. the user can see the user-agent of the request that triggered the first CloudFront request) 55 | this.cloudFrontRequest.headers[util.USER_AGENT_PLACEHOLDER] = [ 56 | { 57 | key: util.USER_AGENT_PLACEHOLDER, 58 | value: util.getHeader(this.cloudFrontRequest, "user-agent") 59 | } 60 | ]; 61 | 62 | const origCloudFrontUri = this.cloudFrontRequest.uri; 63 | 64 | // res.prerender.url.requestedPath is set by https://github.com/sanfrancesco/prerendercloud-nodejs 65 | // specifically for this Lambda lib - it's the requested path after applying the whitelistQueryParams 66 | this.cloudFrontRequest.uri = util.createUri( 67 | res.prerender.url.requestedPath, 68 | true, 69 | util.getHeader(this.cloudFrontRequest, "host") 70 | ); 71 | 72 | console.log({ 73 | shouldPrerender: true, 74 | cloudFrontUriAfterEncode: this.cloudFrontRequest.uri, 75 | requestedUriAfterWhitelist: res.prerender.url.requestedPath, 76 | originalCloudFrontUri: origCloudFrontUri, 77 | originalCloudFrontQuerystring: this.cloudFrontRequest.querystring 78 | }); 79 | 80 | this.callback(null, this.cloudFrontRequest); 81 | }; 82 | 83 | res.headers = {}; 84 | return res; 85 | } 86 | createNext(req, cachedOptions) { 87 | return () => { 88 | if ( 89 | util.shouldRewriteToIndexHtml( 90 | req, 91 | cachedOptions, 92 | this.cloudFrontRequest.uri 93 | ) 94 | ) { 95 | console.log("ViewerRequestInterface.next", { 96 | rewriteToIndexHtml: true 97 | }); 98 | this.cloudFrontRequest.uri = "/index.html"; 99 | } else { 100 | console.log("ViewerRequestInterface.next", { 101 | rewriteToIndexHtml: false 102 | }); 103 | } 104 | 105 | // the URI will not include query string when not pre-rendering 106 | // (because if not pre-rendering, we don't want to mutate the URI field) 107 | this.cloudFrontRequest.uri = util.createUri( 108 | this.cloudFrontRequest.uri, 109 | false 110 | ); 111 | 112 | console.log({ shouldPrerender: false, uri: this.cloudFrontRequest.uri }); 113 | this.callback(null, this.cloudFrontRequest); 114 | }; 115 | } 116 | }; 117 | -------------------------------------------------------------------------------- /spec/originRequestSpec.js: -------------------------------------------------------------------------------- 1 | const handler = require("../handler"); 2 | const nock = require("nock"); 3 | const util = require("../lib/util"); 4 | const zlib = require("zlib"); 5 | 6 | describe("originRequest", function() { 7 | beforeEach(function() { 8 | handler.setPrerenderCloudOption(prerendercloud => null); 9 | nock.cleanAll(); 10 | nock.disableNetConnect(); 11 | }); 12 | 13 | function runHandlerWithOriginRequestEvent() { 14 | beforeEach(function(done) { 15 | this.cb = jasmine.createSpy("originalCallback").and.callFake(() => done()); 16 | this.handler(this.event, this.context, this.cb); 17 | }); 18 | } 19 | 20 | function itReturnsPrerenderCloudResponse(extraHeaders = {}) { 21 | it("calls callback with prerendered body and headers", function() { 22 | expect(this.cb).toHaveBeenCalledWith(null, { 23 | status: "200", 24 | statusDescription: "OK", 25 | headers: Object.assign( 26 | {}, 27 | { 28 | "content-type": [{ key: "content-type", value: "text/html" }] 29 | }, 30 | extraHeaders 31 | ), 32 | body: "prerendered-body" 33 | }); 34 | }); 35 | } 36 | 37 | function itReturnsBase64edGzipResponse() { 38 | it("calls callback with prerendered body and headers", function() { 39 | expect(this.cb).toHaveBeenCalledWith(null, { 40 | status: "200", 41 | statusDescription: "OK", 42 | headers: Object.assign( 43 | {}, 44 | { 45 | "content-type": [{ key: "content-type", value: "text/html" }], 46 | "content-encoding": [{ key: "content-encoding", value: "gzip" }] 47 | } 48 | ), 49 | body: zlib.gzipSync("prerendered-body").toString("base64"), 50 | bodyEncoding: "base64" 51 | }); 52 | }); 53 | } 54 | 55 | function itForwardsRequestToPrerenderCloud(userAgent, uri) { 56 | it("sends exact URL to prerender server with leading slash", function() { 57 | expect(this.requestedPrerenderUri).toEqual(uri); 58 | }); 59 | 60 | it("sends prerendercloud middleware user-agent, and curl x-original-user-agent, and gzip", function() { 61 | expect(this.headersSentToServer).toEqual({ 62 | "user-agent": "prerender-cloud-nodejs-middleware", 63 | "accept-encoding": "gzip", 64 | "x-original-user-agent": userAgent, 65 | host: "service.prerender.cloud" 66 | }); 67 | }); 68 | } 69 | 70 | function itReturnsOriginalCloudFrontRequestWithNormalPath(uri) { 71 | it("returns original CloudFront request with normal path", function() { 72 | expect(this.cb).toHaveBeenCalledWith(null, { 73 | headers: { 74 | host: [{ value: "origin-request-gets-s3-origin.s3.amazonaws.com", key: "Host" }], 75 | "user-agent": [{ value: "CloudFront", key: "User-Agent" }] 76 | }, 77 | clientIp: "2001:cdba::3257:9652", 78 | uri: uri, 79 | method: "GET" 80 | }); 81 | }); 82 | } 83 | 84 | beforeEach(function() { 85 | const self = this; 86 | this.prerenderServer = nock("https://service.prerender.cloud") 87 | .get(/.*/) 88 | .delay(10) 89 | .reply(function(uri) { 90 | self.requestedPrerenderUri = uri; 91 | self.headersSentToServer = this.req.headers; 92 | return [ 93 | 200, 94 | self.prerenderedContent || "prerendered-body", 95 | { "content-type": "text/html" } 96 | ]; 97 | }); 98 | this.handler = handler.originRequest; 99 | this.event = { 100 | Records: [ 101 | { 102 | cf: { 103 | request: { 104 | headers: { 105 | host: [ 106 | { 107 | value: "origin-request-gets-s3-origin.s3.amazonaws.com", 108 | key: "Host" 109 | } 110 | ], 111 | "user-agent": [ 112 | { 113 | value: "CloudFront", 114 | key: "User-Agent" 115 | } 116 | ] 117 | }, 118 | clientIp: "2001:cdba::3257:9652", 119 | uri: "", 120 | method: "GET" 121 | } 122 | } 123 | } 124 | ] 125 | }; 126 | this.context = {}; 127 | }); 128 | 129 | function withInputs(userAgent, uri, shouldPrerender) { 130 | beforeEach(function() { 131 | this.event.Records[0].cf.request.uri = util.createUri( 132 | uri, 133 | shouldPrerender, 134 | "d123.cf.net" 135 | ); 136 | this.event.Records[0].cf.request.headers[ 137 | "prerendercloud-lambda-edge-original-user-agent" 138 | ] = [ 139 | { 140 | value: userAgent, 141 | key: "prerendercloud-lambda-edge-original-user-agent" 142 | } 143 | ]; 144 | }); 145 | } 146 | 147 | function withGzip() { 148 | beforeEach(function() { 149 | this.event.Records[0].cf.request.headers["accept-encoding"] = [ 150 | { key: "accept-encoding", value: "gzip" } 151 | ]; 152 | }); 153 | } 154 | 155 | function withPrerenderedContent(content) { 156 | beforeEach(function() { 157 | this.prerenderedContent = content; 158 | }); 159 | } 160 | 161 | describe("when shouldPrerender is true", function() { 162 | withInputs("whatever", "/index.html", true); 163 | 164 | describe("when accept-encoding supports gzip", function() { 165 | withGzip(); 166 | runHandlerWithOriginRequestEvent(); 167 | 168 | itReturnsBase64edGzipResponse(); 169 | }); 170 | 171 | describe("without protocol", function() { 172 | runHandlerWithOriginRequestEvent(); 173 | 174 | // defaults to https 175 | itForwardsRequestToPrerenderCloud( 176 | "whatever", 177 | "/https://d123.cf.net/index.html" 178 | ); 179 | itReturnsPrerenderCloudResponse(); 180 | }); 181 | 182 | describe("when protocol is forced to http", function() { 183 | beforeEach(function() { 184 | handler.setPrerenderCloudOption(prerendercloud => 185 | prerendercloud.set("protocol", "http") 186 | ); 187 | }); 188 | runHandlerWithOriginRequestEvent(); 189 | 190 | itForwardsRequestToPrerenderCloud( 191 | "whatever", 192 | "/http://d123.cf.net/index.html" 193 | ); 194 | itReturnsPrerenderCloudResponse(); 195 | }); 196 | }); 197 | 198 | describe("when shouldPrerender is false", function() { 199 | withInputs("whatever", "/index.html", false); 200 | runHandlerWithOriginRequestEvent(); 201 | 202 | itReturnsOriginalCloudFrontRequestWithNormalPath("/index.html"); 203 | }); 204 | 205 | // on a timeout, it will call "next" which flows through to origin 206 | // instead of prerendercloud, so we need to do the path rewrite to index.html 207 | describe("when timeout", function() { 208 | beforeEach(function() { 209 | handler.setPrerenderCloudOption(prerendercloud => 210 | prerendercloud.set("timeout", 1) 211 | ); 212 | }); 213 | 214 | describe("when an HTML file", function() { 215 | withInputs("prerendercloud", "/some/path", true); 216 | runHandlerWithOriginRequestEvent(); 217 | 218 | itReturnsOriginalCloudFrontRequestWithNormalPath("/index.html"); 219 | }); 220 | 221 | describe("when a file with an extension", function() { 222 | withInputs("prerendercloud", "/app.js", true); 223 | runHandlerWithOriginRequestEvent(); 224 | 225 | itReturnsOriginalCloudFrontRequestWithNormalPath("/app.js"); 226 | }); 227 | }); 228 | 229 | // lambda has a 1mb max response 230 | describe("when shouldPrerender is true but size is over 1mb", function() { 231 | withInputs("whatever", "/index.html", true); 232 | withPrerenderedContent(new Buffer(1048001)); 233 | 234 | runHandlerWithOriginRequestEvent(); 235 | 236 | itForwardsRequestToPrerenderCloud( 237 | "whatever", 238 | "/https://d123.cf.net/index.html" 239 | ); 240 | 241 | itReturnsOriginalCloudFrontRequestWithNormalPath("/index.html"); 242 | }); 243 | }); 244 | -------------------------------------------------------------------------------- /spec/viewerRequestSpec.js: -------------------------------------------------------------------------------- 1 | const handler = require("../handler"); 2 | const nock = require("nock"); 3 | 4 | const util = require("../lib/util"); 5 | 6 | const createUriShouldPrerender = (uri, querystring, host = "d123.cf.net") => 7 | util.createUri(uri + (querystring ? `?${querystring}` : ""), true, host); 8 | const createUriShouldNotPrerender = (uri) => 9 | util.createUri(uri, false); 10 | 11 | describe("viewerRequest", function() { 12 | beforeEach(function() { 13 | handler.setPrerenderCloudOption(prerendercloud => null); 14 | nock.cleanAll(); 15 | nock.disableNetConnect(); 16 | }); 17 | 18 | function runHandlerWithViewerRequestEvent() { 19 | beforeEach(function(done) { 20 | this.cb = jasmine.createSpy("originalCallback").and.callFake(() => done()); 21 | this.handler(this.event, this.context, this.cb); 22 | }); 23 | } 24 | 25 | function withUserAgentAndUri(userAgent, uri, querystring) { 26 | beforeEach(function() { 27 | this.event.Records[0].cf.request.uri = uri; 28 | this.event.Records[0].cf.request.headers[ 29 | "user-agent" 30 | ][0].value = userAgent; 31 | if (querystring) { 32 | this.event.Records[0].cf.request.querystring = querystring; 33 | } 34 | }); 35 | } 36 | 37 | function itPrerenders(userAgent, uri, querystring) { 38 | it("modifies request object with base64 encoded JSON string that has path and user-agent", function() { 39 | expect(this.cb).toHaveBeenCalledWith(null, { 40 | headers: { 41 | host: [{ value: "d123.cf.net", key: "Host" }], 42 | "user-agent": [{ value: userAgent, key: "User-Agent" }], 43 | [util.USER_AGENT_PLACEHOLDER]: [ 44 | { value: userAgent, key: util.USER_AGENT_PLACEHOLDER } 45 | ] 46 | }, 47 | clientIp: "2001:cdba::3257:9652", 48 | uri: createUriShouldPrerender(uri, querystring), 49 | querystring: querystring || "", 50 | method: "GET" 51 | }); 52 | }); 53 | } 54 | 55 | function itDoesNotPrerender(userAgent, uri, querystring) { 56 | it("modifies request object with base64 encoded JSON string that has path and user-agent", function() { 57 | expect(this.cb).toHaveBeenCalledWith(null, { 58 | headers: { 59 | host: [{ value: "d123.cf.net", key: "Host" }], 60 | "user-agent": [{ value: userAgent, key: "User-Agent" }] 61 | }, 62 | clientIp: "2001:cdba::3257:9652", 63 | uri: createUriShouldNotPrerender(uri), // the URI will not include query string when not pre-rendering 64 | querystring: querystring || "", 65 | method: "GET" 66 | }); 67 | }); 68 | } 69 | 70 | beforeEach(function() { 71 | this.handler = handler.viewerRequest; 72 | this.event = { 73 | Records: [ 74 | { 75 | cf: { 76 | request: { 77 | headers: { 78 | host: [ 79 | { 80 | value: "d123.cf.net", 81 | key: "Host" 82 | } 83 | ], 84 | "user-agent": [ 85 | { 86 | value: "test-agent", 87 | key: "User-Agent" 88 | } 89 | ] 90 | }, 91 | clientIp: "2001:cdba::3257:9652", 92 | uri: "/index.html", 93 | method: "GET", 94 | querystring: "" 95 | } 96 | } 97 | } 98 | ] 99 | }; 100 | this.context = {}; 101 | }); 102 | 103 | describe("with all user-agents enabled (default)", function() { 104 | describe("curl user-agent", function() { 105 | describe("html files", function() { 106 | describe("html extension", function() { 107 | withUserAgentAndUri("curl", "/index.html"); 108 | runHandlerWithViewerRequestEvent(); 109 | 110 | itPrerenders("curl", "/index.html"); 111 | }); 112 | describe("no extension or trailing slash", function() { 113 | withUserAgentAndUri("curl", "/index"); 114 | runHandlerWithViewerRequestEvent(); 115 | 116 | itPrerenders("curl", "/index"); 117 | }); 118 | describe("trailing slash", function() { 119 | withUserAgentAndUri("curl", "/index/"); 120 | runHandlerWithViewerRequestEvent(); 121 | 122 | itPrerenders("curl", "/index/"); 123 | }); 124 | describe("with query string", function() { 125 | withUserAgentAndUri("curl", "/index.html", "a=b&c=d"); 126 | runHandlerWithViewerRequestEvent(); 127 | 128 | itPrerenders("curl", "/index.html", "a=b&c=d"); 129 | }); 130 | }); 131 | describe("non html files", function() { 132 | withUserAgentAndUri("curl", "/app.js"); 133 | runHandlerWithViewerRequestEvent(); 134 | 135 | itDoesNotPrerender("curl", "/app.js"); 136 | }); 137 | }); 138 | 139 | // since shouldPrerender is false, it rewrites uri to /index.html for cache-key 140 | describe("prerendercloud user-agent", function() { 141 | describe("html files", function() { 142 | describe("html extension", function() { 143 | withUserAgentAndUri("prerendercloud random-suffix", "/index.html"); 144 | runHandlerWithViewerRequestEvent(); 145 | 146 | itDoesNotPrerender("prerendercloud random-suffix", "/index.html"); 147 | }); 148 | describe("no extension or trailing slash", function() { 149 | withUserAgentAndUri("prerendercloud random-suffix", "/index"); 150 | runHandlerWithViewerRequestEvent(); 151 | 152 | itDoesNotPrerender("prerendercloud random-suffix", "/index.html"); 153 | }); 154 | describe("trailing slash", function() { 155 | withUserAgentAndUri("prerendercloud random-suffix", "/index/"); 156 | runHandlerWithViewerRequestEvent(); 157 | 158 | itDoesNotPrerender("prerendercloud random-suffix", "/index.html"); 159 | }); 160 | describe("with query string", function() { 161 | withUserAgentAndUri( 162 | "prerendercloud random-suffix", 163 | "/index/", 164 | "a=b&c=d" 165 | ); 166 | runHandlerWithViewerRequestEvent(); 167 | 168 | itDoesNotPrerender( 169 | "prerendercloud random-suffix", 170 | "/index.html", 171 | "a=b&c=d" 172 | ); 173 | }); 174 | }); 175 | 176 | // even though shouldPrerender is false, the uri is not HTML so it preserves uri for cache-key 177 | describe("non html files", function() { 178 | withUserAgentAndUri("prerendercloud random-suffix", "/app.js"); 179 | runHandlerWithViewerRequestEvent(); 180 | 181 | itDoesNotPrerender("prerendercloud random-suffix", "/app.js"); 182 | }); 183 | 184 | // blacklisted files should not be rewritten to index.html 185 | describe("html files that are blacklisted", function() { 186 | beforeEach(function() { 187 | handler.setPrerenderCloudOption(prerendercloud => 188 | prerendercloud.set("blacklistPaths", req => ["/blacklisted.html"]) 189 | ); 190 | }); 191 | withUserAgentAndUri( 192 | "prerendercloud random-suffix", 193 | "/blacklisted.html" 194 | ); 195 | runHandlerWithViewerRequestEvent(); 196 | 197 | itDoesNotPrerender("prerendercloud random-suffix", "/blacklisted.html"); 198 | }); 199 | 200 | describe("html files that are blacklisted as wildcard", function() { 201 | beforeEach(function() { 202 | handler.setPrerenderCloudOption(prerendercloud => 203 | prerendercloud.set("blacklistPaths", req => ["/signin/*"]) 204 | ); 205 | }); 206 | withUserAgentAndUri("prerendercloud random-suffix", "/signin/oauth"); 207 | runHandlerWithViewerRequestEvent(); 208 | 209 | itDoesNotPrerender("prerendercloud random-suffix", "/signin/oauth"); 210 | }); 211 | 212 | // ensure conditional logic around blacklist doesn't break non html files 213 | describe("non html while blacklist exists", function() { 214 | beforeEach(function() { 215 | handler.setPrerenderCloudOption(prerendercloud => 216 | prerendercloud.set("blacklistPaths", req => ["/blacklisted.html"]) 217 | ); 218 | }); 219 | withUserAgentAndUri("prerendercloud random-suffix", "/blacklisted.js"); 220 | runHandlerWithViewerRequestEvent(); 221 | 222 | itDoesNotPrerender("prerendercloud random-suffix", "/blacklisted.js"); 223 | }); 224 | }); 225 | }); 226 | 227 | describe("with botsOnly user-agents", function() { 228 | beforeEach(function() { 229 | handler.setPrerenderCloudOption(prerendercloud => 230 | prerendercloud.set("botsOnly", true) 231 | ); 232 | }); 233 | // since shouldPrerender is true, it preserves uri for cache-key 234 | describe("twitterbot user-agent", function() { 235 | withUserAgentAndUri("twitterbot", "/nested/path"); 236 | runHandlerWithViewerRequestEvent(); 237 | 238 | itPrerenders("twitterbot", "/nested/path"); 239 | }); 240 | 241 | // since shouldPrerender is false, it rewrites uri for cache-key 242 | describe("curl user-agent", function() { 243 | withUserAgentAndUri("curl", "/nested/path"); 244 | runHandlerWithViewerRequestEvent(); 245 | 246 | itDoesNotPrerender("curl", "/index.html"); 247 | }); 248 | 249 | describe("prerendercloud user-agent", function() { 250 | withUserAgentAndUri("prerendercloud random-suffix", "/index.html"); 251 | runHandlerWithViewerRequestEvent(); 252 | 253 | itDoesNotPrerender("prerendercloud random-suffix", "/index.html"); 254 | }); 255 | }); 256 | }); 257 | -------------------------------------------------------------------------------- /handler.js: -------------------------------------------------------------------------------- 1 | // http://docs.aws.amazon.com/lambda/latest/dg/lambda-edge.html 2 | // http://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/lambda-at-the-edge.html 3 | // http://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/cloudfront-limits.html#limits-lambda-at-edge 4 | 5 | "use strict"; 6 | const ViewerRequestInterface = require("./lib/ViewerRequestInterface"); 7 | const OriginRequestInterface = require("./lib/OriginRequestInterface"); 8 | 9 | const prerendercloud = require("prerendercloud"); 10 | 11 | const origSet = prerendercloud.set; 12 | let cachedOptions = {}; 13 | prerendercloud.set = function (optName, val) { 14 | origSet.apply(undefined, arguments); 15 | cachedOptions[optName] = val; 16 | }; 17 | 18 | const resetPrerenderCloud = () => { 19 | prerendercloud.resetOptions(); 20 | cachedOptions = {}; 21 | 22 | // default prerender.cloud timeout is 10s 23 | // - so if it takes longer than 11s, either prerender.cloud is down or backed up 24 | // max Lambda@Edge timeout is 30s 25 | prerendercloud.set("retries", 1); 26 | prerendercloud.set("timeout", 11000); 27 | 28 | // * CONFIGURATION * 29 | 30 | // 1. prerenderToken (API token, you'll be rate limited without it) 31 | // Get it after signing up at https://www.prerender.cloud/ 32 | // note: Lambda@Edge doesn't support env vars, so hardcoding is your only option. 33 | // prerendercloud.set("prerenderToken", "mySecretToken") 34 | 35 | // 2. protocol (optional, default is https) 36 | // use this to force a certain protocol for requests from service.prerender.cloud to your origin 37 | // example use case: if your origin is http only 38 | // prerendercloud.set("protocol", "http"); 39 | 40 | // 3. host (optional, will infer from host header if not set here) 41 | // If having issues, try setting this to your custom domain (something like example.com) 42 | // or if you don't have one, then the CloudFront distribution URL (something like d1pxreml448ujs.cloudfront.net). 43 | // Note, setting this config option shouldn't be necessary 44 | // example value: example.com or d1pxreml448ujs.cloudfront.net (don't include the protocol) 45 | // prerendercloud.set("host", ""); 46 | 47 | // 4. removeTrailingSlash (recommended) 48 | // Removes trailing slash from URLs to increase prerender.cloud server cache hit rate 49 | // the only reason not to enable this is if you use "strict routing" 50 | // that is, you treat /docs/ differently than /docs (trailing slash) which is rare 51 | // prerendercloud.set("removeTrailingSlash", true); 52 | 53 | // 5. whitelistQueryParams (recommended) 54 | // improves cache hit rate by dropping query params not in the whitelist 55 | // must be a function that returns null or array 56 | // * default (null) preserves all query params 57 | // * empty array drops all query params 58 | // prerendercloud.set("whitelistQueryParams", req => ["page"]); 59 | 60 | // 6. botsOnly 61 | // generally not recommended due to potential google SEO cloaking penalties no one fully understands 62 | // prerendercloud.set("botsOnly", true); 63 | 64 | // 7. whitelistUserAgents 65 | // specify your own list of bots 66 | // useful when you only care about open graph previews (in which case, metaOnly also makes sense) 67 | // prerendercloud.set('whitelistUserAgents', ['twitterbot', 'slackbot', 'facebookexternalhit']); 68 | 69 | // 8. metaOnly 70 | // only prerender the and <meta> tags in the <head> section. The returned HTML payload will otherwise be unmodified. 71 | // useful if you don't care about server-side rendering but want open-graph previews to work everywhere 72 | // must be a function that receives a req object, and returns a bool 73 | // eg1: 74 | // prerendercloud.set('metaOnly', req => req.url === "/long-page-insuitable-for-full-prerender" ? true : false); 75 | // eg2: 76 | // prerendercloud.set('metaOnly', () => true); 77 | 78 | // 9. disableServerCache 79 | // Disable the cache on prerender.cloud (default is enabled with 5 minute duration). 80 | // It probably makes sense to disable the prerender.cloud server cache 81 | // since CloudFront is caching things for you. 82 | // Pros/Cons of disabling prerender.cloud server cache: 83 | // Pros 84 | // - when you invalidate CloudFront, the next page load will be guaranteed fresh 85 | // Cons 86 | // - when you invalid CloudFront each page load will require a new prerender call 87 | // (so if you regularly invalidate even if the content hasn't changed, you're slowing 88 | // things down unnecessarily) 89 | // prerendercloud.set('disableServerCache', true); 90 | 91 | // 10. blacklistPaths (not for blacklisting paths in your SPA, but for static files that shouldn't be pre-rendered) 92 | // the viewer-request function can't see what files exist on origin so you may need this 93 | // if you have HTML files that should not be pre-rendered (e.g. google/apple/fb verification files) 94 | // trailing * works as a wildcard 95 | // NOTE: this is for static files that you don't want pre-rendered, not SPA routes - for those, use shouldPrerenderAdditionalCheck 96 | // prerendercloud.set('blacklistPaths', req => ['/facebook-domain-verification.html', '/signin/*', '/google*']); 97 | 98 | // 11. removeScriptsTag (not recommended) 99 | // Removes all scripts/JS, useful if: 100 | // - trying to get under 1MB Lambda@Edge limit 101 | // - having problems with your JS app taking over from the pre-rendered content 102 | // Huge caveat: this also means your app will no longer be a "single-page app" since 103 | // all of the JavaScript will be gone 104 | // prerendercloud.set("removeScriptTags", true); 105 | 106 | // 12. disableAjaxPreload 107 | // "Ajax Preload" is a monkey-patch, included by default when metaOnly is false/null. 108 | // It prevents screen flicker/repaint/flashing, but increases initial page load size 109 | // (because it embeds the AJAX responses into your HTML). 110 | // you can disable this if: 111 | // * you have metaOnly set to true 112 | // * you don't make any AJAX/XHR requests 113 | // * you don't care about a brief flicker/flash 114 | // * or finally, the best option: you manage your own via prerender.cloud's __PRELOADED_STATE__ special global var 115 | // Read more: 116 | // - https://www.prerender.cloud/docs/server-client-transition 117 | // - https://github.com/sanfrancesco/prerendercloud-ajaxmonkeypatch 118 | // prerendercloud.set("disableAjaxPreload", true); 119 | 120 | // 13. shouldPrerenderAdditionalCheck 121 | // Runs in addition to the default user-agent check. Useful if you have your own conditions 122 | // e.g. blacklisting paths in your SPA, or only pre-rendering certain paths 123 | // just return true or false, your data is: req.headers and req.url 124 | // const blacklistSpaPaths = [ 125 | // "/some-page-that-prerenders-poorly", 126 | // "/auth/customer-profile/*", 127 | // "/interactive*", 128 | // ]; 129 | // prerendercloud.set("shouldPrerenderAdditionalCheck", (req) => 130 | // isNotBlocked(blacklistSpaPaths, req) 131 | // ); 132 | 133 | // 14. see all configuration options here: https://github.com/sanfrancesco/prerendercloud-nodejs 134 | 135 | // for tests 136 | if (prerenderCloudOption) prerenderCloudOption(prerendercloud); 137 | }; 138 | 139 | const isNotBlocked = (blacklistSpaPaths, req) => { 140 | return !blacklistSpaPaths.some((pattern) => { 141 | if (pattern.endsWith("*")) { 142 | return req.url.startsWith(pattern.slice(0, -1)); 143 | } 144 | return req.url === pattern; 145 | }); 146 | }; 147 | 148 | module.exports.viewerRequest = (event, context, callback) => { 149 | resetPrerenderCloud(); 150 | 151 | const cloudFrontRequest = event.Records[0].cf.request; 152 | console.log("viewerRequest", JSON.stringify(cloudFrontRequest)); 153 | 154 | prerendercloud.set("beforeRender", (req, done) => { 155 | // FYI: if this block is called, it means we shouldPrerender 156 | 157 | // force the middleware to call res.writeHead and res.end immediately 158 | // instead of the remote prerender. (this allows us to use most of the 159 | // code from the prerendercloud lib and bail out at last moment) 160 | done(null, "noop"); 161 | }); 162 | 163 | const { req, res, next } = ViewerRequestInterface.create( 164 | cachedOptions, 165 | cloudFrontRequest, 166 | callback 167 | ); 168 | 169 | prerendercloud(req, res, next); 170 | }; 171 | 172 | module.exports.originRequest = (event, context, callback) => { 173 | resetPrerenderCloud(); 174 | 175 | // temporary until timeout function of prerendercloud or got is fixed 176 | // so it cancels request when timeout is reached 177 | // https://github.com/sindresorhus/got/issues/344 178 | // https://github.com/sindresorhus/got/pull/360 179 | context.callbackWaitsForEmptyEventLoop = false; 180 | 181 | const cloudFrontRequest = event.Records[0].cf.request; 182 | console.log("originRequest", JSON.stringify(cloudFrontRequest)); 183 | 184 | const { req, res, next, shouldPrerender } = OriginRequestInterface.create( 185 | cachedOptions, 186 | cloudFrontRequest, 187 | callback 188 | ); 189 | 190 | // we override the prerendercloud lib's default userAgent logic 191 | // for deciding when to prerender because we've already computed it 192 | // in the viewer-request, and encoded it into the URI, which is now in the `shouldPrerender` var 193 | prerendercloud.set("shouldPrerender", () => shouldPrerender); 194 | 195 | if (shouldPrerender) { 196 | console.log("originRequest calling service.prerender.cloud:", { 197 | host: req.headers.host, 198 | url: req.url, 199 | }); 200 | } else { 201 | console.log("originRequest calling next", { 202 | host: req.headers.host, 203 | url: req.url, 204 | }); 205 | } 206 | 207 | prerendercloud(req, res, next); 208 | }; 209 | 210 | module.exports.originResponse = (event, context, callback) => { 211 | const cloudFrontResponse = event.Records[0].cf.response; 212 | // console.log("originResponse", JSON.stringify(cloudFrontResponse)); 213 | 214 | if (cloudFrontResponse.status === "404") { 215 | cloudFrontResponse.body = ` 216 | <html> 217 | <head> 218 | <title>Not Found 219 | 220 | 404 - Not Found 221 | 222 | `; 223 | cloudFrontResponse.headers["content-type"] = [ 224 | { key: "Content-Type", value: "text/html" }, 225 | ]; 226 | } 227 | 228 | callback(null, cloudFrontResponse); 229 | }; 230 | 231 | // for tests 232 | var prerenderCloudOption; 233 | module.exports.setPrerenderCloudOption = (cb) => { 234 | prerenderCloudOption = cb; 235 | }; 236 | 237 | // for validation 238 | module.exports.resetPrerenderCloud = resetPrerenderCloud; 239 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Note: please first test your app** with our quick and simple all-in-one solution here: https://github.com/sanfrancesco/prerendercloud-server before going through this Lambda@Edge+CloudFront setup process. Once that's debugged and working the way you want it, come back here to configure Lambda@Edge+CloudFront. This will save you hours of headache because debugging/iterating with Cloudfront/Lambda/S3 is slow. 2 | 3 | # Pre-render CloudFront (via AWS Lambda@Edge) 4 | 5 | 6 | 7 | ![Github Actions CI](https://github.com/sanfrancesco/prerendercloud-lambda-edge/actions/workflows/node.js.yml/badge.svg) 8 | 9 | 4-minute YouTube video walk-through (2024-07-10 update: AWS UI/UX has slightly changed since video but the video is still accurate): [https://youtu.be/SsMNQ3EaNZ0](https://youtu.be/SsMNQ3EaNZ0) 10 | 11 | TL;DR: 12 | 13 | - step 1: put www files in s3 bucket 14 | - step 2: create cloudfront distribution pointing at s3 bucket 15 | - step 3: clone this repo, `npm install` and run: `CLOUDFRONT_DISTRIBUTION_ID=yourDistributionId make deploy` (set yourDistributionId to what was created in step 2) 16 | - step 4: wait ~5 minutes for aws systems to propagate (cloudfront url will show some error in the meantime) 17 | 18 | full guidance below or in video: 19 | 20 | 4min how-to set up lambda@edge for pre-rendering 23 |
24 | 25 | Server-side rendering (pre-rendering) via Lambda@Edge for single-page apps hosted on CloudFront with an s3 origin. It forwards requests to [Headless-Render-API.com](https://headless-render-api.com) to be pre-rendered using a headless Chrome browser. 26 | 27 | This is a [serverless](https://github.com/serverless/serverless) project with a `make deploy` command that: 28 | 29 | 1. [serverless.yml](serverless.yml) deploys 3 functions to Lambda (`viewerRequest`, `originRequest`, `originResponse`) 30 | 2. [deploy.js](deploy.js) associates them with your CloudFront distribution 31 | 3. [create-invalidation.js](create-invalidation.js) clears/invalidates your CloudFront cache 32 | 33 | Read more: 34 | 35 | - [Headless-Render-API.com](https://headless-render-api.com) (formerly named prerender.cloud from 2016 - 2022) 36 | - [Dec, 2016 Lambda@Edge intro](https://aws.amazon.com/blogs/aws/coming-soon-lambda-at-the-edge/) 37 | - [Lambda@Edge docs](http://docs.aws.amazon.com/lambda/latest/dg/lambda-edge.html) 38 | - [CloudFront docs for Lambda@Edge](http://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/lambda-at-the-edge.html) 39 | 40 | #### 1. Prerequisites 41 | 42 | 1. S3 bucket with index.html and JavaScript files 43 | 2. CloudFront distribution pointing to that S3 bucket (that also has \* read access to that bucket) 44 | 45 | Start with a new test bucket and CloudFront distribution before modifying your production account: 46 | 47 | (it'll be quick because you'll be using the defaults with just 1 exception) 48 | 49 | - S3 bucket in us-east-1 with default config (doesn't need to be public and doesn't need static web hosting) 50 | - yes, us-east-1 makes things easier (using any other region will require a URL change for your CloudFront origin) 51 | - CloudFront distribution with S3 origin with default config except: 52 | - (give CloudFront access to that bucket) 53 | - "Restrict Bucket Access" = "Yes" 54 | - "Origin Access Identity" = "Create a New Identity" 55 | - "Grant Read Permissions on Bucket" = "Yes, Update Bucket Policy" 56 | - (alternatively your S3 bucket [can be public - meaning an access policy that allows getObject on `*` for `*`](http://docs.aws.amazon.com/AmazonS3/latest/dev/example-bucket-policies.html#example-bucket-policies-use-case-2)) 57 | - recommend enabling "automatic compression" 58 | 59 | That's all you need. Now just wait a few minutes for the CloudFront DNS to propogate. 60 | 61 | Note, you **will not be creating** a CloudFront "custom error response" that redirects 404s to index.html, and if you already have one, then remove it - because this project uploads a Lambda@Edge function that replaces that functionality (if you don't remove it, this project won't work). 62 | 63 | #### 2. Clone this repo 64 | 65 | `$ git clone https://github.com/sanfrancesco/prerendercloud-lambda-edge.git` 66 | 67 | #### 3. Install Dependencies 68 | 69 | Node v20 (it works with node as low as v12 but aws lambda requires latest version of nodejs), and npm 70 | 71 | `$ npm install` 72 | 73 | #### 4. Hardcode your headless-render-api.com auth token 74 | 75 | Edit [handler.js](handler.js) and set your headless-render-api.com API token (cmd+f for `prerenderToken`) 76 | 77 | note: Headless-Render-API.com was previously known as Prerender.cloud (rebranded 2022-05-01) 78 | 79 | #### 5. Edit any other configs (optional) 80 | 81 | e.g. `botsOnly`, `removeTrailingSlash` in [handler.js](handler.js) 82 | 83 | #### 6. Remove CloudFront custom error response for 404->index.html 84 | 85 | **(this step is only necessary if you are using an existing CloudFront distribution)** 86 | 87 | If you're using an existing CloudFront distribution, you need to remove this feature. 88 | 89 | It has to be removed because it prevents the execution of the viewer-request function. This project replicates that functionality (see caveats) 90 | 91 | 1. go here: https://console.aws.amazon.com/cloudfront/home 92 | 2. click on your CloudFront distribution 93 | 3. click the "error pages" tab 94 | 4. make note of the TTL settings (in case you need to re-create it) 95 | 5. and delete the custom error response (because having the custom error response prevents the `viewer-request` function from executing). 96 | 97 | #### 7. Add `s3:ListBucket` permission to CloudFront user 98 | 99 | **(this step is only necessary if you want 404s to work)** 100 | 101 | Since we can't use the "custom error response", and we're implementing it ourselves, this permission is neccessary for CloudFront+Lambda@Edge to return a 404 for a requested file that doesn't exist (only non HTML files will return 404, see caveats below). If you don't add this, you'll get 403 forbidden instead. 102 | 103 | 1. go to [s3 console](https://console.aws.amazon.com/s3/home?region=us-east-1) 104 | 2. click on the bucket you created in step 1 for this project 105 | 3. click "permissions" 106 | 4. click "bucket policy" 107 | 5. modify the Action and Resource to each be an array, they should look like (change the bucket name in resource as appropriate): 108 | 109 | ``` 110 | "Action": [ 111 | "s3:GetObject", 112 | "s3:ListBucket" 113 | ], 114 | "Resource": [ 115 | "arn:aws:s3:::CHANGE_THIS_TO_YOUR_BUCKET_NAME_FROM_STEP_1/*", 116 | "arn:aws:s3:::CHANGE_THIS_TO_YOUR_BUCKET_NAME_FROM_STEP_1" 117 | ] 118 | ``` 119 | 120 | If you're not editing an IAM policy specifically, the UI/UX checkbox for this in the S3 interface is, for the bucket, under the "Permissions" tab, "List Objects" 121 | 122 | You can modify the content of the 404 page in [handler.js](handler.js) 123 | 124 | #### 8. Lambda@Edge function Deployment (only needs to be done once, or whenever you `git pull` from this repo) 125 | 126 | 1. Make sure there's a "default" section in your ~/.aws/credentials file with aws*access_key_id/aws_secret_access_key that have any of the following permissions: (full root, or see [serverless discussion](https://github.com/serverless/serverless/issues/1439) or you can use the following policies, which are \_almost* root: [AWSLambdaFullAccess, AwsElasticBeanstalkFullAccess]) 127 | 2. now run: `$ CLOUDFRONT_DISTRIBUTION_ID=whateverYourDistributionIdIs make deploy` 128 | 3. See the created Lambda function in Lambda: https://console.aws.amazon.com/lambda/home?region=us-east-1#/functions 129 | 4. See the created Lambda function in CloudFront: (refresh it, click your distribution, then the behaviors tab, then the checkbox + edit button for the first item in the list, then scroll to bottom of that page to see "Lambda Function Associations") 130 | 131 | #### 9. Deployment (of your single-page application) 132 | 133 | 1. sync/push the files to s3 134 | 2. invalidate CloudFront 135 | 3. you're done (no need to deploy the Lambda@Edge function after this initial setup) 136 | 137 | caveat: note that headless-render-api.com has a 5-minute server cache that you can disable, see `disableServerCache` in [handler.js](/handler.js) 138 | 139 | #### 10. You're done! 140 | 141 | Visit a URL associated with your CloudFront distribution. **It will take a few seconds** for the first request (because it is pre-rendered on the first request). If for some reason the pre-render request fails or times out, the non-pre-rendered request will be cached. 142 | 143 | #### Viewing AWS Logs in CloudWatch 144 | 145 | See logs in CloudWatch in region closest to where you made the request from (although the function is deployed to us-east-1, it is replicated in all regions). 146 | 147 | To view logs from command line: 148 | 149 | 1. use an AWS account with `CloudWatchLogsReadOnlyAccess` 150 | 2. `$ pip install awslogs` ( https://github.com/jorgebastida/awslogs ) 151 | - `AWS_REGION=us-west-2 awslogs get -s '1h' /aws/lambda/us-east-1.Lambda-Edge-Prerendercloud-dev-viewerRequest` 152 | - `AWS_REGION=us-west-2 awslogs get -s '1h' /aws/lambda/us-east-1.Lambda-Edge-Prerendercloud-dev-originRequest` 153 | - (change `AWS_REGION` to whatever region is closest to where you physically are since that's where the logs will be) 154 | - (FYI, for some reason, San Francisco based requests are ending up in us-west-2) 155 | 156 | #### Viewing Headless-Render-API.com logs 157 | 158 | Sign in to headless-render-api.com web app and you'll see the last few requests made for your API key. 159 | 160 | #### Cleanup 161 | 162 | `$ make destroy` will attempt to remove the Lambda@Edge functions - but as of Nov 2017, AWS still doesn't allow deleting "replicated functions" - in which case, just unnassociate them from your CloudFront distribution until delete functionality works. 163 | 164 | This also means if you attempt to delete and recreate the functions, it will fail - so you'll need to change the name in [serverless.yml](serverless.yml) and [deploy.js](deploy.js) (just append a v2) 165 | 166 | You can also sign into AWS and go to CloudFormation and manually remove things. 167 | 168 | ## Caveats 169 | 170 | 1. If you can't tolerate a slow first request (where subsequent requests are served from cache in CloudFront): 171 | - crawl before invalidating the CloudFront distrubtion - just hit all of the URLs with [service.headless-render-api.com](https://headless-render-api.com/docs/api) and configure a `prerender-cache-duration` of something longer than the default of 5 minutes (300) - like 1 week (604800). 172 | 2. This solution will serve index.html in place of something like `/some-special-file.html` even if `/some-special-file.html` exists on your origin 173 | - We're waiting for the Lambda@Edge to add a feature to address this 174 | - in the meantime use the `blacklistPaths` option (see [handler.js](https://github.com/sanfrancesco/prerendercloud-lambda-edge/blob/ccd87b5484a4334d823dbb8f0df16e843b2dc910/handler.js#L81)) 175 | 3. Redirects (301/302 status codes) 176 | - if you use `` to initiate a redirect, your CloudFront TTL must be zero, otherwise CloudFront will cache the body/response and return status code 200 with the body from the redirected path 177 | 178 | ## Updating Node.js runtime 179 | 180 | Simply update [serverless.yaml](./serverless.yml) to the [latest or whatever you prefer](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/lambda-requirements-limits.html#lambda-requirements-lambda-function-configuration) and re-deploy: `make deploy` 181 | 182 | ## Troubleshooting 183 | 184 | - Read through the console output from the `make deploy` command and look for errors 185 | - Check your user-agent if using botsOnly 186 | - Sometimes (rarely) you'll see an error message on the webpage itself. 187 | - Check the AWS logs (see section "Viewing AWS Logs in CloudWatch") 188 | - Check headless-render-api.com logs (see section "Viewing headless-render-api.com logs") 189 | - Sometimes (rarely) there's an actual problem with AWS Lambda and you [may just need to re-deploy](https://www.reddit.com/r/aws/comments/7gumv7/question_aws_lambda_nodejs610_environment_issue/) 190 | --------------------------------------------------------------------------------