├── compare └── .gitkeep ├── static └── .gitkeep ├── .gitignore ├── docs └── project-process-flow.png ├── babel.config.js ├── server.js ├── jest.config.js ├── .github ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── publish_staging.yml │ ├── deploy-without-scraping.yml │ └── publish.yml ├── scraper ├── config.mjs ├── sitemap.mjs ├── scrape.mjs ├── assetScraper.mjs └── htmlScraper.mjs ├── tests ├── ngrok.test.js ├── test.template.js └── setup.mjs ├── .vscode └── settings.json ├── package.json ├── LICENSE ├── helpers └── utils.mjs └── README.MD /compare/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /static/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | static/* 3 | !static/.gitkeep 4 | .env 5 | -------------------------------------------------------------------------------- /docs/project-process-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/offerzen/wombat/HEAD/docs/project-process-flow.png -------------------------------------------------------------------------------- /babel.config.js: -------------------------------------------------------------------------------- 1 | const presets = [ 2 | [ 3 | "@babel/preset-env", 4 | ] 5 | ]; 6 | 7 | module.exports = { presets }; 8 | -------------------------------------------------------------------------------- /server.js: -------------------------------------------------------------------------------- 1 | const express = require('express'); 2 | const app = express(); 3 | 4 | app.use(express.static('static')); 5 | 6 | app.listen(3333, () => console.log('Server 
started!')); 7 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | require('dotenv').config() 2 | 3 | module.exports = { 4 | testEnvironment: "node", 5 | globalSetup: '/tests/setup.mjs', 6 | moduleFileExtensions: ['js', 'mjs'] 7 | }; 8 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | - package-ecosystem: "npm" 8 | directory: "/" 9 | schedule: 10 | interval: "weekly" 11 | -------------------------------------------------------------------------------- /scraper/config.mjs: -------------------------------------------------------------------------------- 1 | import path from 'path'; 2 | import { parseSitemap } from './sitemap.mjs'; 3 | 4 | const site = process.env.SITE.split(',')[0].trim(); 5 | export const urls = await parseSitemap(site); 6 | 7 | const buildDirectory = 'compare'; 8 | export const baseDirectory = path.join(process.cwd(), buildDirectory); 9 | -------------------------------------------------------------------------------- /tests/ngrok.test.js: -------------------------------------------------------------------------------- 1 | const testIf = (condition) => condition ? test : test.skip; 2 | 3 | testIf(!global.site.match(/(webflow\.io)/ig)?.length).each(global.urls)('ngrok for %s', (url) => { 4 | // Match based on number so it's easy to see how many matches exist in a page 5 | const matches = global.html(url).match(/ngrok/ig)?.length ?? 
// Template: runs one assertion per scraped URL. `global.urls` and
// `global.html` are populated by `/tests/setup.mjs` before any test runs.
test.each(global.urls)('Something about URL: %s', (url) => {
  // `global.html(url)` returns the scraped HTML source for the page; any
  // source-level checks can be run against it here.
  const source = global.html(url);
  const matches = source.match(/foo/ig);

  // Jest expects — String.prototype.match returns null when nothing matches.
  expect(matches).toBe(null);
});
19 | Add a dropdown 20 | Content goes here. 21 |
22 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "yaml.schemas": { 3 | "file:///Users/jethroflanagan/.vscode/extensions/atlassian.atlascode-2.10.0/resources/schemas/pipelines-schema.json": "bitbucket-pipelines.yml", 4 | "https://json.schemastore.org/github-workflow.json": [ 5 | "file:///Users/jethroflanagan/Work/offerzen/webflow-platform/.github/workflows/publish_staging.yml", 6 | "file:///Users/jethroflanagan/Work/offerzen/webflow-platform/.github/workflows/deploy.yml", 7 | "file:///Users/jethroflanagan/Work/offerzen/webflow-platform/.github/workflows/publish.yml" 8 | ] 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /tests/setup.mjs: -------------------------------------------------------------------------------- 1 | import fs from 'fs'; 2 | import path from 'path'; 3 | import { baseDirectory, urls } from '../scraper/config.mjs'; 4 | 5 | export default async function (globalConfig, projectConfig) { 6 | 7 | global.site = process.env.SITE || ''; 8 | 9 | // These are taken from the sitemap 10 | // Filters out testing and staging pages so that only production url get tested 11 | global.urls = urls.filter((url) => !url.match(/(\/test\-)/i)?.length); 12 | 13 | // Synchronous reading of source code for any given url (read from `/compare`) 14 | global.html = (url) => { 15 | // Tidies up directory to not include https:// 16 | const httpsSlash = url.indexOf('//') + 2; 17 | const directory = url.slice(httpsSlash); 18 | 19 | const filePath = path.join(baseDirectory, directory, 'index.html'); 20 | 21 | return fs.readFileSync(filePath, 'utf-8'); 22 | }; 23 | }; 24 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": 
import Sitemapper from 'sitemapper';

/**
 * Fetch and parse a site's sitemap.xml.
 * @param {string} site Site URL or bare domain (protocol optional, trailing slash ok)
 * @returns {Promise<string[]>} Page URLs from the sitemap; empty array on failure
 */
export const parseSitemap = async (site) => {
  let urlWithoutTrailingSlash = site.replace(/\/$/, '');
  if (!urlWithoutTrailingSlash.match(/^http/)) {
    urlWithoutTrailingSlash = 'https://' + urlWithoutTrailingSlash;
  }
  const sitemap = new Sitemapper({
    url: `${urlWithoutTrailingSlash}/sitemap.xml`,
    timeout: 15000, // 15 seconds
  });

  try {
    const { sites, errors } = await sitemap.fetch();
    if (errors?.length) {
      console.error(errors);
    }

    // A webflow.io site publishes its sitemap with production URLs; rewrite
    // them to the webflow.io domain so the scraper targets the right host.
    // e.g. PRODUCTION_DOMAIN=test.com (leave out protocol)
    // Bug fix: guard against PRODUCTION_DOMAIN being unset — previously
    // `url.replace(undefined, …)` searched for the literal string "undefined".
    if (site.includes('webflow.io') && process.env.PRODUCTION_DOMAIN) {
      return sites.map((url) =>
        url.replace(process.env.PRODUCTION_DOMAIN, 'webflow.io')
      );
    }
    return sites;
  } catch (error) {
    console.error(error);
    // Bug fix: previously fell through and returned undefined, which crashed
    // callers that iterate the result (e.g. `urls.filter` in setup.mjs).
    return [];
  }
};
22 | -------------------------------------------------------------------------------- /scraper/scrape.mjs: -------------------------------------------------------------------------------- 1 | import 'dotenv/config'; 2 | 3 | import path from 'path'; 4 | import rimraf from 'rimraf'; 5 | import { baseDirectory, urls } from './config.mjs'; 6 | import { scrapePages } from './htmlScraper.mjs'; 7 | 8 | (async () => { 9 | await clearBaseDirectories(); 10 | scrapePages(); 11 | })(); 12 | 13 | async function clearBaseDirectories() { 14 | // clear existing content per sitemap base directories (for all possible values in sitemap) e.g. flow.offerzen.com 15 | // Does not clear assets 16 | try { 17 | const sitemapBaseDirectoriesHash = {}; 18 | urls.forEach(url => { 19 | const httpsSlash = url.indexOf('//') + 2; 20 | const firstSlash = url.indexOf('/', httpsSlash) + 1; 21 | const directory = url.slice(httpsSlash, firstSlash); 22 | 23 | return sitemapBaseDirectoriesHash[directory] = true; 24 | }); 25 | const sitemapBaseDirectories = Object.keys(sitemapBaseDirectoriesHash); 26 | 27 | for (let directory of sitemapBaseDirectories) { 28 | const directoryPath = path.join(baseDirectory, directory); 29 | rimraf.sync(directoryPath); 30 | } 31 | } 32 | catch (e) { 33 | console.error('Clearing directories failed', e); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /helpers/utils.mjs: -------------------------------------------------------------------------------- 1 | import fs from 'fs'; 2 | import path from 'path'; 3 | 4 | /** 5 | * Synchronously and recursively get directory contents 6 | * @param {string} dir Path to files (use `path`) 7 | * @param {object} object Defaults to empty object 8 | * @param {regex} object.includeOnly Use single regex to define what files 9 | * should be matched, others will be ignored 10 | */ 11 | export function getDirectoryContents(dir, { includeOnly } = {}) { 12 | var results = []; 13 | var list = 
/**
 * Resolve `src` against `base`, where `base` may itself be relative.
 * A relative base is resolved against a fake `resolve://` origin, which is
 * then stripped from the result so only path/query/fragment remain.
 */
export function resolveUrl(base, src) {
  const resolved = new URL(src, new URL(base, 'resolve://'));

  if (resolved.protocol !== 'resolve:') {
    // `base` was absolute, so the result is a complete URL.
    return resolved.toString();
  }

  // `base` was a relative URL: drop the fake origin.
  const { pathname, search, hash } = resolved;
  return pathname + search + hash;
}
/**
 * Gets assets from the content based if it's from the webflow cdn.
 * @param {object} object
 * @param {string} object.content html or css
 * @param {string} object.url base url to resolve relative asset paths against
 */
export function getAssetUrls({ content, url }) {
  // Only copy webflow cdn content e.g. assets.website-files.com/foo/bar.png
  // Example matches: https://regexr.com/6p22m
  // Captured without '" surrounding it otherwise it's difficult to deal with
  // capturing srcset (https://developer.mozilla.org/en-US/docs/Learn/HTML/Multimedia_and_embedding/Responsive_images)
  const cdnAssetPattern =
    /\bhttps?\:\/\/assets\.website\-files\.com\/[a-z0-9.\-_~!$&()*+;=:@% /]+\.([a-z]{2,4})\b/gi;

  const matches = content.match(cdnAssetPattern) ?? [];

  // De-duplicate (first-seen order preserved), then resolve each candidate
  // against the page URL.
  const uniqueMatches = [...new Set(matches)];

  return uniqueMatches.map((src) => resolveUrl(url, src));
}
// Decode percent-escapes so files are not saved with e.g. `%20` in their
// names (such files could not be served afterwards).
const cleanupAssetUrl = (url) => decodeURIComponent(url);

// Promise-based sleep: resolves after `delayInMs` milliseconds.
const delay = (delayInMs) =>
  new Promise((resolve) => setTimeout(resolve, delayInMs));
/**
 * Fetch a page and extract its normalised HTML plus referenced CDN assets.
 * @param {object} object
 * @param {string} object.url Page URL to fetch
 * @returns {Promise<{content: string, assetUrls: string[]}>}
 * @throws {Error} when the page cannot be fetched
 */
async function getPageContent({ url }) {
  let data = null;
  try {
    data = (await axios.get(url)).data;
  } catch (e) {
    console.error(`Axios failed for: ${url}`);
    // Bug fix: previously fell through with `data === null`, so
    // `cheerio.load(null)` threw an unrelated, confusing error below.
    throw new Error(`Could not fetch page: ${url}`, { cause: e });
  }

  // Re-serialise through cheerio so the markup is normalised.
  const $ = cheerio.load(data);

  const content = $.html();

  // Collect webflow CDN asset URLs referenced by the page, resolved
  // against the page URL.
  const assetUrls = getAssetUrls({ content, url });

  return { content, assetUrls };
}
const content = fs.readFileSync(assetPath, 'utf-8'); 105 | const assetUrls = getAssetUrls({ content, url }); 106 | 107 | return downloadAssets({ assetUrls }); 108 | } 109 | -------------------------------------------------------------------------------- /.github/workflows/publish_staging.yml: -------------------------------------------------------------------------------- 1 | name: Publish Staging 2 | 3 | on: 4 | workflow_dispatch: 5 | repository_dispatch: 6 | types: [publish_webflow] 7 | 8 | jobs: 9 | publish-target: 10 | runs-on: ubuntu-latest 11 | timeout-minutes: 1 12 | env: 13 | SITE: ${{ github.event.client_payload.site }} 14 | outputs: 15 | site: ${{ steps.regex-match.outputs.match }} 16 | steps: 17 | - uses: actions-ecosystem/action-regex-match@v2 18 | id: regex-match 19 | with: 20 | text: ${{ github.event.client_payload.site }} 21 | regex: '[a-z_\-]+\.webflow\.io' 22 | 23 | scrape: 24 | needs: [publish-target] 25 | timeout-minutes: 10 26 | if: ${{ needs.publish-target.outputs.site != '' }} 27 | runs-on: ubuntu-latest 28 | env: 29 | NODE_ENV: staging 30 | SITE: ${{ needs.publish-target.outputs.site }} 31 | CDN_URL: ${{ secrets.CDN_URL }} 32 | WEBFLOW_CDN_URL: https://assets.website-files.com/ 33 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 34 | BRANCH_NAME: staging 35 | PREVIEW_DOMAIN: ${{ secrets.PREVIEW_DOMAIN }} 36 | 37 | strategy: 38 | matrix: 39 | node-version: [16.x] 40 | 41 | steps: 42 | - uses: actions/checkout@v4 43 | with: 44 | ref: master 45 | token: ${{ secrets.GITHUB_TOKEN }} 46 | 47 | - name: Reset branch 48 | run: | 49 | if [ "`git branch -r | egrep staging`" ] 50 | then 51 | git push origin --delete ${{ env.BRANCH_NAME }} 52 | fi 53 | git checkout -b ${{ env.BRANCH_NAME }} master 54 | git pull origin master 55 | 56 | - name: Use Node.js ${{ matrix.node-version }} 57 | uses: actions/setup-node@v4 58 | with: 59 | node-version: ${{ matrix.node-version }} 60 | # yarn caching 61 | - uses: actions/cache@v4 62 | with: 63 | path: '**/node_modules' 64 | 
key: ${{ runner.os }}-modules-${{ hashFiles('**/yarn.lock') }} 65 | 66 | - name: Install Dependencies 67 | run: | 68 | yarn 69 | 70 | - name: Run scraper 71 | run: | 72 | yarn scrape 73 | 74 | - name: Send slack notification 75 | if: failure() 76 | run: | 77 | curl -X POST -H "Content-type: application/json" --data '{"text": " Scraper failed for ${{ env.SITE }}"}' ${{ secrets.SLACK_HOOK }} 78 | 79 | # Without any changes it will not trigger deploys 80 | - name: fake 81 | run: | 82 | echo $(date '+%s') > test.txt 83 | 84 | - name: Commit and push 85 | uses: actions-js/push@v1.4 86 | with: 87 | github_token: ${{ secrets.GITHUB_TOKEN }} 88 | message: 'Automatic: scrape' 89 | branch: ${{ env.BRANCH_NAME }} 90 | force: true 91 | 92 | - name: Actually push 93 | run: | 94 | git push origin ${{ env.BRANCH_NAME }} 95 | 96 | - name: Run test 97 | id: test 98 | run: | 99 | SITE=${{ env.SITE }} yarn test 100 | 101 | - name: Send slack notification 102 | if: failure() 103 | run: | 104 | curl -X POST -H "Content-type: application/json" --data '{"text": " Tests failed for ${{ env.SITE }}: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}?check_suite_focus=true"}' ${{ secrets.SLACK_HOOK }} 105 | 106 | - name: Send slack notification 107 | if: success() 108 | run: | 109 | echo "SITE_PATH=$(echo "${{ env.SITE }}" | sed -e 's/https:\/\///g' -e 's:/*$::')" >> $GITHUB_ENV 110 | curl -X POST -H "Content-type: application/json" --data '{"text": "Tests successful for ${{ env.SITE }}"}' ${{ secrets.SLACK_HOOK }} 111 | 112 | - name: Send preview url notification 113 | if: success() 114 | run: | 115 | curl -X POST -H "Content-type: application/json" --data '{"text": "Preview Deployed: ${{ env.PREVIEW_DOMAIN }}${{ env.SITE_PATH }}"}' ${{ secrets.SLACK_HOOK }} 116 | -------------------------------------------------------------------------------- /.github/workflows/deploy-without-scraping.yml: 
-------------------------------------------------------------------------------- 1 | # This can be used to redeploy after rollback as it re-uses already scraped assets. It targets a specific site / project only 2 | name: Deploy without scraping 3 | 4 | on: 5 | workflow_dispatch: 6 | 7 | jobs: 8 | build: 9 | env: 10 | SITE: ${{ github.event.client_payload.site }} # Replace with actual webflow site url if this isn't triggered by webhook 11 | NODE_ENV: production 12 | CDN_URL: ${{ secrets.CDN_URL }} 13 | WEBFLOW_CDN_URL: https://assets.website-files.com/ 14 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 15 | PREVIEW_DOMAIN: ${{ secrets.CDN_URL }} 16 | 17 | runs-on: ubuntu-latest 18 | timeout-minutes: 7 19 | steps: 20 | 21 | - uses: actions/checkout@v4 22 | with: 23 | ref: master 24 | token: ${{ secrets.GITHUB_TOKEN }} 25 | 26 | - name: Use Node.js ${{ matrix.node-version }} 27 | uses: actions/setup-node@v4 28 | with: 29 | node-version: ${{ matrix.node-version }} 30 | 31 | # yarn caching 32 | - uses: actions/cache@v4 33 | with: 34 | path: '**/node_modules' 35 | key: ${{ runner.os }}-modules-${{ hashFiles('**/yarn.lock') }} 36 | 37 | - name: Install Dependencies 38 | run: | 39 | yarn build 40 | echo "SITE_PATH=$(echo "${{ env.SITE }}" | sed -e 's/https:\/\///g' -e 's:/*$::')" >> $GITHUB_ENV 41 | 42 | - name: Upload transformed sites 43 | uses: actions/upload-artifact@v4 44 | with: 45 | name: static-files 46 | path: static/ 47 | retention-days: 1 48 | 49 | - name: Send slack notification 50 | if: failure() 51 | run: | 52 | curl -X POST -H "Content-type: application/json" --data '{"text": " Production Deploy failed: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}?check_suite_focus=true"}' ${{ secrets.SLACK_HOOK }} 53 | 54 | 55 | - name: Send preview url notification 56 | if: success() 57 | run: | 58 | curl -X POST -H "Content-type: application/json" --data '{"text": "Production Deployed: ${{ env.PREVIEW_DOMAIN }}${{ env.SITE_PATH }}"}' ${{ 
secrets.SLACK_HOOK }} 59 | 60 | deploy-production: 61 | needs: [build] 62 | runs-on: ubuntu-latest 63 | timeout-minutes: 6 64 | 65 | steps: 66 | - name: Checkout 67 | uses: actions/checkout@v4 68 | 69 | - name: Set shared environment variables 70 | uses: offerzen/action-env-vars-from-ssm@v1 71 | with: 72 | path: '/shared/' 73 | env: 74 | AWS_ACCESS_KEY_ID: ${{ vars.ORG_AWS_ACCESS_KEY_ID_GHA }} 75 | AWS_SECRET_ACCESS_KEY: ${{ secrets.ORG_AWS_SECRET_ACCESS_KEY_GHA }} 76 | AWS_DEFAULT_REGION: ${{ secrets.ORG_AWS_REGION }} 77 | AWS_ROLE_ARN: ${{ vars.ORG_AWS_ROLE_ARN_GHA_PRODUCTION }} 78 | 79 | - name: Configure AWS Credentials 80 | uses: aws-actions/configure-aws-credentials@v4 81 | with: 82 | aws-access-key-id: ${{ vars.ORG_AWS_ACCESS_KEY_ID_GHA }} 83 | aws-secret-access-key: ${{ secrets.ORG_AWS_SECRET_ACCESS_KEY_GHA }} 84 | aws-region: ${{ secrets.ORG_AWS_REGION }} 85 | role-to-assume: ${{ vars.ORG_AWS_ROLE_ARN_GHA_PRODUCTION }} 86 | role-duration-seconds: 900 87 | 88 | - name: Download transformed sites 89 | uses: actions/download-artifact@v4 90 | with: 91 | name: static-files 92 | path: static/ 93 | 94 | - name: Upload build to S3 bucket 95 | env: 96 | S3_DEPLOY_PATH: "s3://$S3_BUCKET_NAME_CDN/${{ github.event.repository.id }}" 97 | run: | 98 | echo "Uploading non-HTML files to ${{ env.S3_DEPLOY_PATH }}" 99 | aws s3 sync --no-progress --exclude *.html static/ ${{ env.S3_DEPLOY_PATH }} 100 | echo 101 | echo "Uploading HTML files to S3 with 'cache-control:no-cache' header..." 102 | aws s3 sync --no-progress --include *.html --content-type "text/html;charset=utf-8" --cache-control no-cache static/ ${{ env.S3_DEPLOY_PATH }} 103 | -------------------------------------------------------------------------------- /README.MD: -------------------------------------------------------------------------------- 1 | # Wombat 2 | 3 | Webflow Combat 4 | 5 | ## Purpose 6 | 7 | Scrapes pages from Webflow whenever a project is published. 
8 | 9 | - Run tests on the source of scraped pages 10 | - Transform source code with Core Web Vitals optimisations 11 | - Serve all pages on a different platform so any Webflow publish will only go live if it passes all tests and transformations 12 | 13 | ## Why 14 | 15 | Webflow has significant issues that reduce the safety of any given publish action. 16 | 17 | **Webflow problems** 18 | Ticks are where this project solves the issue: 19 | 20 | - [x] Changes are very obfuscated and not clear what is being done (or undone if reverting) 21 | - [x] Changes made while developing can accidentally be left in the code and published 22 | - [x] Bad page construction for core web vitals that cannot be changed as they are editable in the Webflow designer 23 | - [x] Single CSS file for entire project (all pages share one large file) 24 | - [x] Unable to see what changes will go live without manually reviewing an entire project _before_ clicking publish 25 | - [x] Clear list of changes each publish 26 | - [x] Reverting changes 27 | - [ ] When publishing all project pages will go live and it cannot be "locked" to a given page or set of changes 28 | - [ ] CSS edited on one side of the site can affect the rest of the site 29 | 30 | ## Project architecture 31 | 32 | ### Process flow 33 | 34 | ![Process flow](./docs/project-process-flow.png) 35 | 36 | #### Setup a Webflow project 37 | 38 | Add a webhook to a Webflow project on `publish`. Zapier is one option, there are many others. 39 | 40 | #### When a Webflow project is published 41 | 42 | - Webflow is published, fires the Webhook 43 | - `domains` are extracted and first domain is included in `client_payload.site` for GitHub workflow. 
Any domain can be used, all point to the same place 44 | - `POST` to [Wombat GitHub actions](https://api.github.com/repos/offerzen/wombat/dispatches) as `publish_webflow` 45 | - Fires the `.github/workflows/publish.yml` workflow 46 | - Webflow project specified in `client_payload.site` is scraped 47 | - All files are placed in `/compare` 48 | - Files are placed in a root folder based on name from `client_payload.site` 49 | - Folders are created based on directory path and pages are added as `index.html` files 50 | - PR is created so raw (without transformations) Webflow files can be compared to the last publish 51 | - Source code tests (`yarn test`) are run against `/compare` files. This uses Jest (all added to `/tests`) 52 | - Any successes or failures for this workflow will be reported to Slack _#gp-webflow-platform_ 53 | 54 | #### Build and deploy 55 | 56 | To ensure only tested files are deployed, all files are taken from `/compare` and not rescraped. 57 | 58 | - Delete everything in `/static` to ensure we don't serve old content 59 | - Copy everything from `/compare` to `/static`. 60 | - `/compare` content is always added to git but `/static` is not, so that there are no extra untracked files in version control 61 | - Run optimisation transformations (`/core-web-vitals/index.mjs`) on html source and recopy files to `/static`. 62 | - Possibility: trigger end-to-end tests in staging (outside of project scope) 63 | - Serve 64 | 65 | ### File architecture 66 | 67 | Core components are marked by `[component]`. 68 | 69 | ```bash 70 | ├── babel.config.js # 71 | │ 72 | ├── [build] `Component: Builder` 73 | │ │ Building the output for deployment. This will be run as a final step before deployment. 74 | │ │ Files are modified and copied from `/compare` to `/static`. 
75 | │ ├── assets # Operate on any assets 76 | │ │ └── rewriteAssetPaths.mjs # All paths to Webflow's CDN are rewritten to use our CDN 77 | │ ├── coreWebVitals # Optimisation transformations plugins 78 | │ │ ├── css # Optimisation plugins for css 79 | │ │ │ └── uncss.mjs # Plugin: Makes CSS per-page instead of per-project and strips out unnecessary rules 80 | │ │ ├── html # Optimisation plugins for html 81 | │ │ │ └── typekit.mjs # Makes Typekit async 82 | │ │ └── index.mjs # 83 | │ └── index.mjs # Run by `yarn build`. Copies from /compare to /static and runs all plugins 84 | │ 85 | ├── compare # Scraped files from Webflow without transformations 86 | │ └── ... 87 | │ 88 | ├── docs # Images for this readme 89 | │ └── ... 90 | │ 91 | ├── helpers # Shared tools 92 | │ └── utils.mjs # Read files from directory, pipe 93 | │ 94 | ├── jest.config.js # For jest source code tests 95 | ├── scrape-all.sh # Scrapes all files for each Webflow project to populate /compare if needed 96 | │ 97 | ├── [scraper] `Component: Scraper` 98 | │ │ Get all the pages and assets from Webflow. Downloads files to `/compare`. 99 | │ ├── assetScraper.mjs # Handle scraping and downloading assets e.g. images, css, svg files 100 | │ ├── config.mjs # Global vars for current site 101 | │ ├── htmlScraper.mjs # Handle scraping HTML 102 | │ ├── scrape.mjs # Scrape Webflow based on sitemap 103 | │ └── sitemap.mjs # Get sitemap from Webflow as targets 104 | │ 105 | ├── static # Scraped files from Webflow with optimisations for serving 106 | │ └── ... 107 | │ 108 | └── [tests] `Component: Tester` 109 | │ Tests to run on scraped content 110 | │ Use Jest, and see the example `test.template.js` to learn more. 111 | ├── ngrok.test.js # Checks for presence of ngrok 112 | ├── setup.mjs # Allows tests to query source with `global.html(...)` 113 | └── test.template.js # Example test 114 | ``` 115 | 116 | _To regenerate above, install `tree` e.g. 
`brew install tree` and run `yarn doc-tree` and cleanup output._ 117 | 118 | ## Local development 119 | 120 | Add a .env file with the following: 121 | 122 | ```bash 123 | SITE= # e.g https://offerzen.webflow.io/ 124 | CDN_URL= # e.g. https://offerzen.github.io/assets/ 125 | ``` 126 | 127 | Setup the project: 128 | 129 | ```bash 130 | $ yarn 131 | ``` 132 | 133 | Scrape just one project: 134 | 135 | ```bash 136 | $ yarn scrape 137 | ``` 138 | 139 | Run tests on scraped files: 140 | 141 | ```bash 142 | $ yarn test 143 | ``` 144 | 145 | Run build-step optimisations/transformations: 146 | 147 | ```bash 148 | $ yarn build 149 | ``` 150 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | workflow_dispatch: 5 | repository_dispatch: 6 | types: [publish_webflow] 7 | 8 | jobs: 9 | publish-target: 10 | runs-on: ubuntu-latest 11 | timeout-minutes: 1 12 | env: 13 | SITE: ${{ github.event.client_payload.site }} 14 | outputs: 15 | site: ${{ steps.regex-match.outputs.match }} 16 | steps: 17 | - uses: actions-ecosystem/action-regex-match@v2 18 | id: regex-match 19 | with: 20 | text: ${{ github.event.client_payload.site }} 21 | regex: '[a-z_\-]+\.offerzen\.com' 22 | 23 | scrape: 24 | needs: [publish-target] 25 | if: ${{ needs.publish-target.outputs.site != '' }} 26 | runs-on: ubuntu-latest 27 | timeout-minutes: 10 28 | env: 29 | NODE_ENV: test 30 | SITE: ${{ needs.publish-target.outputs.site }} 31 | CDN_URL: ${{ secrets.CDN_URL }} 32 | WEBFLOW_CDN_URL: https://assets.website-files.com/ 33 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 34 | BRANCH_NAME: test 35 | 36 | strategy: 37 | matrix: 38 | node-version: [16.x] 39 | 40 | steps: 41 | - uses: actions/checkout@v4 42 | with: 43 | ref: master 44 | token: ${{ secrets.GITHUB_TOKEN }} 45 | 46 | - name: Use Node.js ${{ matrix.node-version }} 47 | uses: 
actions/setup-node@v4 48 | with: 49 | node-version: ${{ matrix.node-version }} 50 | # yarn caching 51 | - uses: actions/cache@v4 52 | with: 53 | path: '**/node_modules' 54 | key: ${{ runner.os }}-modules-${{ hashFiles('**/yarn.lock') }} 55 | 56 | - name: Install Dependencies 57 | run: | 58 | yarn 59 | 60 | - name: Run scraper 61 | run: | 62 | yarn scrape 63 | 64 | - name: Send slack notification 65 | if: failure() 66 | run: | 67 | curl -X POST -H "Content-type: application/json" --data '{"text": " Scraper failed for ${{ env.SITE }}"}' ${{ secrets.SLACK_HOOK }} 68 | 69 | - name: Setup branch 70 | id: branch 71 | run: | 72 | echo "SITE_PATH=$(echo "${{ env.SITE }}" | sed -e 's/https:\/\///g' -e 's:/*$::')" >> $GITHUB_ENV 73 | echo "BRANCH_NAME=automatic/$(date '+%s')" >> $GITHUB_ENV 74 | 75 | - name: Create branch 76 | run: | 77 | git checkout -b ${{ env.BRANCH_NAME }} 78 | 79 | - name: Commit and push 80 | uses: actions-js/push@v1.4 81 | with: 82 | github_token: ${{ secrets.GITHUB_TOKEN }} 83 | message: 'Automatic: scrape' 84 | branch: ${{ env.BRANCH_NAME }} 85 | 86 | - name: Actually push 87 | run: | 88 | git push -u origin ${{ env.BRANCH_NAME }} 89 | 90 | - name: Create Pull Request 91 | id: cpr 92 | run: | 93 | echo "PULL_REQUEST_URL=$(gh pr create --base master --title 'Webflow publish for ${{ env.SITE_PATH }}' --body 'Automatic: Webflow publish')" >> $GITHUB_ENV 94 | 95 | - name: Run test 96 | id: test 97 | run: | 98 | SITE=${{ env.SITE }} yarn test 99 | 100 | - name: Send slack notification 101 | if: failure() 102 | run: | 103 | curl -X POST -H "Content-type: application/json" --data '{"text": " Tests failed for ${{ env.SITE }}: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}?check_suite_focus=true"}' ${{ secrets.SLACK_HOOK }} 104 | 105 | - name: Send slack notification 106 | if: success() 107 | run: | 108 | curl -X POST -H "Content-type: application/json" --data '{"text": "Tests successful for ${{ env.SITE_PATH }}"}' ${{ 
secrets.SLACK_HOOK }} 109 | 110 | - name: Auto merge PR 111 | id: automerge 112 | run: | 113 | gh pr merge ${{ env.PULL_REQUEST_URL }} --delete-branch --auto --merge 114 | 115 | - name: feedback 116 | run: | 117 | curl -X POST -H "Content-type: application/json" --data '{"text": "Pull request: ${{ env.PULL_REQUEST_URL }}"}' ${{ secrets.SLACK_HOOK }} 118 | 119 | build: 120 | needs: [publish-target,scrape] 121 | env: 122 | SITE: ${{ needs.publish-target.outputs.site }} 123 | NODE_ENV: production 124 | CDN_URL: ${{ secrets.CDN_URL }} 125 | WEBFLOW_CDN_URL: https://assets.website-files.com/ 126 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 127 | PREVIEW_DOMAIN: ${{ secrets.PREVIEW_DOMAIN }} 128 | 129 | runs-on: ubuntu-latest 130 | timeout-minutes: 20 131 | steps: 132 | 133 | - uses: actions/checkout@v4 134 | with: 135 | ref: master 136 | token: ${{ secrets.GITHUB_TOKEN }} 137 | 138 | - name: Use Node.js ${{ matrix.node-version }} 139 | uses: actions/setup-node@v4 140 | with: 141 | node-version: ${{ matrix.node-version }} 142 | 143 | # yarn caching 144 | - uses: actions/cache@v4 145 | with: 146 | path: '**/node_modules' 147 | key: ${{ runner.os }}-modules-${{ hashFiles('**/yarn.lock') }} 148 | 149 | - name: Install Dependencies 150 | run: | 151 | yarn build 152 | echo "SITE_PATH=$(echo "${{ env.SITE }}" | sed -e 's/https:\/\///g' -e 's:/*$::')" >> $GITHUB_ENV 153 | 154 | - name: Upload transformed sites 155 | uses: actions/upload-artifact@v4 156 | with: 157 | name: static-files 158 | path: static/ 159 | retention-days: 1 160 | 161 | - name: Send slack notification 162 | if: failure() 163 | run: | 164 | curl -X POST -H "Content-type: application/json" --data '{"text": " Production Deploy failed: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}?check_suite_focus=true"}' ${{ secrets.SLACK_HOOK }} 165 | 166 | 167 | - name: Send preview url notification 168 | if: success() 169 | run: | 170 | curl -X POST -H "Content-type: application/json" --data '{"text": 
"Production Deployed: ${{ env.PREVIEW_DOMAIN }}${{ env.SITE_PATH }}"}' ${{ secrets.SLACK_HOOK }} 171 | 172 | deploy-staging: 173 | needs: [build] 174 | runs-on: ubuntu-latest 175 | timeout-minutes: 15 176 | 177 | steps: 178 | - name: Checkout 179 | uses: actions/checkout@v4 180 | 181 | - name: Set shared environment variables 182 | uses: offerzen/action-env-vars-from-ssm@v1 183 | with: 184 | path: '/shared/' 185 | env: 186 | AWS_ACCESS_KEY_ID: ${{ vars.ORG_AWS_ACCESS_KEY_ID_GHA }} 187 | AWS_SECRET_ACCESS_KEY: ${{ secrets.ORG_AWS_SECRET_ACCESS_KEY_GHA }} 188 | AWS_DEFAULT_REGION: ${{ secrets.ORG_AWS_REGION }} 189 | AWS_ROLE_ARN: ${{ vars.ORG_AWS_ROLE_ARN_GHA_STAGING }} 190 | 191 | - name: Configure AWS Credentials 192 | uses: aws-actions/configure-aws-credentials@v4 193 | with: 194 | aws-access-key-id: ${{ vars.ORG_AWS_ACCESS_KEY_ID_GHA }} 195 | aws-secret-access-key: ${{ secrets.ORG_AWS_SECRET_ACCESS_KEY_GHA }} 196 | aws-region: ${{ secrets.ORG_AWS_REGION }} 197 | role-to-assume: ${{ vars.ORG_AWS_ROLE_ARN_GHA_STAGING }} 198 | role-duration-seconds: 900 199 | 200 | - name: Download transformed sites 201 | uses: actions/download-artifact@v4 202 | with: 203 | name: static-files 204 | path: static/ 205 | 206 | - name: Upload build to S3 bucket 207 | env: 208 | S3_DEPLOY_PATH: "s3://$S3_BUCKET_NAME_CDN/${{ github.event.repository.id }}" 209 | run: | 210 | echo "Uploading non-HTML files to ${{ env.S3_DEPLOY_PATH }}" 211 | aws s3 sync --no-progress --exclude *.html static/ ${{ env.S3_DEPLOY_PATH }} 212 | echo 213 | echo "Uploading HTML files to S3 with 'cache-control:no-cache' header..." 
214 | aws s3 sync --no-progress --include *.html --content-type "text/html;charset=utf-8" --cache-control no-cache static/ ${{ env.S3_DEPLOY_PATH }} 215 | 216 | deploy-production: 217 | needs: [build] 218 | runs-on: ubuntu-latest 219 | timeout-minutes: 15 220 | 221 | steps: 222 | - name: Checkout 223 | uses: actions/checkout@v4 224 | 225 | - name: Set shared environment variables 226 | uses: offerzen/action-env-vars-from-ssm@v1 227 | with: 228 | path: '/shared/' 229 | env: 230 | AWS_ACCESS_KEY_ID: ${{ vars.ORG_AWS_ACCESS_KEY_ID_GHA }} 231 | AWS_SECRET_ACCESS_KEY: ${{ secrets.ORG_AWS_SECRET_ACCESS_KEY_GHA }} 232 | AWS_DEFAULT_REGION: ${{ secrets.ORG_AWS_REGION }} 233 | AWS_ROLE_ARN: ${{ vars.ORG_AWS_ROLE_ARN_GHA_PRODUCTION }} 234 | 235 | - name: Configure AWS Credentials 236 | uses: aws-actions/configure-aws-credentials@v4 237 | with: 238 | aws-access-key-id: ${{ vars.ORG_AWS_ACCESS_KEY_ID_GHA }} 239 | aws-secret-access-key: ${{ secrets.ORG_AWS_SECRET_ACCESS_KEY_GHA }} 240 | aws-region: ${{ secrets.ORG_AWS_REGION }} 241 | role-to-assume: ${{ vars.ORG_AWS_ROLE_ARN_GHA_PRODUCTION }} 242 | role-duration-seconds: 900 243 | 244 | - name: Download transformed sites 245 | uses: actions/download-artifact@v4 246 | with: 247 | name: static-files 248 | path: static/ 249 | 250 | - name: Upload build to S3 bucket 251 | env: 252 | S3_DEPLOY_PATH: "s3://$S3_BUCKET_NAME_CDN/${{ github.event.repository.id }}" 253 | run: | 254 | echo "Uploading non-HTML files to ${{ env.S3_DEPLOY_PATH }}" 255 | aws s3 sync --no-progress --exclude *.html static/ ${{ env.S3_DEPLOY_PATH }} 256 | echo 257 | echo "Uploading HTML files to S3 with 'cache-control:no-cache' header..." 258 | aws s3 sync --no-progress --include *.html --content-type "text/html;charset=utf-8" --cache-control no-cache static/ ${{ env.S3_DEPLOY_PATH }} 259 | --------------------------------------------------------------------------------