├── .cloudgov ├── manifest.yml └── vars │ ├── pages-dev.yml │ ├── pages-production.yml │ └── pages-staging.yml ├── .codeclimate.yml ├── .coveragerc ├── .cz.json ├── .dockerignore ├── .flake8 ├── .github ├── dependabot.yml ├── pull_request_template.md └── workflows │ └── codeql-analysis.yml ├── .gitignore ├── .local.sample.json ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Dockerfile ├── Dockerfile-db ├── Dockerfile-exp ├── Dockerfile-test ├── LICENSE.md ├── README.md ├── bin ├── migrate.sql └── push-docker-image.sh ├── ci ├── partials │ ├── audit.yml │ ├── build.yml │ ├── deploy.yml │ └── test.yml ├── pipeline-dev.yml ├── pipeline-production.yml ├── pipeline-staging.yml ├── pipeline.yml └── tasks │ ├── deploy.sh │ ├── pip-audit.sh │ └── test.sh ├── docker-compose.yml ├── docker └── ua-attach-config.sh ├── echo-server ├── Dockerfile └── run.py ├── pytest.ini ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── src ├── build.py ├── common.py ├── crypto │ ├── __init__.py │ └── decrypt.py ├── log_utils │ ├── __init__.py │ ├── common.py │ ├── db_handler.py │ ├── delta_to_mins_secs.py │ ├── get_logger.py │ ├── monitoring.py │ └── remote_logs.py ├── main.py ├── publishing │ ├── __init__.py │ ├── models.py │ └── s3publisher.py ├── repo_config │ ├── __init__.py │ └── repo_config.py ├── runner │ └── __init__.py └── steps │ ├── __init__.py │ ├── build.py │ ├── cache.py │ ├── exceptions.py │ ├── fetch.py │ └── publish.py └── test ├── __init__.py ├── publishing ├── __init__.py ├── test_models.py └── test_s3publisher.py ├── repo_config ├── __init__.py └── test_repo_config.py ├── support.py ├── test_build.py ├── test_cache.py ├── test_crypto.py ├── test_fetch.py ├── test_log_utils.py ├── test_publish.py ├── test_remote_logs.py ├── test_repo_config.py └── test_runner.py /.cloudgov/manifest.yml: -------------------------------------------------------------------------------- 1 | --- 2 | applications: 3 | - name: ((product))-build-container((env_postfix)) 4 | no-route: true 5 | health-check-type: process 6 | instances: 0 7 | services: 8 | - federalist-((env))-rds 9 | - federalist-((env))-uev-key 10 | - pages-((env))-encryption 11 | metadata: 12 | labels: 13 | type: build-container 14 | name: default 15 | annotations: 16 | command: cd app && python main.py -p 17 | - name: ((product))-build-container-exp((env_postfix)) 18 | no-route: true 19 | health-check-type: process 20 | instances: 0 21 | services: 22 | - federalist-((env))-rds 23 | - federalist-((env))-uev-key 24 | metadata: 25 | labels: 26 | type: build-container 27 | name: exp 28 | annotations: 29 | command: cd app && ./build -p 30 | -------------------------------------------------------------------------------- /.cloudgov/vars/pages-dev.yml: -------------------------------------------------------------------------------- 1 | env: dev 2 | env_postfix: -dev 3 | product: pages 4 | -------------------------------------------------------------------------------- /.cloudgov/vars/pages-production.yml: -------------------------------------------------------------------------------- 1 | env: production 2 | env_postfix: '-production' 3 | product: pages 4 | -------------------------------------------------------------------------------- /.cloudgov/vars/pages-staging.yml: -------------------------------------------------------------------------------- 1 | env: staging 2 | env_postfix: -staging 3 | product: pages -------------------------------------------------------------------------------- /.codeclimate.yml: 
-------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | plugins: 4 | pep8: 5 | enabled: true 6 | 7 | exclude_patterns: 8 | - "bin" 9 | - "main.py" 10 | - "manifests" 11 | - "test" -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = */.local/* -------------------------------------------------------------------------------- /.cz.json: -------------------------------------------------------------------------------- 1 | { 2 | "commitizen": { 3 | "name": "cz_customize", 4 | "version_scheme": "semver", 5 | "version_provider": "scm", 6 | "update_changelog_on_bump": true, 7 | "major_version_zero": false, 8 | "bump_message": "chore: release $new_version", 9 | "gpg_sign": true, 10 | "changelog_incremental": true, 11 | "customize": { 12 | "message_template": "{{change_type}}:{% if show_message %} {{message}}{% endif %}", 13 | "example": "feat: this feature enable customize through config file", 14 | "schema": ": ", 15 | "schema_pattern": "^(build|chore|ci|docs|feat|fix|perf|refactor|revert|style|test){1}(\\([\\w\\-\\.]+\\))?(!)?: ([\\w \\-'])+([\\s\\S]*)", 16 | "bump_pattern": "^(.+!|BREAKING CHANGE|chore|docs|feat|fix|perf|refactor|revert|style|test)(\\([\\w\\-\\.]+\\))?:", 17 | "bump_map": { 18 | ".+!": "MAJOR", 19 | "BREAKING CHANGE": "MAJOR", 20 | "feat": "MINOR", 21 | "fix": "PATCH", 22 | "chore": "PATCH", 23 | "docs": "PATCH", 24 | "perf": "PATCH", 25 | "refactor": "PATCH", 26 | "revert": "MINOR", 27 | "style": "PATCH", 28 | "test": "PATCH" 29 | }, 30 | "change_type_order": ["Breaking Changes", "Added", "Fixed", "Performance", "Reverted", "Maintenance", "Documentation"], 31 | "commit_parser": "^((?Pchore|docs|feat|fix|perf|refactor|revert|style|test|BREAKING CHANGE)(?:\\((?P[^()\r\n]*)\\)|\\()?(?P!)?|\\w+!):\\s(?P.*)?", 32 | "changelog_pattern": "^(.+!|BREAKING CHANGE|chore|docs|feat|fix|perf|refactor|revert|style|test)(\\([\\w\\-\\.]+\\))?:", 33 | "change_type_map": { 34 | "BREAKING CHANGE": "Breaking Changes", 35 | "chore": "Maintenance", 36 | "docs": "Documentation", 37 | "feat": "Added", 38 | "fix": "Fixed", 39 | "perf": "Performance", 40 | "refactor": "Maintenance", 41 | "revert": "Reverted", 42 | "style": "Maintenance", 43 | "test": "Maintenance" 44 | } 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | # Disable version updates for pip dependencies 8 | # This still allows for security updates but is pretty sloppy configuration from Github 9 | # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#open-pull-requests-limit 10 | open-pull-requests-limit: 0 11 | commit-message: 12 | prefix: '[ci skip] ' 13 | 
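For context, the `.cz.json` commitizen configuration shown above is what maps conventional-commit messages to semver bumps and changelog sections. The sketch below is a rough, hand-rolled approximation of how `bump_pattern` and `bump_map` interact; it is not commitizen's actual algorithm, and the helper name is hypothetical:

```python
import re

# Patterns copied from the .cz.json above (JSON escaping removed).
BUMP_PATTERN = r"^(.+!|BREAKING CHANGE|chore|docs|feat|fix|perf|refactor|revert|style|test)(\([\w\-\.]+\))?:"
BUMP_MAP = {
    ".+!": "MAJOR",
    "BREAKING CHANGE": "MAJOR",
    "feat": "MINOR",
    "fix": "PATCH",
    "chore": "PATCH",
    "docs": "PATCH",
    "perf": "PATCH",
    "refactor": "PATCH",
    "revert": "MINOR",
    "style": "PATCH",
    "test": "PATCH",
}


def bump_for(message: str):
    """Return the semver increment a commit message would trigger, or None (sketch)."""
    match = re.match(BUMP_PATTERN, message)
    if not match:
        return None  # message does not trigger a version bump
    prefix = match.group(1)
    # First bump_map key whose pattern matches the commit-type prefix wins.
    for key_pattern, increment in BUMP_MAP.items():
        if re.match(key_pattern, prefix):
            return increment
    return None


print(bump_for("feat(publish): upload files in threads"))  # MINOR
print(bump_for("fix: add timeout to requests"))            # PATCH
print(bump_for("refactor!: drop node 16 support"))         # MAJOR (matches '.+!')
```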
-------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Changes proposed in this pull request: 2 | - 3 | - 4 | - 5 | 6 | ## security considerations 7 | [Note the any security considerations here, or make note of why there are none] 8 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ staging, main ] 6 | pull_request: 7 | branches: [ staging, main ] 8 | schedule: 9 | - cron: '28 17 * * 4' 10 | 11 | jobs: 12 | analyze: 13 | name: Analyze 14 | runs-on: ubuntu-latest 15 | permissions: 16 | actions: read 17 | contents: read 18 | security-events: write 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | language: [ 'python' ] 24 | 25 | steps: 26 | - name: Checkout repository 27 | uses: actions/checkout@v3 28 | 29 | - name: Initialize CodeQL 30 | uses: github/codeql-action/init@v2 31 | with: 32 | languages: ${{ matrix.language }} 33 | 34 | - name: Autobuild 35 | uses: github/codeql-action/autobuild@v2 36 | 37 | - name: Perform CodeQL Analysis 38 | uses: github/codeql-action/analyze@v2 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .cache 2 | .coverage 3 | .env 4 | .local 5 | .pytest_cache/ 6 | .python-version 7 | .vscode 8 | tmp 9 | __pycache__/ 10 | coverage/ 11 | ci/vars/.* -------------------------------------------------------------------------------- /.local.sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "aws_access_key_id": "ACCESS KEY ID", 3 | "aws_default_region": "REGION", 4 | "aws_secret_access_key": "SECRET ACCESS KEY", 5 | "bucket": "BUCKET", 6 | "github_token": "TOKEN", 7 | "status_callback": "http://echoserver:8989/status", 8 | "baseurl": "/", 9 | "config": "", 10 | "build_id": "12345", 11 | "generator": "GENERATOR", 12 | "owner": "OWNER", 13 | "branch": "BRANCH", 14 | "repository": "REPOSITORY", 15 | "site_prefix": "preview/OWNER/REPOSITORY", 16 | "user_environment_variables": [] 17 | } -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.1.4 (2025-06-10) 2 | 3 | ### Fixed 4 | 5 | - Sets proper headings on CONTRIBUTING.md 6 | 7 | ### Maintenance 8 | 9 | - Update CONTRIBUTING.md 10 | - Update dependency requests to v2.32.4 11 | - update dependencies 12 | 13 | ## 0.1.3 (2024-12-05) 14 | 15 | ### Maintenance 16 | 17 | - Install and set default node to v20 18 | 19 | ## 0.1.2 (2024-10-29) 20 | 21 | ### Maintenance 22 | 23 | - support node v22, warn on v18 (#4644) 24 | - Bump cryptography from 42.0.7 to 43.0.1 in the pip 25 | 26 | ## 0.1.1 (2024-06-28) 27 | 28 | ### Maintenance 29 | 30 | - use correct audit step name 31 | - switch to ci boot 32 | - fix production release 33 | - add release workflow, wait for certain passes 34 | 35 | ## 0.1.0 (2024-06-21) 36 | 37 | ### Added 38 | 39 | - save build container metrics to API 40 | - Add dev deployment env for PRs to staging 41 | - Switch to using harden container for cf-image 42 | 43 | ### Fixed 44 | 45 | - Decrypt predefined keys in build params 46 | - Run build 
CalledProcessError exception 47 | - remove puts to git-resource in ci pipeline 48 | - install required usg dependencies 49 | - node 16 temporarily allowed 50 | - Add additional lib deps for site builds 51 | - Remove stack param since it is docker image 52 | - Update tests to account for request timeout kwarg addition 53 | - Add timeout to requests based on bandit findings 54 | - CI slack emoji for successful nightly restage 55 | - CI pipeline to properly use src input for nightly rebuild 56 | - Update tests with additional mock calls 57 | - Remove f-string for gem update command 58 | 59 | ### Performance 60 | 61 | - run uploader task in threads 62 | 63 | ### Maintenance 64 | 65 | - update docs, drop nightly restage 66 | - use correct input pipeline names 67 | - use pipeline tasks, use pr/main/tag release 68 | - Bump requests to v2.32.3 69 | - Add decrypt to build site params 70 | - Enable Dependabot security scanning (#458) 71 | - container hardening 72 | - Add dependency auditing with pip-audit (#458) 73 | - **ci**: Switch to general-task and registry-image for CI jobs 74 | - Add hardened git resource 75 | - Simplify CI notifications from task hooks 76 | - Update resource types and python deps to use hardened images 77 | - Adjust for Github GPG token expiration 78 | - don't build node 16 79 | - default to node 18 (#437) 80 | - Update app stack to cflinuxfs4 81 | - Add gem update --system for Jekyll builds 82 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Policy 2 | 3 | Cloud.gov is an open source project operated by the U.S. General Services Administration (GSA) to support federal agency missions. While we value transparency and collaboration, we must balance openness with the responsibilities of operating a secure, compliant, and trusted federal platform. 4 | 5 | ## ✅ Who can contribute 6 | We welcome contributions from: 7 | 8 | - Employees of U.S. federal agencies 9 | - Contractors working under a current agreement with a U.S. government entity 10 | - GSA-approved contributors as part of official interagency collaboration 11 | 12 | ## ❌ Who we cannot accept contributions from 13 | To avoid the appearance of government endorsement, manage supply chain risk, and maintain the integrity of our compliance posture, we do **not** accept unsolicited contributions from: 14 | 15 | - Individuals unaffiliated with the U.S. government 16 | - International contributors or organizations 17 | - Unvetted accounts or first-time contributors submitting minor changes 18 | 19 | If you're unsure whether your contribution fits, feel free to open an issue first so we can discuss it. 
20 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax = docker/dockerfile:1.2 2 | FROM ubuntu:22.04 3 | 4 | # Install general dependencies 5 | RUN apt-get update \ 6 | && apt-get install -y --no-install-recommends \ 7 | apt-utils build-essential git curl libssl-dev \ 8 | libreadline-dev zlib1g-dev libffi-dev libgl1-mesa-glx \ 9 | sudo gnupg ca-certificates ubuntu-advantage-tools \ 10 | autoconf automake libgdbm-dev libncurses5-dev \ 11 | libsqlite3-dev libtool libyaml-dev pkg-config libgmp-dev \ 12 | libpq-dev libxi6 libjpeg-dev libpng-dev libtiff-dev libgif-dev \ 13 | libwebp-dev wget python3 python3-dev python3-pip\ 14 | # Ruby deps 15 | gawk bison sqlite3 16 | 17 | # Deps for container hardening 18 | RUN ln -sf "/usr/share/zoneinfo/$SYSTEM_TIMEZONE" /etc/localtime 19 | COPY docker/ua-attach-config.sh . 20 | RUN --mount=type=secret,id=UA_TOKEN ./ua-attach-config.sh && \ 21 | ua attach --attach-config ua-attach-config.yaml && \ 22 | rm ua-attach-config.yaml 23 | RUN apt-get -y -q install usg 24 | 25 | # Install and setup en_US.UTF-8 locale 26 | # This is necessary so that output from node/ruby/python 27 | # won't break or have weird indecipherable characters 28 | RUN apt-get update && \ 29 | apt-get install --reinstall -y locales && \ 30 | sed -i 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \ 31 | locale-gen en_US.UTF-8 32 | 33 | # Install headless chrome 34 | ARG DEBIAN_FRONTEND=noninteractive 35 | RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ 36 | && echo 'deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main' >> /etc/apt/sources.list.d/google.list \ 37 | && apt-get update \ 38 | && apt-get install -y google-chrome-unstable --no-install-recommends \ 39 | && rm -rf /var/lib/apt/lists/* 40 | 41 | ENV LANG en_US.UTF-8 42 | ENV LANGUAGE en_US 43 | ENV LC_ALL en_US.UTF-8 44 | 45 | RUN dpkg-reconfigure --frontend noninteractive locales 46 | 47 | SHELL ["/bin/bash", "-c"] 48 | 49 | # Disable ipv6 to enable fetching gpg keys for rvm 50 | # http://rvm.io/rvm/security#ipv6-issues 51 | RUN mkdir -p /root/.gnupg \ 52 | && echo 'disable-ipv6' >> /root/.gnupg/dirmngr.conf \ 53 | && echo 'rvm_silence_path_mismatch_check_flag=1' >> /etc/rvmrc \ 54 | && echo 'install: --no-document\nupdate: --no-document' >> /etc/.gemrc 55 | 56 | RUN useradd --no-log-init --system --create-home --groups sudo system \ 57 | && echo 'system ALL=(ALL:ALL) NOPASSWD:ALL' >> /etc/sudoers.d/system 58 | 59 | RUN useradd --no-log-init --system --create-home customer 60 | 61 | ############################################################### 62 | # Run these steps as the 'system' user 63 | # 64 | USER system 65 | 66 | # Install rvm 67 | RUN set -ex \ 68 | && for key in \ 69 | 409B6B1796C275462A1703113804BB82D39DC0E3 \ 70 | 7D2BAF1CF37B13E2069D6956105BD0E739499BDB \ 71 | ; do \ 72 | sudo gpg --batch --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys "$key" || \ 73 | sudo gpg --batch --keyserver hkp://pool.sks-keyservers.net:80 --recv-keys "$key" || \ 74 | sudo gpg --batch --keyserver hkp://ipv4.pool.sks-keyservers.net --recv-keys "$key" || \ 75 | sudo gpg --batch --keyserver hkp://pgp.mit.edu:80 --recv-keys "$key" || \ 76 | sudo gpg --batch --keyserver hkp://keyserver.pgp.com --recv-keys "$key" ; \ 77 | done \ 78 | # We use 'sudo' here to support multi-user install 79 | # 
http://rvm.io/rvm/install#1-download-and-run-the-rvm-installation-script 80 | && \curl -sSL https://get.rvm.io | sudo -n bash -s stable 81 | 82 | # Add 'customer' user to rvm group 83 | RUN sudo usermod --append --groups rvm customer 84 | 85 | ############################################################### 86 | # Run these steps as the customer user 87 | # 88 | USER customer 89 | 90 | # Configure rvm and install default Ruby 91 | ENV RUBY_VERSION 3.1.4 92 | ENV RUBY_VERSION_MIN 3.0.6 93 | RUN source /usr/local/rvm/scripts/rvm \ 94 | # Fail if deps are missing, won't prompt for sudo 95 | && rvm autolibs read-fail \ 96 | && rvm install --no-docs $RUBY_VERSION \ 97 | && rvm use --default $RUBY_VERSION \ 98 | # Make rvm available in non-login bash shells 99 | && echo 'source /usr/local/rvm/scripts/rvm' >> ~/.bashrc 100 | 101 | # Update to the latest RubyGems 102 | RUN source /usr/local/rvm/scripts/rvm && \ 103 | rvm rubygems 3.4.22 104 | 105 | # Default to Node 20 106 | ENV NODE_VERSION lts/iron 107 | RUN curl https://raw.githubusercontent.com/nvm-sh/nvm/v0.37.2/install.sh | bash \ 108 | && \. "$HOME/.nvm/nvm.sh" \ 109 | && nvm install $NODE_VERSION 110 | 111 | 112 | ############################################################### 113 | # Run these steps and the container as the 'root' user 114 | # 115 | # This is necessary because the build code needs to have 116 | # rights to switch to 'customer' user 117 | # 118 | USER root 119 | 120 | WORKDIR /app 121 | 122 | COPY ./requirements.txt ./requirements.txt 123 | 124 | RUN pip3 install -r requirements.txt \ 125 | && rm ./requirements.txt 126 | 127 | RUN ln -s /usr/bin/python3 /usr/bin/python 128 | 129 | COPY ./src ./ 130 | 131 | # Container Hardening 132 | RUN usg fix cis_level1_server 133 | RUN apt-get purge --auto-remove -y ubuntu-advantage-tools 134 | -------------------------------------------------------------------------------- /Dockerfile-db: -------------------------------------------------------------------------------- 1 | FROM postgres:11 2 | 3 | COPY ./bin/migrate.sql /docker-entrypoint-initdb.d/migrate.sql -------------------------------------------------------------------------------- /Dockerfile-exp: -------------------------------------------------------------------------------- 1 | ################# 2 | # Build Image # 3 | ################# 4 | FROM python:3.8-buster AS builder 5 | WORKDIR /app 6 | RUN pip install pyinstaller staticx patchelf-wrapper 7 | COPY ./src ./requirements.txt ./ 8 | RUN pip install -r requirements.txt 9 | RUN \ 10 | pyinstaller -F -n tmp-build --distpath ./dist --hidden-import='pkg_resources.py2_warn' ./main.py \ 11 | && staticx ./dist/tmp-build ./dist/build 12 | 13 | ################# 14 | # Final Image # 15 | ################# 16 | FROM ruby:2.7-slim 17 | 18 | RUN \ 19 | apt-get update && apt-get install -y --no-install-recommends \ 20 | curl \ 21 | git \ 22 | gnupg \ 23 | dirmngr \ 24 | wget \ 25 | sudo \ 26 | gawk bison sqlite3 patch g++ gcc autoconf automake libgdbm-dev \ 27 | libncurses5-dev libsqlite3-dev libtool make patch pkg-config \ 28 | libreadline-dev \ 29 | && rm -rf /var/lib/apt/lists/* 30 | 31 | # Install headless chrome 32 | RUN \ 33 | wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ 34 | && sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \ 35 | && apt-get update \ 36 | && apt-get install -y google-chrome-unstable --no-install-recommends \ 37 | && rm -rf /var/lib/apt/lists/* 38 | 
39 | SHELL ["/bin/bash", "-c"] 40 | 41 | # Disable ipv6 to enable fetching gpg keys for rvm 42 | # http://rvm.io/rvm/security#ipv6-issues 43 | RUN mkdir -p /root/.gnupg \ 44 | && echo 'disable-ipv6' >> /root/.gnupg/dirmngr.conf \ 45 | && echo 'rvm_silence_path_mismatch_check_flag=1' >> /etc/rvmrc \ 46 | && echo 'install: --no-document\nupdate: --no-document' >> /etc/.gemrc 47 | 48 | RUN useradd --no-log-init --system --create-home --groups sudo system \ 49 | && echo 'system ALL=(ALL:ALL) NOPASSWD:ALL' >> /etc/sudoers.d/system 50 | 51 | RUN useradd --no-log-init --system --create-home customer 52 | 53 | ############################################################### 54 | # Run these steps as the 'system' user 55 | # 56 | USER system 57 | 58 | # Install rvm 59 | RUN set -ex \ 60 | && for key in \ 61 | 7D2BAF1CF37B13E2069D6956105BD0E739499BDB \ 62 | 409B6B1796C275462A1703113804BB82D39DC0E3 \ 63 | ; do \ 64 | sudo gpg --batch --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys "$key" || \ 65 | sudo gpg --batch --keyserver hkp://pool.sks-keyservers.net:80 --recv-keys "$key" || \ 66 | sudo gpg --batch --keyserver hkp://ipv4.pool.sks-keyservers.net --recv-keys "$key" || \ 67 | sudo gpg --batch --keyserver hkp://pgp.mit.edu:80 --recv-keys "$key" || \ 68 | sudo gpg --batch --keyserver hkp://keyserver.pgp.com --recv-keys "$key" ; \ 69 | done \ 70 | # We use 'sudo' here to support multi-user install 71 | # http://rvm.io/rvm/install#1-download-and-run-the-rvm-installation-script 72 | && \curl -sSL https://get.rvm.io | sudo -n bash -s stable 73 | 74 | # Add 'customer' user to rvm group 75 | RUN sudo usermod --append --groups rvm customer 76 | 77 | 78 | ############################################################### 79 | # Run these steps as the customer user 80 | # 81 | USER customer 82 | 83 | # Configure rvm and install default Ruby 84 | ENV RUBY_VERSION 2.7.5 85 | ENV RUBY_VERSION_MIN 2.6.6 86 | RUN source /usr/local/rvm/scripts/rvm \ 87 | # Fail if deps are missing, won't prompt for sudo 88 | && rvm autolibs read-fail \ 89 | && rvm install --no-docs $RUBY_VERSION \ 90 | && rvm use --default $RUBY_VERSION \ 91 | # Make rvm available in non-login bash shells 92 | && echo 'source /usr/local/rvm/scripts/rvm' >> ~/.bashrc 93 | 94 | # Default to Node 20 95 | ENV NODE_VERSION lts/iron 96 | RUN curl https://raw.githubusercontent.com/nvm-sh/nvm/v0.37.2/install.sh | bash \ 97 | && \. "$HOME/.nvm/nvm.sh" \ 98 | && nvm install $NODE_VERSION 99 | 100 | 101 | ############################################################### 102 | # Run these steps and the container as the 'root' user 103 | # 104 | # This is necessary because the build code needs to have 105 | # rights to switch to 'customer' user 106 | # 107 | USER root 108 | 109 | WORKDIR /app 110 | 111 | COPY --from=builder /app/dist/build . 
112 | 113 | CMD ["./build"] 114 | -------------------------------------------------------------------------------- /Dockerfile-test: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | SHELL ["/bin/bash", "-c"] 4 | 5 | RUN groupadd -r rvm \ 6 | && useradd --no-log-init --system --create-home --groups rvm customer 7 | 8 | WORKDIR /app 9 | 10 | COPY ./requirements.txt ./requirements.txt 11 | COPY ./requirements-dev.txt ./requirements-dev.txt 12 | 13 | RUN pip install -r requirements-dev.txt -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | As a work of the United States Government, this project is in the 2 | public domain within the United States. 3 | 4 | Additionally, we waive copyright and related rights in the work 5 | worldwide through the CC0 1.0 Universal public domain dedication. 6 | 7 | ## CC0 1.0 Universal Summary 8 | 9 | This is a human-readable summary of the [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode). 10 | 11 | ### No Copyright 12 | 13 | The person who associated a work with this deed has dedicated the work to 14 | the public domain by waiving all of his or her rights to the work worldwide 15 | under copyright law, including all related and neighboring rights, to the 16 | extent allowed by law. 17 | 18 | You can copy, modify, distribute and perform the work, even for commercial 19 | purposes, all without asking permission. 20 | 21 | ### Other Information 22 | 23 | In no way are the patent or trademark rights of any person affected by CC0, 24 | nor are the rights that other persons may have in the work or in how the 25 | work is used, such as publicity or privacy rights. 26 | 27 | Unless expressly stated otherwise, the person who associated a work with 28 | this deed makes no warranties about the work, and disclaims liability for 29 | all uses of the work, to the fullest extent permitted by applicable law. 30 | When using or citing the work, you should not imply endorsement by the 31 | author or the affirmer. 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pages Build Container 2 | 3 | Docker image for building and publishing static sites as part of the cloud.gov Pages platform. 4 | 5 | Generally, site builds work in three stages: clone, build, and publish. Each stage is broken down into a number of steps. First, the container checks out the site from GitHub. Then it builds the site with the specified build engine. Then it gzip compresses text files and sets cache control headers. Finally, it uploads the built site to S3, and also creates redirect objects for directories, such as `/path` => `/path/`. 
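The publish step (see `src/publishing/s3publisher.py` and `src/steps/publish.py`) applies the compression, cache headers, and redirect objects described above. As a minimal illustrative sketch, and not the repository's actual implementation, publishing one text file and one directory redirect with `boto3` might look like the following (the bucket name, prefix, and helper names here are hypothetical; real values come from the build arguments and the `CACHE_CONTROL` environment variable):

```python
import gzip
import mimetypes

import boto3

TEXT_TYPES = ("text/", "application/json", "application/javascript")


def publish_file(s3, bucket, site_prefix, local_path, rel_path, cache_control="max-age=60"):
    """Upload one built file, gzip-compressing text-like content (sketch)."""
    content_type = mimetypes.guess_type(rel_path)[0] or "application/octet-stream"
    with open(local_path, "rb") as f:
        body = f.read()

    extra = {}
    if content_type.startswith(TEXT_TYPES[0]) or content_type in TEXT_TYPES[1:]:
        body = gzip.compress(body)          # compress text files before upload
        extra["ContentEncoding"] = "gzip"   # browsers transparently decompress

    s3.put_object(
        Bucket=bucket,
        Key=f"{site_prefix}/{rel_path}",
        Body=body,
        ContentType=content_type,
        CacheControl=cache_control,         # e.g. the CACHE_CONTROL env var
        **extra,
    )


def publish_redirect(s3, bucket, site_prefix, dir_path):
    """Create an object so that /path redirects to /path/ (sketch)."""
    s3.put_object(
        Bucket=bucket,
        Key=f"{site_prefix}/{dir_path}",
        Body=b"",
        WebsiteRedirectLocation=f"/{site_prefix}/{dir_path}/",
    )


if __name__ == "__main__":
    client = boto3.client("s3")  # credentials come from the build arguments
    publish_file(client, "my-bucket", "preview/owner/repo", "_site/docs/index.html", "docs/index.html")
    publish_redirect(client, "my-bucket", "preview/owner/repo", "docs")
```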
6 | 7 | ## Usage 8 | 9 | ### Command 10 | ``` 11 | python main.py [options] 12 | ``` 13 | 14 | ### Command options 15 | One of the following flags *must* be specified: 16 | 17 | | Flag | Example | Description | 18 | | ---- | ------- | ----------- | 19 | | `-p`, `--params` | `-p '{"foo": "bar"}'` | An encrypted JSON encoded string containing the [build arguments](#build-arguments) | 20 | | `-f`, `--file` | `--file ./.local/my-build.json` | A path to a JSON file containing the [build arguments](#build-arguments) | 21 | 22 | ### Using cloud.gov tasks 23 | ``` 24 | cf run-task "cd app && python main.py [options]" 25 | ``` 26 | 27 | ### Using `docker-compose` 28 | ``` 29 | docker-compose run --rm app python main.py [options] 30 | ``` 31 | 32 | ### Full examples 33 | ``` 34 | # build arguments provided as a JSON encoded string 35 | 36 | cf run-task pages-build-container "python main.py -p '{\"foo\": \"bar\"}'" --name "build-123" 37 | ``` 38 | 39 | ``` 40 | # build arguments provided in a JSON encoded file 41 | 42 | docker-compose run --rm app python main.py -f /tmp/local/my-build.json 43 | ``` 44 | 45 | ## Environment variables 46 | 47 | | Name | Optional? | VCAP Service | Description | 48 | | ---- | :-------: | ------------ | ----------- | 49 | | `CACHE_CONTROL` | Y | | Default value to set for the `Cache-Control` header of all published files, default is `max-age=60` | 50 | | `DATABASE_URL` | N | | The URL of the database for database logging | 51 | | `USER_ENVIRONMENT_VARIABLE_KEY` | N | `federalist-{space}-uev-key` | Encryption key to decrypt user environment variables | 52 | | `MAX_WORKERS` | N | | Maximum number of workers/threads to use when uploading files to S3 | 53 | 54 | When running locally, environment variables are configured in `docker-compose.yml` under the `app` service. 55 | 56 | ## Connected CF service 57 | 58 | | Name | Type | Description | 59 | | ---- | ---- | ----------- | 60 | | `federalist-((env))-rds` | Brokered | The RDS db credentials | 61 | | `federalist-((env))-uev-key` | User Provided | The site environment variable encryption key | 62 | | `pages-((env))-encryption` | User Provided | The site build params encryption key | 63 | 64 | ## Build arguments 65 | 66 | | Name | Optional? | Default | Description | 67 | | ---- | :-------: | ------- | ----------- | 68 | | `aws_access_key_id` | N | | AWS access key for the destination S3 bucket | 69 | | `aws_secret_access_key` | N | | AWS secret key for the destination S3 bucket | 70 | | `aws_default_region` | N | | AWS region for the destination S3 bucket | 71 | | `bucket` | N | | AWS S3 bucket name for the destination S3 bucket | 72 | | `github_token` | Y | `None` | GitHub auth token for cloning the repository | 73 | | `status_callback` | N | | The URL the container should use to report the status of the completed build (ie, success or failure) | 74 | | `config` | Y | `None` | A yaml block of configuration to add to `_config.yml` before building. Currently only used in `jekyll` site builds | 75 | | `generator` | N | | The engine to use to build the site (`'jekyll'`, `'hugo'`, `'node.js'`, or `'static'`) | 76 | | `owner` | N | | The GitHub organization of the source repository | 77 | | `repository` | N | | The name of source the repository | 78 | | `branch` | N | | The branch of the source repository to build | 79 | | `site_prefix` | N | | The S3 bucket "path" that the site files will be published to. It should **not** have a trailing or prefix slash (Ex. 
`preview/<owner>/<repository>`) | 80 | | `baseurl` | Y | `None` | The base URL that will be used by the build engine to determine the absolute path for site assets (blank for custom domains, the `site_prefix` with a preceding `/` for preview domains) | 81 | | `user_environment_variables` | Y | | Array of objects containing the name and encrypted values of user-provided environment variables (Ex. `[{ name: "MY ENV VAR", ciphertext: "ABC123" }]`) | 82 | 83 | 84 | ### Encrypted params argument 85 | 86 | When build parameters are passed to the build script using the `-p / --params` flag, they are an encrypted JSON encoded string created by the pages-core queue worker and decrypted using a shared key stored as the CF user provided service `pages-((env))-encryption` and the [decrypt cipher](./src/crypto/decrypt.py). 87 | 88 | ## Environment variables provided during builds 89 | 90 | The following environment variables are available during site builds and when running the `federalist` npm script. They may be useful for customizing the display of certain information in the published site, for example, to display the current published branch name. 91 | 92 | * `OWNER` 93 | * `REPOSITORY` 94 | * `BRANCH` 95 | * `SITE_PREFIX` 96 | * `BASEURL` 97 | 98 | ## Development 99 | 100 | ### Getting started 101 | 102 | #### Requirements 103 | - [Docker](https://www.docker.com/) and [Docker Compose](https://docs.docker.com/compose/) 104 | - AWS S3 bucket name and associated credentials (key, secret, region) 105 | - A Github repository with a Pages-compatible site 106 | - A Github Personal Access Token if building a private repository; see [creating a new personal token for your GitHub account](https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/) for more information. 107 | 108 | #### Clone the repository 109 | ```sh 110 | git clone git@github.com:cloud-gov/pages-build-container.git 111 | cd pages-build-container 112 | ``` 113 | 114 | #### Create build arguments 115 | ```sh 116 | mkdir -p .local 117 | cp .local.sample.json .local/my-build.json 118 | ``` 119 | 120 | #### Update build arguments 121 | Update the appropriate fields to contain the desired values for your build; see [build arguments](#build-arguments) for options. The `.local` folder should not be checked into version control (it is in `.gitignore`) and will be mounted into the Docker container at `/tmp/local`. 122 | 123 | #### Initialize the database 124 | This only needs to be done once. To force a reinitialization of the database, remove the `tmp/db` folder in the project root and run the command below again. 125 | 126 | ```sh 127 | docker-compose run --rm db 128 | ``` 129 | Then kill the process when it is done. 130 | 131 | #### Run the build 132 | ```sh 133 | docker-compose build 134 | docker-compose run --rm app python main.py -f /tmp/local/my-build.json 135 | ``` 136 | If the database is not ready when running a build (despite the healthcheck), just try running the build again. 137 | 138 | #### Interact with the build environment 139 | ```sh 140 | docker-compose run --rm app bash 141 | python main.py -f /tmp/local/my-build.json 142 | ``` 143 | 144 | ### Inspecting the database 145 | 146 | 1. Ensure the database is running (in the background) 147 | ``` 148 | docker-compose up -d --no-deps db 149 | ``` 150 | 151 | 2.
Run psql in the container 152 | ``` 153 | docker-compose exec db psql -U postgres -d pages 154 | ``` 155 | 156 | ### Inspecting logs 157 | During or after builds, the echoserver and database logs can be viewed with: 158 | ```sh 159 | # all logs 160 | docker-compose logs 161 | 162 | # only the echo server 163 | docker-compose logs echoserver 164 | 165 | # only the db 166 | docker-compose logs db 167 | ``` 168 | 169 | ### Testing 170 | 1. Build the test image 171 | ```sh 172 | docker-compose build test 173 | ``` 174 | 175 | 2. Run any testing steps 176 | ```sh 177 | # unit tests 178 | docker-compose run --rm test pytest 179 | 180 | # unit tests with code coverage 181 | docker-compose run --rm test pytest --cov-report xml:./coverage/coverage.xml --cov-report html:./coverage --cov-report term --cov=src 182 | 183 | # lint 184 | docker-compose run --rm test flake8 185 | 186 | # static analysis 187 | docker-compose run --rm test bandit -r src 188 | ``` 189 | 190 | ### Continuous Integration 191 | We use Concourse CI for our CI/CD system. To use Concourse, one must have appropriate permissions in UAA as administered by the cloud.gov operators. Access to Concourse also requires using the GSA VPN. 192 | 193 | 1. To get started, install and authenticate with the `fly` CLI: 194 | - `brew install --cask fly` 195 | - `fly -t <target> login -n pages -c <concourse-url>` 196 | 197 | 2. Update local credential files (see ci/vars/example.yml) 198 | 199 | #### CI deployments 200 | This repository contains three distinct deployment pipelines in Concourse: 201 | - [__build-container production__](./ci/pipeline-production.yml); starts on a new tag pushed to the `main` branch. 202 | - [__build-container staging__](./ci/pipeline-staging.yml); starts on a new commit pushed to the `main` branch. 203 | - [__build-container dev__](./ci/pipeline-dev.yml); starts when a PR is opened against the `main` branch. 204 | 205 | Each pipeline runs tests, creates the appropriate site build container image, pushes it to ECR, and then deploys the image for the build container app. 206 | 207 | ##### Pipeline instance variables 208 | Three instances of the pipeline are set for the `pages dev`, `pages staging` and `pages production` environments. Instance variables are used to fill in Concourse pipeline parameter variables bearing the same name as the instance variable. See more on [Concourse vars](https://concourse-ci.org/vars.html). Each instance of the pipeline has instance variables associated with it, such as `deploy-env` and `git-branch`. 209 | 210 | |Instance Variable|Pages Dev|Pages Staging|Pages Production| 211 | | --- | --- | --- | --- | 212 | |**`deploy-env`**|`dev`|`staging`|`production`| 213 | 214 | ## Public domain 215 | 216 | This project is in the worldwide [public domain](LICENSE.md). As stated in [CONTRIBUTING](CONTRIBUTING.md): 217 | 218 | > This project is in the public domain within the United States, and copyright and related rights in the work worldwide are waived through the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/). 219 | > 220 | > All contributions to this project will be released under the CC0 dedication. By submitting a pull request, you are agreeing to comply with this waiver of copyright interest.
221 | 222 | [Federalist]: https://federalist.18f.gov 223 | [cloud.gov Pages]: https://cloud.gov/pages 224 | [Docker Compose]: https://docs.docker.com/compose/install/ 225 | [Docker]: https://docs.docker.com/engine/installation/ 226 | [pages-builder]: https://github.com/cloud-gov/pages-builder 227 | -------------------------------------------------------------------------------- /bin/migrate.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS buildlog (id serial PRIMARY KEY, build integer, source varchar, output varchar); 2 | ALTER TABLE buildlog ADD COLUMN IF NOT EXISTS "createdAt" timestamp; 3 | ALTER TABLE buildlog ADD COLUMN IF NOT EXISTS "updatedAt" timestamp; 4 | -------------------------------------------------------------------------------- /bin/push-docker-image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eo pipefail 3 | 4 | # federalist-garden-build-dev-task 5 | TAG=$1 6 | 7 | # Make sure local registry is running on localhost:5000 8 | docker build --tag $TAG . 9 | docker tag $TAG localhost:5000/$TAG 10 | docker push localhost:5000/$TAG 11 | -------------------------------------------------------------------------------- /ci/partials/audit.yml: -------------------------------------------------------------------------------- 1 | platform: linux 2 | inputs: [name: src] 3 | outputs: [name: src] 4 | run: 5 | dir: src 6 | path: ci/tasks/pip-audit.sh 7 | -------------------------------------------------------------------------------- /ci/partials/build.yml: -------------------------------------------------------------------------------- 1 | platform: linux 2 | inputs: 3 | - name: src 4 | path: . 5 | outputs: 6 | - name: image 7 | run: 8 | path: build 9 | params: 10 | BUILDKIT_SECRETTEXT_UA_TOKEN: ((ua-token)) 11 | -------------------------------------------------------------------------------- /ci/partials/deploy.yml: -------------------------------------------------------------------------------- 1 | platform: linux 2 | inputs: 3 | - name: src 4 | - name: image-repository 5 | run: 6 | dir: src 7 | path: ci/tasks/deploy.sh 8 | -------------------------------------------------------------------------------- /ci/partials/test.yml: -------------------------------------------------------------------------------- 1 | platform: linux 2 | inputs: [name: src] 3 | outputs: [name: src] 4 | run: 5 | dir: src 6 | path: ci/tasks/test.sh 7 | -------------------------------------------------------------------------------- /ci/pipeline-dev.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #@ load("funcs.lib.yml", "slack_hook") 3 | #@ load("@ytt:data", "data") 4 | #@ load("@ytt:template", "template") 5 | 6 | #! 
JOBS 7 | 8 | jobs: 9 | - name: set-pipeline 10 | plan: 11 | - get: src 12 | resource: pr-((deploy-env)) 13 | trigger: true 14 | - get: pipeline-tasks 15 | - get: general-task 16 | - task: init 17 | image: general-task 18 | file: pipeline-tasks/tasks/init.yml 19 | params: 20 | PIPELINE_YML: src/ci/pipeline-dev.yml 21 | - set_pipeline: build-container 22 | file: compiled/set-pipeline.yml 23 | instance_vars: 24 | deploy-env: ((deploy-env)) 25 | 26 | - name: test-((deploy-env)) 27 | plan: 28 | - get: src 29 | resource: pr-((deploy-env)) 30 | trigger: true 31 | passed: [set-pipeline] 32 | 33 | - put: src 34 | resource: pr-((deploy-env)) 35 | params: 36 | path: src 37 | status: pending 38 | base_context: concourse 39 | context: test-pages-build-container-((deploy-env)) 40 | - get: python 41 | - task: test 42 | image: python 43 | file: src/ci/partials/test.yml 44 | 45 | on_success: 46 | put: src 47 | resource: pr-((deploy-env)) 48 | params: 49 | path: src 50 | status: success 51 | base_context: concourse 52 | context: test-pages-build-container-((deploy-env)) 53 | 54 | on_failure: 55 | in_parallel: 56 | - put: src 57 | resource: pr-((deploy-env)) 58 | params: 59 | path: src 60 | status: failure 61 | base_context: concourse 62 | context: test-pages-build-container-((deploy-env)) 63 | - #@ slack_hook("failure", "tests") 64 | 65 | - name: deploy-((deploy-env)) 66 | plan: 67 | - get: src 68 | resource: pr-((deploy-env)) 69 | trigger: true 70 | passed: [test-((deploy-env))] 71 | - get: general-task 72 | - get: oci-build-task 73 | - task: build 74 | privileged: true 75 | image: oci-build-task 76 | file: src/ci/partials/build.yml 77 | 78 | - put: image-repository 79 | params: 80 | image: image/image.tar 81 | - task: deploy 82 | image: general-task 83 | file: src/ci/partials/deploy.yml 84 | params: 85 | _: #@ template.replace(data.values.env_cf) 86 | CF_APP_NAME: pages-build-container-((deploy-env)) 87 | CF_MANIFEST: .cloudgov/manifest.yml 88 | CF_VARS_FILE: .cloudgov/vars/pages-((deploy-env)).yml 89 | IMAGE_REPOSITORY: ../image-repository/repository 90 | IMAGE_TAG: pages-((deploy-env)) 91 | CF_DOCKER_USERNAME: ((ecr-aws-key)) 92 | CF_DOCKER_PASSWORD: ((ecr-aws-secret)) 93 | 94 | on_failure: #@ slack_hook("failure", "deployment") 95 | 96 | - name: audit-dependencies 97 | plan: 98 | - get: src 99 | resource: pr-((deploy-env)) 100 | trigger: true 101 | passed: [set-pipeline] 102 | 103 | - put: src 104 | resource: pr-((deploy-env)) 105 | params: 106 | path: src 107 | status: pending 108 | base_context: concourse 109 | context: audit-dependencies 110 | 111 | - get: python 112 | - task: pip-audit 113 | image: python 114 | file: src/ci/partials/audit.yml 115 | 116 | on_failure: 117 | in_parallel: 118 | - put: src 119 | resource: pr-((deploy-env)) 120 | params: 121 | path: src 122 | status: failure 123 | base_context: concourse 124 | context: audit-dependencies 125 | - #@ slack_hook("failure", "dependency audit") 126 | 127 | on_success: 128 | in_parallel: 129 | - put: src 130 | resource: pr-((deploy-env)) 131 | params: 132 | path: src 133 | status: success 134 | base_context: concourse 135 | context: audit-dependencies 136 | - #@ slack_hook("success", "dependency audit") 137 | 138 | #! 
RESOURCES 139 | 140 | resources: 141 | - name: pr-((deploy-env)) 142 | type: pull-request 143 | check_every: 1m 144 | source: 145 | repository: ((build-container-repository-path)) 146 | access_token: ((gh-access-token)) 147 | base_branch: main 148 | disable_forks: true 149 | ignore_drafts: false 150 | 151 | - name: image-repository 152 | type: registry-image 153 | source: 154 | aws_access_key_id: ((ecr-aws-key)) 155 | aws_secret_access_key: ((ecr-aws-secret)) 156 | repository: pages-build-container 157 | aws_region: us-gov-west-1 158 | tag: pages-((deploy-env)) 159 | 160 | - name: slack 161 | - name: pipeline-tasks 162 | - name: python 163 | - name: general-task 164 | - name: oci-build-task 165 | 166 | #! RESOURCE TYPES 167 | 168 | resource_types: 169 | - name: git 170 | - name: slack-notification 171 | - name: pull-request 172 | - name: registry-image 173 | -------------------------------------------------------------------------------- /ci/pipeline-production.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #@ load("funcs.lib.yml", "slack_hook") 3 | #@ load("@ytt:data", "data") 4 | #@ load("@ytt:template", "template") 5 | 6 | #! JOBS 7 | 8 | jobs: 9 | - name: set-pipeline 10 | plan: 11 | - get: src 12 | resource: src-((deploy-env))-tagged 13 | params: { depth: 1 } 14 | trigger: true 15 | - get: pipeline-tasks 16 | - get: general-task 17 | - task: init 18 | image: general-task 19 | file: pipeline-tasks/tasks/init.yml 20 | params: 21 | PIPELINE_YML: src/ci/pipeline-production.yml 22 | - set_pipeline: build-container 23 | file: compiled/set-pipeline.yml 24 | instance_vars: 25 | deploy-env: ((deploy-env)) 26 | 27 | - name: test-((deploy-env)) 28 | plan: 29 | - get: src 30 | resource: src-((deploy-env))-tagged 31 | trigger: true 32 | params: { depth: 1 } 33 | passed: [set-pipeline] 34 | - get: python 35 | - task: test 36 | image: python 37 | file: src/ci/partials/test.yml 38 | 39 | on_failure: #@ slack_hook("failure", "tests") 40 | 41 | - name: deploy-((deploy-env)) 42 | plan: 43 | - get: src 44 | resource: src-((deploy-env))-tagged 45 | trigger: true 46 | params: { depth: 1 } 47 | passed: [test-((deploy-env)), audit-dependencies] 48 | - get: general-task 49 | - get: oci-build-task 50 | - task: build 51 | privileged: true 52 | image: oci-build-task 53 | file: src/ci/partials/build.yml 54 | - put: image-repository 55 | params: 56 | image: image/image.tar 57 | - task: deploy 58 | image: general-task 59 | file: src/ci/partials/deploy.yml 60 | params: 61 | _: #@ template.replace(data.values.env_cf) 62 | CF_APP_NAME: pages-build-container-((deploy-env)) 63 | CF_MANIFEST: .cloudgov/manifest.yml 64 | CF_VARS_FILE: .cloudgov/vars/pages-((deploy-env)).yml 65 | IMAGE_REPOSITORY: ../image-repository/repository 66 | IMAGE_TAG: pages-((deploy-env)) 67 | CF_DOCKER_USERNAME: ((ecr-aws-key)) 68 | CF_DOCKER_PASSWORD: ((ecr-aws-secret)) 69 | 70 | on_success: #@ slack_hook("success", "deployment") 71 | on_failure: #@ slack_hook("failure", "deployment") 72 | 73 | - name: audit-dependencies 74 | plan: 75 | - get: src 76 | resource: src-((deploy-env))-tagged 77 | trigger: true 78 | passed: [set-pipeline] 79 | 80 | - get: python 81 | - task: pip-audit 82 | image: python 83 | file: src/ci/partials/audit.yml 84 | 85 | on_failure: #@ slack_hook("failure", "dependency audit") 86 | on_success: #@ slack_hook("success", "dependency audit") 87 | 88 | - name: release 89 | plan: 90 | - get: src 91 | resource: src-((deploy-env))-tagged 92 | params: { depth: 1 } 93 | trigger: 
true 94 | passed: [deploy-((deploy-env))] 95 | - #@ template.replace(data.values.release_steps) 96 | 97 | #! RESOURCES 98 | 99 | resources: 100 | - name: src-((deploy-env))-tagged 101 | type: git 102 | icon: github 103 | source: 104 | uri: ((git-base-url))/((build-container-repository-path)) 105 | branch: main 106 | commit_verification_keys: ((cloud-gov-pages-gpg-keys)) 107 | tag_filter: 0.*.* 108 | fetch_tags: true 109 | 110 | - name: image-repository 111 | type: registry-image 112 | source: 113 | aws_access_key_id: ((ecr-aws-key)) 114 | aws_secret_access_key: ((ecr-aws-secret)) 115 | repository: pages-build-container 116 | aws_region: us-gov-west-1 117 | tag: pages-((deploy-env)) 118 | 119 | - name: pages-release 120 | type: github-release 121 | source: 122 | owner: cloud-gov 123 | repository: pages-build-container 124 | access_token: ((gh-access-token)) 125 | 126 | - name: slack 127 | - name: pipeline-tasks 128 | - name: python 129 | - name: general-task 130 | - name: oci-build-task 131 | 132 | #! RESOURCE TYPES 133 | 134 | resource_types: 135 | - name: git 136 | - name: slack-notification 137 | - name: pull-request 138 | - name: registry-image 139 | - name: github-release 140 | -------------------------------------------------------------------------------- /ci/pipeline-staging.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #@ load("funcs.lib.yml", "slack_hook") 3 | #@ load("@ytt:data", "data") 4 | #@ load("@ytt:template", "template") 5 | 6 | #! JOBS 7 | 8 | jobs: 9 | - name: set-pipeline 10 | plan: 11 | - get: src 12 | resource: src-((deploy-env)) 13 | params: { depth: 1 } 14 | trigger: true 15 | - get: pipeline-tasks 16 | - get: general-task 17 | - task: init 18 | image: general-task 19 | file: pipeline-tasks/tasks/init.yml 20 | params: 21 | PIPELINE_YML: src/ci/pipeline-staging.yml 22 | - set_pipeline: build-container 23 | file: compiled/set-pipeline.yml 24 | instance_vars: 25 | deploy-env: ((deploy-env)) 26 | 27 | - name: update-release-branch 28 | plan: 29 | - get: src 30 | resource: src-((deploy-env)) 31 | trigger: true 32 | - get: general-task 33 | - get: pipeline-tasks 34 | - task: update-release-branch 35 | image: general-task 36 | file: pipeline-tasks/tasks/update-release-branch.yml 37 | 38 | - name: test-((deploy-env)) 39 | plan: 40 | - get: src 41 | resource: src-((deploy-env)) 42 | trigger: true 43 | params: { depth: 1 } 44 | passed: [set-pipeline] 45 | - get: python 46 | - task: test 47 | image: python 48 | file: src/ci/partials/test.yml 49 | 50 | on_failure: #@ slack_hook("failure", "tests") 51 | 52 | - name: deploy-((deploy-env)) 53 | plan: 54 | - get: src 55 | resource: src-((deploy-env)) 56 | trigger: true 57 | params: { depth: 1 } 58 | passed: [test-((deploy-env)), audit-dependencies] 59 | - get: general-task 60 | - get: oci-build-task 61 | - task: build 62 | privileged: true 63 | image: oci-build-task 64 | file: src/ci/partials/build.yml 65 | - put: image-repository 66 | params: 67 | image: image/image.tar 68 | - task: deploy 69 | image: general-task 70 | file: src/ci/partials/deploy.yml 71 | params: 72 | _: #@ template.replace(data.values.env_cf) 73 | CF_APP_NAME: pages-build-container-((deploy-env)) 74 | CF_MANIFEST: .cloudgov/manifest.yml 75 | CF_VARS_FILE: .cloudgov/vars/pages-((deploy-env)).yml 76 | IMAGE_REPOSITORY: ../image-repository/repository 77 | IMAGE_TAG: pages-((deploy-env)) 78 | CF_DOCKER_USERNAME: ((ecr-aws-key)) 79 | CF_DOCKER_PASSWORD: ((ecr-aws-secret)) 80 | 81 | on_success: #@ 
slack_hook("success", "deployment") 82 | on_failure: #@ slack_hook("failure", "deployment") 83 | 84 | - name: audit-dependencies 85 | plan: 86 | - get: src 87 | resource: src-((deploy-env)) 88 | trigger: true 89 | passed: [set-pipeline] 90 | 91 | - get: python 92 | - task: pip-audit 93 | image: python 94 | file: src/ci/partials/audit.yml 95 | 96 | on_failure: #@ slack_hook("failure", "dependency audit") 97 | on_success: #@ slack_hook("success", "dependency audit") 98 | 99 | #! RESOURCES 100 | 101 | resources: 102 | - name: src-((deploy-env)) 103 | type: git 104 | icon: github 105 | source: 106 | uri: git@github.com:/((build-container-repository-path)) 107 | branch: main 108 | commit_verification_keys: ((cloud-gov-pages-gpg-keys)) 109 | private_key: ((pages-gpg-operations-github-sshkey.private_key)) 110 | 111 | - name: image-repository 112 | type: registry-image 113 | source: 114 | aws_access_key_id: ((ecr-aws-key)) 115 | aws_secret_access_key: ((ecr-aws-secret)) 116 | repository: pages-build-container 117 | aws_region: us-gov-west-1 118 | tag: pages-((deploy-env)) 119 | 120 | - name: slack 121 | - name: pipeline-tasks 122 | - name: python 123 | - name: general-task 124 | - name: oci-build-task 125 | 126 | #! RESOURCE TYPES 127 | 128 | resource_types: 129 | - name: git 130 | - name: slack-notification 131 | - name: pull-request 132 | - name: registry-image 133 | -------------------------------------------------------------------------------- /ci/pipeline.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #@ load("funcs.lib.yml", "slack_hook", "pr_hook") 3 | #@ load("@ytt:data", "data") 4 | #@ load("@ytt:template", "template") 5 | 6 | #@ env = data.values.env 7 | 8 | #! JOBS 9 | 10 | jobs: 11 | - name: set-pipeline 12 | plan: 13 | - get: src 14 | trigger: true 15 | params: 16 | #@ if/end env != 'dev': 17 | depth: 1 18 | #@ if/end env == 'dev': 19 | integration_tool: checkout 20 | - get: pipeline-tasks 21 | - get: general-task 22 | - task: boot 23 | image: general-task 24 | file: pipeline-tasks/tasks/boot.yml 25 | params: 26 | ENV_OVERRIDE: ((deploy-env)) 27 | - set_pipeline: self 28 | file: compiled/set-pipeline.yml 29 | instance_vars: 30 | deploy-env: ((deploy-env)) 31 | 32 | #@ if/end env == 'staging': 33 | - name: update-release-branch 34 | plan: 35 | - get: src 36 | trigger: true 37 | - get: general-task 38 | - get: pipeline-tasks 39 | - task: update-release-branch 40 | image: general-task 41 | file: pipeline-tasks/tasks/update-release-branch.yml 42 | 43 | - name: test 44 | plan: 45 | - get: src 46 | trigger: true 47 | passed: [set-pipeline] 48 | params: 49 | #@ if/end env != 'dev': 50 | depth: 1 51 | #@ if/end env == 'dev': 52 | integration_tool: checkout 53 | #@ if/end env == 'dev': 54 | - #@ pr_hook("pending", "test-pages-build-container") 55 | - get: python 56 | - task: test 57 | image: python 58 | file: src/ci/partials/test.yml 59 | 60 | on_success: 61 | in_parallel: 62 | - #@ slack_hook("success", "tests") 63 | #@ if/end env == 'dev': 64 | - #@ pr_hook("success", "test-pages-build-container") 65 | 66 | on_failure: 67 | in_parallel: 68 | - #@ slack_hook("failure", "tests") 69 | #@ if/end env == 'dev': 70 | - #@ pr_hook("failure", "test-pages-build-container") 71 | 72 | - name: deploy 73 | plan: 74 | - get: src 75 | trigger: true 76 | passed: 77 | - test 78 | #@ if/end env != 'dev': 79 | - audit-dependencies 80 | params: 81 | #@ if/end env != 'dev': 82 | depth: 1 83 | #@ if/end env == 'dev': 84 | integration_tool: checkout 85 | - get: 
general-task 86 | - get: oci-build-task 87 | - task: build 88 | privileged: true 89 | image: oci-build-task 90 | file: src/ci/partials/build.yml 91 | 92 | - put: image-repository 93 | params: 94 | image: image/image.tar 95 | - task: deploy 96 | image: general-task 97 | file: src/ci/partials/deploy.yml 98 | params: 99 | _: #@ template.replace(data.values.env_cf) 100 | CF_APP_NAME: pages-build-container-((deploy-env)) 101 | CF_MANIFEST: .cloudgov/manifest.yml 102 | CF_VARS_FILE: .cloudgov/vars/pages-((deploy-env)).yml 103 | IMAGE_REPOSITORY: ../image-repository/repository 104 | IMAGE_TAG: pages-((deploy-env)) 105 | CF_DOCKER_USERNAME: ((ecr-aws-key)) 106 | CF_DOCKER_PASSWORD: ((ecr-aws-secret)) 107 | 108 | on_success: 109 | in_parallel: 110 | - #@ slack_hook("success", "deployment") 111 | #@ if/end env == 'dev': 112 | - #@ pr_hook("success", "deploy-pages-build-container") 113 | 114 | on_failure: 115 | in_parallel: 116 | - #@ slack_hook("failure", "deployment") 117 | #@ if/end env == 'dev': 118 | - #@ pr_hook("failure", "deploy-pages-build-container") 119 | 120 | - name: audit-dependencies 121 | plan: 122 | - get: src 123 | trigger: true 124 | passed: [set-pipeline] 125 | params: 126 | #@ if/end env != 'dev': 127 | depth: 1 128 | #@ if/end env == 'dev': 129 | integration_tool: checkout 130 | 131 | #@ if/end env == 'dev': 132 | - #@ pr_hook("pending", "audit-dependencies") 133 | 134 | - get: python 135 | - task: pip-audit 136 | image: python 137 | file: src/ci/partials/audit.yml 138 | 139 | on_success: 140 | in_parallel: 141 | - #@ slack_hook("success", "dependency audit") 142 | #@ if/end env == 'dev': 143 | - #@ pr_hook("success", "audit-dependencies") 144 | 145 | on_failure: 146 | in_parallel: 147 | - #@ slack_hook("failure", "dependency audit") 148 | #@ if/end env == 'dev': 149 | - #@ pr_hook("failure", "audit-dependencies") 150 | 151 | #@ if/end env == 'production': 152 | - name: release 153 | plan: 154 | - get: src 155 | params: { depth: 1 } 156 | trigger: true 157 | passed: [deploy] 158 | - #@ template.replace(data.values.release_steps) 159 | 160 | #! 
RESOURCES 161 | 162 | resources: 163 | #@ if/end env == 'dev': 164 | - name: src 165 | type: pull-request 166 | check_every: 1m 167 | source: 168 | repository: ((build-container-repository-path)) 169 | access_token: ((gh-access-token)) 170 | base_branch: main 171 | disable_forks: true 172 | ignore_drafts: false 173 | 174 | #@ if/end env == 'staging': 175 | - name: src 176 | type: git 177 | icon: github 178 | source: 179 | uri: git@github.com:/((build-container-repository-path)) 180 | branch: main 181 | commit_verification_keys: ((cloud-gov-pages-gpg-keys)) 182 | private_key: ((pages-gpg-operations-github-sshkey.private_key)) 183 | 184 | #@ if env == 'production': 185 | - name: src 186 | icon: github 187 | source: 188 | uri: ((git-base-url))/((build-container-repository-path)) 189 | branch: main 190 | commit_verification_keys: ((cloud-gov-pages-gpg-keys)) 191 | tag_filter: 0.*.* 192 | fetch_tags: true 193 | 194 | - name: pages-release 195 | type: github-release 196 | source: 197 | owner: cloud-gov 198 | repository: pages-build-container 199 | access_token: ((gh-access-token)) 200 | #@ end 201 | 202 | - name: image-repository 203 | type: registry-image 204 | source: 205 | aws_access_key_id: ((ecr-aws-key)) 206 | aws_secret_access_key: ((ecr-aws-secret)) 207 | repository: pages-build-container 208 | aws_region: us-gov-west-1 209 | tag: pages-((deploy-env)) 210 | 211 | - name: slack 212 | - name: pipeline-tasks 213 | - name: python 214 | - name: general-task 215 | - name: oci-build-task 216 | 217 | #! RESOURCE TYPES 218 | 219 | resource_types: 220 | - name: git 221 | - name: slack-notification 222 | - name: registry-image 223 | #@ if/end env == 'dev': 224 | - name: pull-request 225 | #@ if/end env == 'production': 226 | - name: github-release 227 | -------------------------------------------------------------------------------- /ci/tasks/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | cf api $CF_API 6 | cf auth 7 | 8 | cf t -o $CF_ORG -s $CF_SPACE 9 | 10 | cf push $CF_APP_NAME \ 11 | -f $CF_MANIFEST \ 12 | --vars-file $CF_VARS_FILE \ 13 | --docker-image "$(cat ${IMAGE_REPOSITORY}):${IMAGE_TAG}" \ 14 | --docker-username ${CF_DOCKER_USERNAME} 15 | -------------------------------------------------------------------------------- /ci/tasks/pip-audit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | pip install pip-audit 6 | 7 | python3 -m pip_audit -r ./requirements.txt 8 | -------------------------------------------------------------------------------- /ci/tasks/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | getent group rvm || groupadd -r rvm 6 | id -u customer &>/dev/null || useradd --no-log-init --system --create-home --groups rvm customer 7 | 8 | pip install -r requirements-dev.txt 9 | flake8 10 | bandit -r src 11 | 12 | pytest --cov-report xml:./coverage/coverage.xml --cov-report html:./coverage --cov-report term --cov=src; status=$? 13 | 14 | exit $status 15 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | exp: 4 | build: 5 | context: . 
6 | dockerfile: Dockerfile-exp 7 | volumes: 8 | - ./.local:/tmp/local:ro 9 | links: 10 | - echoserver 11 | - db 12 | depends_on: 13 | - echoserver 14 | - db 15 | environment: 16 | CACHE_CONTROL: max-age=60 17 | DATABASE_URL: postgresql://postgres:password@db/pages 18 | USER_ENVIRONMENT_VARIABLE_KEY: shhhhhhh 19 | 20 | app: 21 | build: 22 | context: . 23 | volumes: 24 | - ./src:/app:ro 25 | - ./.local:/tmp/local:ro 26 | links: 27 | - echoserver 28 | - db 29 | depends_on: 30 | - echoserver 31 | - db 32 | environment: 33 | CACHE_CONTROL: max-age=60 34 | DATABASE_URL: postgresql://postgres:password@db/pages 35 | USER_ENVIRONMENT_VARIABLE_KEY: shhhhhhh 36 | 37 | echoserver: 38 | # simple python server to log requests during development 39 | build: 40 | context: ./echo-server 41 | dockerfile: Dockerfile 42 | container_name: echoserver 43 | volumes: 44 | - ./echo-server:/code 45 | environment: 46 | PORT: 8989 47 | ports: 48 | - "8989:8989" 49 | 50 | db: 51 | build: 52 | context: . 53 | dockerfile: Dockerfile-db 54 | container_name: db 55 | volumes: 56 | - ./tmp/db:/var/lib/postgresql/data 57 | healthcheck: 58 | test: ["CMD-SHELL", "pg_isready -U postgres -d pages"] 59 | interval: 10s 60 | timeout: 5s 61 | retries: 5 62 | environment: 63 | POSTGRES_PASSWORD: password 64 | POSTGRES_USER: postgres 65 | POSTGRES_DB: pages 66 | 67 | test: 68 | build: 69 | context: . 70 | dockerfile: Dockerfile-test 71 | container_name: test 72 | volumes: 73 | - ./:/app -------------------------------------------------------------------------------- /docker/ua-attach-config.sh: -------------------------------------------------------------------------------- 1 | UA_TOKEN=`cat /run/secrets/UA_TOKEN` 2 | 3 | echo "Configuring ua attach config" 4 | cat <> ua-attach-config.yaml 5 | token: $UA_TOKEN 6 | enable_services: 7 | - usg 8 | - esm-infra -------------------------------------------------------------------------------- /echo-server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6-alpine 2 | 3 | WORKDIR /code 4 | ADD run.py /code 5 | 6 | EXPOSE 8989 7 | 8 | CMD ["python", "run.py"] 9 | -------------------------------------------------------------------------------- /echo-server/run.py: -------------------------------------------------------------------------------- 1 | # Reflects the requests from HTTP methods GET, POST, PUT, and DELETE 2 | # 3 | # Based on https://gist.github.com/1kastner/e083f9e813c0464e6a2ec8910553e632 4 | 5 | import os 6 | import base64 7 | import json 8 | import threading 9 | 10 | from http.server import HTTPServer, BaseHTTPRequestHandler 11 | 12 | 13 | def flush_print(s): 14 | print(s, flush=True) 15 | 16 | 17 | def decodeb64(s): 18 | return str(base64.b64decode(s), 'utf-8') 19 | 20 | 21 | class StoppableHTTPServer(HTTPServer): 22 | def run(self): 23 | try: 24 | self.serve_forever() 25 | except Exception: # pylint: disable=W0703 26 | pass 27 | finally: 28 | self.server_close() 29 | 30 | 31 | class RequestHandler(BaseHTTPRequestHandler): 32 | def do_GET(self): 33 | flush_print(f"\n{self.command} {self.path}") 34 | 35 | self.send_response(200) 36 | self.end_headers() 37 | 38 | def do_POST(self): 39 | flush_print(f"\n{self.command} {self.path}") 40 | 41 | content_length = self.headers.get('Content-Length') 42 | length = int(content_length) if content_length else 0 43 | payload = self.rfile.read(length) 44 | 45 | content_type = self.headers.get('Content-Type') 46 | if content_type == 'application/json': 47 | payload_json = 
json.loads(str(payload, 'utf-8')) 48 | if payload_json.get('output'): 49 | payload_json['output'] = decodeb64(payload_json['output']) 50 | if payload_json.get('message'): 51 | payload_json['message'] = decodeb64(payload_json['message']) 52 | 53 | payload = json.dumps(payload_json) 54 | 55 | flush_print(f" {payload}") 56 | 57 | self.send_response(200) 58 | self.end_headers() 59 | 60 | do_PUT = do_POST 61 | do_DELETE = do_GET 62 | 63 | 64 | def main(): 65 | port = int(os.getenv('PORT', 8080)) 66 | host = os.getenv('HOST', '0.0.0.0') 67 | print(f'Listening on {host}:{port}') 68 | server = StoppableHTTPServer((host, port), RequestHandler) 69 | 70 | # Start processing requests 71 | thread = threading.Thread(None, server.run) 72 | thread.start() 73 | 74 | 75 | if __name__ == "__main__": 76 | main() 77 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | test_paths = src test 3 | addopts = --doctest-modules 4 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # Include the production dependencies 2 | -r requirements.txt 3 | 4 | # Testing and development dependencies 5 | bandit>=1.0,<2.0 6 | flake8==3.8.3 7 | moto==5.0.1 8 | pip-audit==2.7.3 9 | pyfakefs==4.0.2 10 | pyflakes==2.2.0 11 | pylint==2.5.3 12 | pytest-cov==4.1.0 13 | pytest==7.4.4 14 | requests-mock==1.8.0 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.32.4 2 | boto3==1.34.102 3 | stopit==1.1.2 4 | psycopg2==2.9.9 5 | cryptography==44.0.1 6 | pyyaml==6.0.1 7 | psutil==5.9.4 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [pep8] 2 | max-line-length = 100 -------------------------------------------------------------------------------- /src/build.py: -------------------------------------------------------------------------------- 1 | '''Main entrypoint''' 2 | 3 | import os 4 | import sys 5 | from datetime import datetime 6 | from stopit import TimeoutException, SignalTimeout as Timeout 7 | import boto3 8 | from functools import partial 9 | 10 | from common import CLONE_DIR_PATH 11 | 12 | from log_utils import ( 13 | delta_to_mins_secs, get_logger, init_logging, 14 | RepeatTimer, log_monitoring_metrics 15 | ) 16 | from log_utils.remote_logs import ( 17 | post_build_complete, post_build_error, 18 | post_build_timeout, post_build_processing, 19 | post_metrics, 20 | ) 21 | 22 | from crypto.decrypt import decrypt 23 | 24 | import repo_config 25 | 26 | from steps import ( 27 | build_hugo, build_jekyll, build_static, download_hugo, 28 | fetch_repo, publish, run_build_script, run_step, fetch_commit_sha, 29 | setup_bundler, setup_node, setup_ruby, StepException, update_repo 30 | ) 31 | 32 | TIMEOUT_SECONDS = 45 * 60 # 45 minutes 33 | 34 | GENERATORS = ['hugo', 'jekyll', 'node.js', 'static'] 35 | 36 | 37 | def build( 38 | aws_access_key_id, 39 | aws_default_region, 40 | aws_secret_access_key, 41 | status_callback, 42 | baseurl, 43 | branch, 44 | bucket, 45 | build_id, 46 | config, 47 | generator, 48 | github_token, 49 | owner, 50 | repository, 51 | site_prefix, 52 | 
user_environment_variables=[] 53 | ): 54 | ''' 55 | Main task to run a full site build process. 56 | 57 | All values needed for the build are loaded from 58 | environment variables. 59 | ''' 60 | # keep track of total time 61 | start_time = datetime.now() 62 | 63 | logger = None 64 | commit_sha = None 65 | thread = None 66 | 67 | cache_control = os.getenv('CACHE_CONTROL', 'max-age=60') 68 | database_url = os.environ['DATABASE_URL'] 69 | user_environment_variable_key = os.environ['USER_ENVIRONMENT_VARIABLE_KEY'] 70 | 71 | try: 72 | post_build_processing(status_callback) 73 | # throw a timeout exception after TIMEOUT_SECONDS 74 | with Timeout(TIMEOUT_SECONDS, swallow_exc=False): 75 | build_info = f'{owner}/{repository}@id:{build_id}' 76 | 77 | decrypted_uevs = decrypt_uevs(user_environment_variable_key, user_environment_variables) 78 | 79 | priv_vals = [uev['value'] for uev in decrypted_uevs] 80 | priv_vals.append(aws_access_key_id) 81 | priv_vals.append(aws_secret_access_key) 82 | if github_token: 83 | priv_vals.append(github_token) 84 | 85 | logattrs = { 86 | 'branch': branch, 87 | 'buildid': build_id, 88 | 'owner': owner, 89 | 'repository': repository, 90 | } 91 | 92 | init_logging(priv_vals, logattrs, database_url) 93 | 94 | logger = get_logger('main') 95 | 96 | # partially apply the callback url to post_metrics 97 | post_metrics_p = partial(post_metrics, status_callback) 98 | 99 | logger.info(f'Running build for {owner}/{repository}/{branch}') 100 | 101 | if generator not in GENERATORS: 102 | raise ValueError(f'Invalid generator: {generator}') 103 | 104 | # start a separate scheduled thread for memory/cpu monitoring 105 | MONITORING_INTERVAL = 30 106 | monitoring_logger = get_logger('monitor') 107 | thread = RepeatTimer( 108 | MONITORING_INTERVAL, 109 | log_monitoring_metrics, 110 | [monitoring_logger, post_metrics_p], 111 | ) 112 | thread.start() 113 | 114 | # S3 client used in multiple steps 115 | s3_client = boto3.client( 116 | service_name='s3', 117 | aws_access_key_id=aws_access_key_id, 118 | aws_secret_access_key=aws_secret_access_key, 119 | region_name=aws_default_region 120 | ) 121 | 122 | ## 123 | # FETCH 124 | # 125 | run_step( 126 | fetch_repo, 127 | 'There was a problem fetching the repository, see the above logs for details.', 128 | owner, repository, branch, github_token, 129 | ) 130 | 131 | commit_sha = fetch_commit_sha(CLONE_DIR_PATH) 132 | 133 | federalist_config = repo_config.from_json_file( 134 | CLONE_DIR_PATH, 135 | dict( 136 | headers=dict([('cache-control', cache_control)]), 137 | excludePaths=[ 138 | '*/Dockerfile', 139 | '*/docker-compose.yml', 140 | '/federalist.json', 141 | '/pages.json' 142 | ], 143 | includePaths=['/.well-known/security.txt'] 144 | ) 145 | ) 146 | 147 | if federalist_config.full_clone(): 148 | run_step( 149 | update_repo, 150 | 'There was a problem updating the repository, see the above logs for details.', 151 | CLONE_DIR_PATH, 152 | ) 153 | 154 | ## 155 | # BUILD 156 | # 157 | run_step( 158 | setup_node, 159 | 'There was a problem setting up Node, see the above logs for details.', 160 | federalist_config.should_cache(), 161 | bucket, 162 | s3_client, 163 | post_metrics_p, 164 | ) 165 | 166 | # Run the npm `federalist` task (if it is defined) 167 | run_step( 168 | run_build_script, 169 | 'There was a problem running the federalist script, see the above logs for details.', # noqa: E501 170 | branch, owner, repository, site_prefix, baseurl, decrypted_uevs, 171 | ) 172 | 173 | # Run the appropriate build engine based on generator 174 | if 
generator == 'jekyll': 175 | run_step( 176 | setup_ruby, 177 | 'There was a problem setting up Ruby, see the above logs for details.', 178 | federalist_config.should_cache(), post_metrics_p, 179 | ) 180 | 181 | run_step( 182 | setup_bundler, 183 | 'There was a problem setting up Bundler, see the above logs for details.', 184 | federalist_config.should_cache(), bucket, s3_client, 185 | ) 186 | 187 | run_step( 188 | build_jekyll, 189 | 'There was a problem running Jekyll, see the above logs for details.', 190 | branch, owner, repository, site_prefix, baseurl, config, decrypted_uevs, 191 | ) 192 | 193 | elif generator == 'hugo': 194 | # extra: --hugo-version (not yet used) 195 | run_step( 196 | download_hugo, 197 | 'There was a problem downloading Hugo, see the above logs for details.', 198 | post_metrics_p 199 | ) 200 | 201 | run_step( 202 | build_hugo, 203 | 'There was a problem running Hugo, see the above logs for details.', 204 | branch, owner, repository, site_prefix, baseurl, decrypted_uevs, 205 | ) 206 | 207 | elif generator == 'static': 208 | # no build arguments are needed 209 | build_static() 210 | 211 | elif (generator == 'node.js' or generator == 'script only'): 212 | logger.info('build already ran in \'npm run federalist\'') 213 | 214 | else: 215 | raise ValueError(f'Invalid generator: {generator}') 216 | 217 | ## 218 | # PUBLISH 219 | # 220 | publish(baseurl, site_prefix, bucket, federalist_config, s3_client) 221 | 222 | delta_string = delta_to_mins_secs(datetime.now() - start_time) 223 | logger.info(f'Total build time: {delta_string}') 224 | 225 | # Finished! 226 | post_build_complete(status_callback, commit_sha) 227 | 228 | sys.exit(0) 229 | 230 | except StepException as err: 231 | ''' 232 | Thrown when a step itself fails, usually because a command exited 233 | with a non-zero return code 234 | ''' 235 | logger.error(str(err)) 236 | post_build_error(status_callback, str(err), commit_sha) 237 | sys.exit(1) 238 | 239 | except TimeoutException: 240 | logger.warning(f'Build({build_info}) has timed out') 241 | post_build_timeout(status_callback, commit_sha) 242 | 243 | except Exception as err: # pylint: disable=W0703 244 | # Getting here means something really weird has happened 245 | # since all errors caught during tasks should be caught 246 | # in the previous block as `UnexpectedExit` exceptions. 247 | err_string = str(err) 248 | 249 | # log the original exception 250 | msg = f'Unexpected exception raised during build({build_info}): {err_string}' 251 | if logger: 252 | logger.warning(msg) 253 | else: 254 | print(msg) 255 | 256 | err_message = ( 257 | f'Unexpected build({build_info}) error. Please try ' 258 | 'again and contact pages-support if it persists.' 
259 | ) 260 | 261 | post_build_error(status_callback, err_message, commit_sha) 262 | finally: 263 | if thread: 264 | thread.cancel() 265 | 266 | 267 | def decrypt_uevs(key, uevs): 268 | return [{ 269 | 'name': uev['name'], 270 | 'value': decrypt(uev['ciphertext'], key) 271 | } for uev in uevs] 272 | -------------------------------------------------------------------------------- /src/common.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Common variables, tasks, and functions 3 | ''' 4 | 5 | from pathlib import Path 6 | 7 | REPO_BASE_URL = 'github.com' 8 | 9 | WORKING_DIR_PATH = Path('/tmp/work') # nosec 10 | 11 | CLONE_DIR = 'site_repo' 12 | CLONE_DIR_PATH = WORKING_DIR_PATH / CLONE_DIR 13 | 14 | SITE_BUILD_DIR = '_site' 15 | SITE_BUILD_DIR_PATH = CLONE_DIR_PATH / SITE_BUILD_DIR 16 | 17 | STATUS_COMPLETE = 'success' 18 | STATUS_ERROR = 'error' 19 | STATUS_PROCESSING = 'processing' 20 | -------------------------------------------------------------------------------- /src/crypto/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloud-gov/pages-build-container/00b490e9f858f10b52eb0875b637aea61a913438/src/crypto/__init__.py -------------------------------------------------------------------------------- /src/crypto/decrypt.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from cryptography.hazmat.backends import default_backend 3 | from cryptography.hazmat.primitives.ciphers import ( 4 | Cipher, algorithms, modes 5 | ) 6 | 7 | 8 | def decrypt(ciphertext, key): 9 | m = hashlib.sha256() 10 | m.update(key.encode()) 11 | hashed_key = m.digest() 12 | 13 | auth_tag, iv, encrypted = [ 14 | bytes.fromhex(hex) for hex in ciphertext.split(':') 15 | ] 16 | 17 | decryptor = Cipher( 18 | algorithms.AES(hashed_key), 19 | modes.GCM(iv, auth_tag), 20 | backend=default_backend() 21 | ).decryptor() 22 | 23 | return (decryptor.update(encrypted) + decryptor.finalize()).decode() 24 | -------------------------------------------------------------------------------- /src/log_utils/__init__.py: -------------------------------------------------------------------------------- 1 | '''Logging stuff''' 2 | 3 | from .get_logger import get_logger, init_logging 4 | from .delta_to_mins_secs import delta_to_mins_secs 5 | from .monitoring import RepeatTimer, log_monitoring_metrics 6 | 7 | __all__ = [ 8 | 'delta_to_mins_secs', 'get_logger', 'init_logging', 'RepeatTimer', 'log_monitoring_metrics'] 9 | -------------------------------------------------------------------------------- /src/log_utils/common.py: -------------------------------------------------------------------------------- 1 | STATUS_COMPLETE = 'success' 2 | STATUS_ERROR = 'error' 3 | STATUS_PROCESSING = 'processing' 4 | -------------------------------------------------------------------------------- /src/log_utils/db_handler.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import logging 3 | import psycopg2 4 | 5 | 6 | class DBHandler(logging.Handler): 7 | def __init__(self, conn_url, build_id): 8 | self.conn_url = conn_url 9 | self.build_id = build_id 10 | self.source = 'ALL' 11 | 12 | self.conn = None 13 | 14 | try: 15 | self.conn = psycopg2.connect(self.conn_url) 16 | except Exception: 17 | raise Exception(f'Cannot connect to {self.conn_url}') 18 | 19 | logging.Handler.__init__(self) 20 | 21 | def emit(self, 
record): 22 | try: 23 | now = datetime.now() 24 | self.exec( 25 | ('INSERT INTO buildlog ' 26 | '(build, source, output, "createdAt", "updatedAt") ' 27 | 'VALUES (%s, %s, %s, %s, %s);'), 28 | (self.build_id, self.source, self.format(record), now, now) 29 | ) 30 | except Exception: 31 | self.handleError(record) 32 | 33 | def close(self): 34 | self.conn.close() 35 | 36 | def exec(self, stmt, args): 37 | cursor = self.conn.cursor() 38 | cursor.execute(stmt, args) 39 | self.conn.commit() 40 | cursor.close() 41 | -------------------------------------------------------------------------------- /src/log_utils/delta_to_mins_secs.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta # noqa pylint: disable=W0611 2 | 3 | 4 | def delta_to_mins_secs(delta): 5 | ''' 6 | Converts a timedelta to a string of minutes and seconds. 7 | 8 | >>> td = timedelta(seconds=55) 9 | >>> delta_to_mins_secs(td) 10 | '55s' 11 | 12 | >>> td = timedelta(seconds=124) 13 | >>> delta_to_mins_secs(td) 14 | '2m 4s' 15 | ''' 16 | secs = int(delta.total_seconds()) 17 | if secs > 60: 18 | mins = int(secs // 60) 19 | secs = int(secs % 60) 20 | return f'{mins}m {secs}s' 21 | # else 22 | return f'{secs}s' 23 | -------------------------------------------------------------------------------- /src/log_utils/get_logger.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Setup nice logs. 3 | Clients should use the `get_logger` method to get a logger instance. 4 | ''' 5 | 6 | import sys 7 | import logging 8 | import logging.handlers 9 | 10 | from .db_handler import DBHandler 11 | 12 | DEFAULT_LOG_LEVEL = logging.INFO 13 | 14 | LOG_ATTRS = {} 15 | 16 | 17 | class LogFilter(logging.Filter): 18 | ''' 19 | For every log message, replaces any of the values found in `priv_values` 20 | with the provided or default `mask` text. In addition, this prevents empty 21 | messages from being logged at all. 22 | ''' 23 | DEFAULT_MASK = '[PRIVATE VALUE HIDDEN]' 24 | INVALID_ACCESS_KEY = 'InvalidAccessKeyId' 25 | 26 | def __init__(self, priv_vals, mask=DEFAULT_MASK): 27 | self.priv_vals = priv_vals 28 | self.mask = mask 29 | logging.Filter.__init__(self) 30 | 31 | def filter(self, record): 32 | for priv_val in self.priv_vals: 33 | record.msg = record.msg.replace(priv_val, self.mask) 34 | 35 | if self.INVALID_ACCESS_KEY in record.msg: 36 | record.msg = ( 37 | 'Whoops, our S3 keys were rotated during your ' 38 | 'build and became out of date. This was not a ' 39 | 'problem with your site build, but if you restart ' 40 | 'the failed build it should work on the next try. ' 41 | 'Sorry for the inconvenience!' 42 | ) 43 | 44 | return len(record.msg) > 0 45 | 46 | 47 | class Formatter(logging.Formatter): 48 | ''' 49 | A more forgiving formatter that will fill in blank values if our custom 50 | attributes are missing 51 | ''' 52 | def __init__(self, keys, *args, **kwargs): 53 | self.keys = keys 54 | logging.Formatter.__init__(self, *args, **kwargs) 55 | 56 | def format(self, record): 57 | ''' 58 | Add missing values before formatting as normal 59 | ''' 60 | for key in self.keys: 61 | if (key not in record.__dict__): 62 | record.__dict__[key] = '' 63 | 64 | return super().format(record) 65 | 66 | 67 | def get_logger(name): 68 | ''' 69 | Gets a logger instance configured with our formatter and handler 70 | for the given name. 
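    The result is a LoggerAdapter that merges the shared LOG_ATTRS set by init_logging (branch, buildid, owner, repository) into every record.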
71 | ''' 72 | logger = logging.getLogger(name) 73 | 74 | return logging.LoggerAdapter(logger, LOG_ATTRS) 75 | 76 | 77 | def set_log_attrs(attrs): 78 | global LOG_ATTRS 79 | LOG_ATTRS = attrs 80 | 81 | 82 | def init_logging(private_values, attrs, db_url): 83 | global LOG_ATTRS 84 | LOG_ATTRS = attrs 85 | 86 | date_fmt = '%Y-%m-%d %H:%M:%S' 87 | style_fmt = '{' 88 | short_fmt = '{asctime} {levelname} [{name}] {message}' 89 | long_fmt = '{asctime} {levelname} [{name}] ' 90 | for key in attrs.keys(): 91 | long_fmt = long_fmt + '@' + key + ': {' + key + '} ' 92 | 93 | long_fmt = long_fmt + '@message: {message}' 94 | 95 | extra_attrs = attrs.keys() 96 | 97 | log_filter = LogFilter(private_values) 98 | 99 | log_level = DEFAULT_LOG_LEVEL 100 | 101 | stream_formatter = Formatter(extra_attrs, long_fmt, date_fmt, style_fmt) 102 | 103 | stream_handler = logging.StreamHandler(sys.stdout) 104 | stream_handler.setFormatter(stream_formatter) 105 | stream_handler.setLevel(log_level) 106 | stream_handler.addFilter(log_filter) 107 | 108 | handlers = [stream_handler] 109 | 110 | # configure db logging 111 | build_id = attrs['buildid'] 112 | db_formatter = logging.Formatter(short_fmt, date_fmt, style_fmt) 113 | 114 | db_handler = DBHandler(db_url, build_id) 115 | db_handler.setFormatter(db_formatter) 116 | db_handler.setLevel(log_level) 117 | db_handler.addFilter(log_filter) 118 | 119 | handlers.append(db_handler) 120 | 121 | logging.basicConfig(level=log_level, handlers=handlers) 122 | -------------------------------------------------------------------------------- /src/log_utils/monitoring.py: -------------------------------------------------------------------------------- 1 | from threading import Timer 2 | import psutil 3 | 4 | max_metrics = dict( 5 | cpu=0, 6 | mem=0, 7 | disk=0 8 | ) 9 | 10 | 11 | # https://stackoverflow.com/a/48741004 12 | class RepeatTimer(Timer): 13 | def run(self): 14 | while not self.finished.wait(self.interval): 15 | self.function(*self.args, **self.kwargs) 16 | 17 | 18 | def log_monitoring_metrics(logger, post_metrics): 19 | disk = psutil.disk_usage("/") 20 | 21 | # compute new maximum metrics and post to the application 22 | max_metrics["cpu"] = max(psutil.cpu_percent(), max_metrics["cpu"]) 23 | max_metrics["mem"] = max(psutil.virtual_memory().percent, max_metrics["mem"]) 24 | max_metrics["disk"] = max(disk.used, max_metrics["disk"]) 25 | 26 | post_metrics(dict(machine=max_metrics)) 27 | -------------------------------------------------------------------------------- /src/log_utils/remote_logs.py: -------------------------------------------------------------------------------- 1 | '''Functions for sending remote logs''' 2 | 3 | import base64 4 | import requests 5 | from typing import Dict 6 | 7 | from .common import (STATUS_COMPLETE, STATUS_ERROR, STATUS_PROCESSING) 8 | 9 | 10 | def b64string(text): 11 | ''' 12 | Base64 encodes a string as utf-8 13 | 14 | >>> b64string('boop') 15 | 'Ym9vcA==' 16 | ''' 17 | return base64.b64encode(text.encode('utf-8')).decode('utf-8') 18 | 19 | 20 | def post_status(status_callback_url, status, output='', commit_sha=None): 21 | ''' 22 | POSTs `status` and `output` to the `status_callback_url` 23 | ''' 24 | requests.post( 25 | status_callback_url, 26 | json={ 27 | 'status': status, 28 | 'message': b64string(output), 29 | 'commit_sha': commit_sha, 30 | }, 31 | timeout=10 32 | ) 33 | 34 | 35 | def post_build_complete(status_callback_url, commit_sha): 36 | ''' 37 | POST a STATUS_COMPLETE status to the status_callback_url 38 | ''' 39 | 
post_status(status_callback_url, status=STATUS_COMPLETE, commit_sha=commit_sha) 40 | 41 | 42 | def post_build_error(status_callback_url, error_output, commit_sha=None): 43 | ''' 44 | POST a STATUS_ERROR status with message to the status_callback_url 45 | ''' 46 | # Post to the Pages web application endpoint with status and output 47 | post_status( 48 | status_callback_url, status=STATUS_ERROR, output=error_output, commit_sha=commit_sha 49 | ) 50 | 51 | 52 | def post_build_processing(status_callback_url): 53 | ''' 54 | POST a STATUS_PROCESSING status to the status_callback_url 55 | ''' 56 | post_status(status_callback_url, status=STATUS_PROCESSING) 57 | 58 | 59 | def post_build_timeout(status_callback_url, commit_sha=None): 60 | ''' 61 | POST a STATUS_ERROR status with timeout message to the status_callback_url 62 | ''' 63 | output = 'The build did not complete. It may have timed out.' 64 | 65 | # Post to the Pages web application with status and output 66 | post_status(status_callback_url, status=STATUS_ERROR, output=output, commit_sha=commit_sha) 67 | 68 | 69 | def post_metrics(status_callback_url: str, metrics: Dict): 70 | ''' 71 | POST build metrics to the metrics API 72 | ''' 73 | url = status_callback_url.replace('status', 'metrics') 74 | requests.post( 75 | url, 76 | json=metrics, 77 | timeout=10 78 | ) 79 | -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import inspect 3 | import json 4 | import os 5 | import shlex 6 | 7 | from build import build 8 | from crypto.decrypt import decrypt 9 | 10 | 11 | KEYS_TO_DECRYPT = [ 12 | 'STATUS_CALLBACK', 13 | 'GITHUB_TOKEN', 14 | 'AWS_ACCESS_KEY_ID', 15 | 'AWS_SECRET_ACCESS_KEY', 16 | 'BUCKET', 17 | ] 18 | 19 | 20 | def load_vcap(): 21 | vcap_application = json.loads(os.getenv('VCAP_APPLICATION', '{}')) 22 | vcap_services = json.loads(os.getenv('VCAP_SERVICES', '{}')) 23 | 24 | space = vcap_application['space_name'] 25 | 26 | space_prefix = 'pages-staging' if space == 'pages-staging' else f'federalist-{space}' 27 | 28 | uev_ups = next( 29 | ups for ups in vcap_services['user-provided'] 30 | if ups['name'] == f'{space_prefix}-uev-key' 31 | ) 32 | 33 | uev_env_var = 'USER_ENVIRONMENT_VARIABLE_KEY' 34 | os.environ[uev_env_var] = uev_ups['credentials']['key'] 35 | 36 | 37 | def decrypt_key_value(k, v, encryption_key): 38 | if k in KEYS_TO_DECRYPT: 39 | return decrypt(v, encryption_key) 40 | return v 41 | 42 | 43 | def decrypt_params(params): 44 | vcap_application = json.loads(os.getenv('VCAP_APPLICATION', '{}')) 45 | vcap_services = json.loads(os.getenv('VCAP_SERVICES', '{}')) 46 | 47 | space = vcap_application['space_name'] 48 | 49 | encryption_ups = next( 50 | ups for ups in vcap_services['user-provided'] 51 | if ups['name'] == f'pages-{space}-encryption' 52 | ) 53 | 54 | encryption_key = encryption_ups['credentials']['key'] 55 | 56 | params = {k: decrypt_key_value(k, v, encryption_key) for (k, v) in params.items()} 57 | 58 | return params 59 | 60 | 61 | if __name__ == "__main__": 62 | parser = argparse.ArgumentParser(description='Run a pages build') 63 | group = parser.add_mutually_exclusive_group(required=True) 64 | group.add_argument('-p', '--params', dest='params', 65 | help='A JSON encoded string', 66 | metavar="'{\"foo\": \"bar\"}'") 67 | group.add_argument('-f', '--file', dest='file', 68 | help='A path to a JSON file', type=argparse.FileType('r'), 69 | metavar="./foo.json") 70 | args = 
parser.parse_args() 71 | 72 | if args.params: 73 | params = json.loads(args.params) 74 | params = decrypt_params(params) 75 | else: 76 | params = json.load(args.file) 77 | 78 | params = {k.lower(): v for (k, v) in params.items()} 79 | 80 | build_arguments = inspect.getfullargspec(build)[0] 81 | for k in params: 82 | if k not in build_arguments: 83 | # Warn about unused arguments 84 | print(f'WARNING - Ignoring unused parameter: {k}') 85 | 86 | # Remove unused build arguments 87 | kwargs = {k: v for (k, v) in params.items() if k in build_arguments} 88 | 89 | if 'user_environment_variables' in kwargs: 90 | uevs = kwargs['user_environment_variables'] 91 | if uevs and isinstance(uevs, str): 92 | kwargs['user_environment_variables'] = json.loads(uevs) 93 | 94 | kwargs['branch'] = shlex.quote(kwargs['branch']) 95 | kwargs['owner'] = shlex.quote(kwargs['owner']) 96 | kwargs['repository'] = shlex.quote(kwargs['repository']) 97 | 98 | if os.getenv('VCAP_APPLICATION', None): 99 | load_vcap() 100 | 101 | build(**kwargs) 102 | -------------------------------------------------------------------------------- /src/publishing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloud-gov/pages-build-container/00b490e9f858f10b52eb0875b637aea61a913438/src/publishing/__init__.py -------------------------------------------------------------------------------- /src/publishing/models.py: -------------------------------------------------------------------------------- 1 | '''Classes for files published to S3''' 2 | 3 | import binascii 4 | import gzip 5 | import hashlib 6 | import mimetypes 7 | 8 | from datetime import datetime 9 | from os import path 10 | 11 | mimetypes.init() # must initialize mimetypes 12 | 13 | 14 | def remove_prefix(text, prefix): 15 | ''' 16 | Returns a copy of text with the given prefix removed. 
17 | 18 | >>> remove_prefix('/ab/cd/ef', '/ab/cd') 19 | '/ef' 20 | 21 | >>> remove_prefix('abcd', '/ef') 22 | 'abcd' 23 | ''' 24 | if text.startswith(prefix): 25 | return text[len(prefix):] 26 | return text 27 | 28 | 29 | class SiteObject(): 30 | ''' 31 | An abstract class for an individual object that can be uploaded to S3 32 | ''' 33 | 34 | def __init__(self, filename, md5, site_prefix='', dir_prefix=''): 35 | self.filename = filename 36 | self.md5 = md5 37 | self.dir_prefix = dir_prefix 38 | self.site_prefix = site_prefix 39 | 40 | @property 41 | def s3_key(self): 42 | '''The object's key in the S3 bucket''' 43 | filename = self.filename 44 | if self.dir_prefix: 45 | filename = remove_prefix(filename, 46 | path.join(self.dir_prefix, '')) 47 | return f'{self.site_prefix}/{filename}' 48 | 49 | def upload_to_s3(self, bucket, s3_client): 50 | '''Upload this object to S3''' 51 | raise NotImplementedError # should be implemented in child classes 52 | 53 | def delete_from_s3(self, bucket, s3_client): 54 | '''Delete this object from S3''' 55 | s3_client.delete_object( 56 | Bucket=bucket, 57 | Key=self.s3_key, 58 | ) 59 | 60 | 61 | class SiteFile(SiteObject): 62 | '''A file produced during a site build''' 63 | 64 | GZIP_EXTENSIONS = ['html', 'css', 'js', 'json', 'svg'] 65 | 66 | def __init__(self, filename, dir_prefix, site_prefix, cache_control): 67 | super().__init__(filename=filename, 68 | md5=None, 69 | dir_prefix=dir_prefix, 70 | site_prefix=site_prefix) 71 | self._compress() 72 | self.md5 = self.generate_md5() 73 | self.cache_control = cache_control 74 | 75 | @property 76 | def is_compressible(self): 77 | '''Whether the file should be compressed''' 78 | _, file_extension = path.splitext(self.filename) 79 | # file_extension has a preceding '.' character, so use substring 80 | return file_extension[1:].lower() in self.GZIP_EXTENSIONS 81 | 82 | @property 83 | def content_encoding(self): 84 | '''"gzip" if the file is compressible, otherwise None''' 85 | if self.is_compressible: 86 | return 'gzip' 87 | return None 88 | 89 | @property 90 | def content_type(self): 91 | '''The best-guess mimetype of the file''' 92 | content_type, _ = mimetypes.guess_type(self.filename) 93 | return content_type 94 | 95 | @property 96 | def is_compressed(self): 97 | '''Checks to see if the file is already compressed''' 98 | with open(self.filename, 'rb') as test_f: 99 | # '1f8b' is the magic flag that gzipped files start with 100 | return binascii.hexlify(test_f.read(2)) == b'1f8b' 101 | 102 | def generate_md5(self): 103 | '''Generates an md5 hash of the file contents''' 104 | hash_md5 = hashlib.md5() # nosec 105 | 106 | with open(self.filename, 'rb') as file: 107 | for chunk in iter(lambda: file.read(4096), b""): 108 | hash_md5.update(chunk) 109 | return hash_md5.hexdigest() 110 | 111 | def _compress(self): 112 | '''GZips the file in-situ''' 113 | 114 | if not self.is_compressible: 115 | # shouldn't be compressed, so return 116 | return 117 | 118 | if self.is_compressed: 119 | # already compressed, so return 120 | return 121 | 122 | # otherwise, gzip the file in place 123 | with open(self.filename, 'rb') as f_in: 124 | contents = f_in.read() 125 | # Spoof the modification time so that MD5 hashes match next time 126 | spoofed_mtime = datetime(2014, 3, 19).timestamp() # March 19, 2014 127 | # Compress the contents and save over the original file 128 | with gzip.GzipFile(self.filename, mode='wb', 129 | mtime=spoofed_mtime) as gz_file: 130 | gz_file.write(contents) 131 | 132 | def upload_to_s3(self, bucket, s3_client): 
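        '''Uploads the file to S3 with its cache-control, server-side encryption, and (when set) content-encoding and content-type'''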
133 | extra_args = { 134 | "CacheControl": self.cache_control, 135 | "ServerSideEncryption": "AES256", 136 | } 137 | 138 | if self.content_encoding: 139 | extra_args["ContentEncoding"] = self.content_encoding 140 | if self.content_type: 141 | extra_args["ContentType"] = self.content_type 142 | 143 | s3_client.upload_file( 144 | Filename=self.filename, 145 | Bucket=bucket, 146 | Key=self.s3_key, 147 | # For allowed ExtraArgs, see 148 | # https://boto3.readthedocs.io/en/latest/reference/customizations/s3.html#boto3.s3.transfer.S3Transfer.ALLOWED_UPLOAD_ARGS 149 | ExtraArgs=extra_args, 150 | ) 151 | 152 | 153 | class SiteRedirect(SiteObject): 154 | ''' 155 | A redirect, typically from `/path/to/page => /path/to/page/` 156 | ''' 157 | 158 | def __init__(self, filename, dir_prefix, site_prefix, base_url, cache_control): 159 | super().__init__(filename=filename, 160 | dir_prefix=dir_prefix, 161 | md5=None, # update after super().__init()__ 162 | site_prefix=site_prefix) 163 | 164 | self.base_url = base_url 165 | self.cache_control = cache_control 166 | 167 | # The md5 hash is the hash of the destination string, not 168 | # of the file contents, for our redirect objects 169 | self.md5 = hashlib.md5(self.destination.encode()).hexdigest() # nosec 170 | 171 | @property 172 | def destination(self): 173 | '''The destination of the redirect object''' 174 | filename = self.filename 175 | 176 | if self.dir_prefix: 177 | if filename == self.dir_prefix: 178 | return f'{self.base_url}/' 179 | 180 | filename = remove_prefix(filename, 181 | path.join(self.dir_prefix, '')) 182 | 183 | return f'{self.base_url}/{filename}/' 184 | 185 | @property 186 | def s3_key(self): 187 | filename = self.filename 188 | 189 | if self.dir_prefix: 190 | if filename == self.dir_prefix: 191 | # then this is 'root' site redirect object 192 | # (ie, the main index.html file) 193 | return self.site_prefix 194 | 195 | filename = remove_prefix(filename, 196 | path.join(self.dir_prefix, '')) 197 | 198 | return f'{self.site_prefix}/{filename}' 199 | 200 | def upload_to_s3(self, bucket, s3_client): 201 | '''Uploads the redirect object to S3''' 202 | s3_client.put_object( 203 | Body=self.destination, 204 | Bucket=bucket, 205 | Key=self.s3_key, 206 | ServerSideEncryption='AES256', 207 | WebsiteRedirectLocation=self.destination, 208 | CacheControl=self.cache_control 209 | ) 210 | -------------------------------------------------------------------------------- /src/publishing/s3publisher.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Classes and methods for publishing a directory to S3 3 | ''' 4 | 5 | import requests 6 | 7 | from os import path, makedirs, walk, getenv 8 | from concurrent.futures import ThreadPoolExecutor 9 | 10 | from log_utils import get_logger 11 | from .models import (remove_prefix, SiteObject, SiteFile, SiteRedirect) 12 | 13 | MAX_S3_KEYS_PER_REQUEST = 1000 14 | FEDERALIST_JSON = 'federalist.json' 15 | MAX_WORKERS = getenv('MAX_WORKERS', 8) 16 | 17 | 18 | def list_remote_objects(bucket, site_prefix, s3_client): 19 | ''' 20 | 21 | Generates a list of remote S3 objects that have keys starting with 22 | site_prefix in the given bucket. 
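    Listing is paginated with list_objects_v2 continuation tokens until all matching keys have been collected.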
23 | 24 | ''' 25 | results_truncated = True 26 | continuation_token = None 27 | 28 | remote_objects = [] 29 | 30 | while results_truncated: 31 | prefix = site_prefix 32 | # Add a / to the end of the prefix to prevent 33 | # retrieving keys for sites with site_prefixes 34 | # that are substrings of others 35 | if prefix[-1] != '/': 36 | prefix += '/' 37 | 38 | request_kwargs = { 39 | 'Bucket': bucket, 40 | 'MaxKeys': MAX_S3_KEYS_PER_REQUEST, 41 | 'Prefix': prefix, 42 | } 43 | 44 | if continuation_token: 45 | request_kwargs['ContinuationToken'] = continuation_token 46 | 47 | response = s3_client.list_objects_v2(**request_kwargs) 48 | 49 | contents = response.get('Contents') 50 | if not contents: 51 | return remote_objects 52 | 53 | for response_obj in contents: 54 | # remove the site_prefix from the key 55 | filename = remove_prefix(response_obj['Key'], site_prefix) 56 | 57 | # remove initial slash if present 58 | filename = remove_prefix(filename, '/') 59 | 60 | # the etag comes surrounded by double quotes, so remove them 61 | md5 = response_obj['ETag'].replace('"', '') 62 | 63 | site_obj = SiteObject(filename=filename, md5=md5, 64 | site_prefix=site_prefix) 65 | remote_objects.append(site_obj) 66 | 67 | results_truncated = response['IsTruncated'] 68 | if results_truncated: 69 | continuation_token = response['NextContinuationToken'] 70 | 71 | return remote_objects 72 | 73 | 74 | def get_cache_control(federalist_config, filename): 75 | return federalist_config.get_headers_for_path(filename).get('cache-control') 76 | 77 | 78 | def strip_dirname(filepath, dirname): 79 | if dirname and filepath.startswith(dirname): 80 | return filepath[len(dirname):] 81 | return filepath 82 | 83 | 84 | def publish_to_s3(directory, base_url, site_prefix, bucket, federalist_config, 85 | s3_client, dry_run=False): 86 | '''Publishes the given directory to S3''' 87 | logger = get_logger('publish') 88 | 89 | # Add local 404 if does not already exist 90 | filename_404 = directory + '/404.html' 91 | if not path.isfile(filename_404): 92 | default_404_url = ('https://raw.githubusercontent.com' 93 | '/cloud-gov/pages-404-page/main/' 94 | '404-pages-client.html') 95 | default_404 = requests.get(default_404_url, timeout=10) 96 | makedirs(path.dirname(filename_404), exist_ok=True) 97 | with open(filename_404, "w+") as f: 98 | f.write(default_404.text) 99 | 100 | # Collect a list of all files in `directory`` 101 | local_objects_by_filename = {} 102 | 103 | for root, _dirs, filenames in walk(directory): 104 | for filename in filenames: 105 | full_path = path.join(root, filename) 106 | relative_path = strip_dirname(full_path, directory) 107 | 108 | if federalist_config.is_path_included(relative_path): 109 | cache_control = get_cache_control(federalist_config, relative_path) 110 | 111 | site_file = SiteFile(filename=full_path, 112 | dir_prefix=directory, 113 | site_prefix=site_prefix, 114 | cache_control=cache_control) 115 | 116 | local_objects_by_filename[site_file.filename] = site_file 117 | 118 | if filename == 'index.html': 119 | site_redirect = SiteRedirect(filename=root, 120 | dir_prefix=directory, 121 | site_prefix=site_prefix, 122 | base_url=base_url, 123 | cache_control=cache_control) 124 | 125 | local_objects_by_filename[site_redirect.filename] = site_redirect 126 | 127 | if len(local_objects_by_filename) == 0: 128 | raise RuntimeError('Local build files not found') 129 | 130 | # Get list of remote files 131 | remote_objects = list_remote_objects(bucket=bucket, 132 | site_prefix=site_prefix, 133 | s3_client=s3_client) 
134 | 135 | # Make dicts by filename of local and remote objects for easier searching 136 | remote_objects_by_filename = {} 137 | for obj in remote_objects: 138 | # These will not have the `directory` prefix that our local 139 | # files do, so add it so we can more easily compare them. 140 | filename = path.join(directory, obj.filename) 141 | remote_objects_by_filename[filename] = obj 142 | 143 | # Create lists of all the new and modified objects 144 | new_objects = [] 145 | replacement_objects = [] 146 | # track whether we can do diffing because of cache control 147 | default_cache_control = getenv('CACHE_CONTROL', 'max-age=60') 148 | for local_filename, local_obj in local_objects_by_filename.items(): 149 | matching_remote_obj = remote_objects_by_filename.get(local_filename) 150 | if not matching_remote_obj: 151 | new_objects.append(local_obj) 152 | elif (matching_remote_obj.md5 != local_obj.md5 or 153 | local_obj.cache_control != default_cache_control): 154 | replacement_objects.append(local_obj) 155 | 156 | # Create a list of the remote objects that should be deleted 157 | deletion_objects = [ 158 | obj for filename, obj in remote_objects_by_filename.items() 159 | if not local_objects_by_filename.get(filename) 160 | ] 161 | 162 | if (len(new_objects) == 0 and len(replacement_objects) <= 1 and 163 | len(local_objects_by_filename) <= 1): 164 | raise RuntimeError('Cannot unpublish all files') 165 | 166 | logger.info('Preparing to upload') 167 | logger.info(f'New: {len(new_objects)}') 168 | logger.info(f'Replaced: {len(replacement_objects)}') 169 | logger.info(f'Deleted: {len(deletion_objects)}') 170 | 171 | # Upload new and replacement files 172 | upload_objects = new_objects + replacement_objects 173 | 174 | # task to be run in parallel via threadpool 175 | def uploader_task(client, file): 176 | logger.info(f'Uploading {file.s3_key}') 177 | try: 178 | file.upload_to_s3(bucket, client) 179 | except UnicodeEncodeError as err: 180 | if err.reason == 'surrogates not allowed': 181 | logger.warning( 182 | f'... unable to upload {file.filename} due ' 183 | f'to invalid characters in file name.' 
184 | ) 185 | else: 186 | raise 187 | 188 | if dry_run: # pragma: no cover 189 | for file in upload_objects: 190 | logger.info(f'Dry-run uploading {file.s3_key}') 191 | else: 192 | with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: 193 | for file in upload_objects: 194 | executor.submit(uploader_task, s3_client, file) 195 | 196 | # Delete files not needed any more 197 | for file in deletion_objects: 198 | if dry_run: # pragma: no cover 199 | logger.info(f'Dry run deleting {file.s3_key}') 200 | else: 201 | logger.info(f'Deleting {file.s3_key}') 202 | 203 | file.delete_from_s3(bucket, s3_client) 204 | -------------------------------------------------------------------------------- /src/repo_config/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | from os import path 3 | from .repo_config import RepoConfig 4 | 5 | __all__ = [ 6 | 'RepoConfig', 'from_json_file', 'from_object' 7 | ] 8 | 9 | PAGES_JSON = 'pages.json' 10 | FEDERALIST_JSON = 'federalist.json' 11 | 12 | 13 | def from_json_file(clone_dir, defaults={}): 14 | obj = {} 15 | 16 | json_files = [PAGES_JSON, FEDERALIST_JSON] 17 | for json_file_name in json_files: 18 | json_file_path = path.join(clone_dir, json_file_name) 19 | if path.isfile(json_file_path): 20 | with open(json_file_path) as json_file: 21 | obj = json.load(json_file) 22 | break 23 | 24 | return from_object(obj, defaults) 25 | 26 | 27 | def from_object(obj, defaults={}): 28 | return RepoConfig(obj, defaults) 29 | -------------------------------------------------------------------------------- /src/repo_config/repo_config.py: -------------------------------------------------------------------------------- 1 | import fnmatch 2 | 3 | 4 | class RepoConfig: 5 | ''' 6 | Encapsulate the logic for handling the `federalist.json` configuration 7 | 8 | The file should look something like: 9 | { 10 | "fullClone": true, 11 | "headers": [ 12 | "/*": { 13 | "cache-control": "no-cache" 14 | } 15 | ], 16 | "excludePaths": [ 17 | "**/Dockerfile", 18 | "/another_excluded_file.yml" 19 | ], 20 | "includePaths": [ 21 | "/included_file", 22 | "/.well-known/security.txt 23 | ], 24 | "cache": true 25 | } 26 | 27 | Currently, only the following keys are utilized: 28 | - headers 29 | - excludePaths 30 | - includePaths 31 | - fullClone 32 | - cache 33 | ''' 34 | 35 | def __init__(self, config={}, defaults={}): 36 | self.config = config 37 | self.defaults = defaults 38 | 39 | def get_headers_for_path(self, path_to_match): 40 | ''' 41 | Determine the headers that apply to particular filepath 42 | ''' 43 | 44 | # A shallow copy should be sufficient 45 | resolved_headers = self.defaults.get('headers', {}).copy() 46 | 47 | first_matching_cfg = find_first_matching_cfg( 48 | self.config.get('headers', []), 49 | path_to_match) 50 | 51 | if first_matching_cfg: 52 | headers = first_value(first_matching_cfg) 53 | 54 | for key, value in headers.items(): 55 | resolved_headers[key.strip().lower()] = value.strip() 56 | 57 | return resolved_headers 58 | 59 | def is_path_excluded(self, path_to_match): 60 | return ((contains_dotpath(path_to_match) or self.is_exclude_path_match(path_to_match)) 61 | and not self.is_include_path_match(path_to_match)) 62 | 63 | def is_path_included(self, path_to_match): 64 | return not self.is_path_excluded(path_to_match) 65 | 66 | def is_exclude_path_match(self, path_to_match): 67 | return is_path_match(self.exclude_paths(), path_to_match) 68 | 69 | def is_include_path_match(self, path_to_match): 70 | return 
is_path_match(self.include_paths(), path_to_match) 71 | 72 | def full_clone(self): 73 | return self.config.get('fullClone', False) is True 74 | 75 | def exclude_paths(self): 76 | return self.config.get('excludePaths', []) + self.defaults.get('excludePaths', []) 77 | 78 | def include_paths(self): 79 | return self.config.get('includePaths', []) + self.defaults.get('includePaths', []) 80 | 81 | def should_cache(self): 82 | return self.config.get('cache', True) is True 83 | 84 | 85 | def contains_dotpath(filename): 86 | return any(segment for segment in filename.split('/') if segment.startswith('.')) 87 | 88 | 89 | def is_path_match(patterns, path_to_match): 90 | for pattern in patterns: 91 | if fnmatch.fnmatch(prepend_slash(path_to_match), pattern): 92 | return True 93 | 94 | return False 95 | 96 | 97 | def find_first_matching_cfg(configuration_section, path_to_match): 98 | ''' 99 | Find and return the FIRST configuration rule where the `path_to_match` matches 100 | the configured pattern. 101 | 102 | Order is important, so the configuration must be specified and handled as a 103 | list. 104 | 105 | If no path matches, an empty dict is returned. 106 | ''' 107 | 108 | return next( 109 | (configuration_rule 110 | for configuration_rule 111 | in configuration_section 112 | if match_path(first_key(configuration_rule), path_to_match)), 113 | {}) 114 | 115 | 116 | def match_path(pattern, path_to_match): 117 | ''' 118 | Determine if the `path_to_match` matches the path `pattern` 119 | 120 | >>> match_path('/*', '/index.html') 121 | True 122 | 123 | >>> match_path('/index.html', '/foo.js') 124 | False 125 | 126 | Patterns can contain the '*' and ':foo' wildcards. 127 | 128 | The '*' wildcard will match anything including '/' 129 | Ex. 130 | 131 | >>> match_path('/*', '/foo/bar/baz/index.html') 132 | True 133 | 134 | When combined with an extension, ie '*.html', the wildcard will match 135 | everything up to the LAST extension in the path to match, which must 136 | be matched exactly. 137 | Ex. 138 | 139 | >>> match_path('/*.html', '/foo/bar/baz/index.foo.html') 140 | True 141 | 142 | >>> match_path('/*.foo', '/foo/bar/baz/index.foo.html') 143 | False 144 | 145 | The ':foo' wildcard will match anything EXCEPT '/', 146 | ie it is a single segment wildcard. It can contain any letters after ':' 147 | Ex. 
148 | 149 | >>> match_path('/:foo/bar', '/abc/bar') 150 | True 151 | 152 | >>> match_path('/:baz', '/abc/foo') 153 | False 154 | ''' 155 | 156 | # normalize the paths by removing leading slash since that will 157 | # result in a leading empty string with 'split'ing 158 | pattern = strip_prefix('/', pattern) 159 | path_to_match = strip_prefix('/', path_to_match) 160 | 161 | pattern_parts = pattern.split('/') 162 | path_parts = path_to_match.split('/') 163 | 164 | for idx, pattern_part in enumerate(pattern_parts): 165 | if pattern_part == '*': 166 | return True 167 | 168 | if pattern_part.startswith(':'): 169 | continue 170 | 171 | if len(path_parts) <= idx: 172 | return False 173 | 174 | if pattern_part.startswith('*.'): 175 | pattern_part_ext = pattern_part.split('.')[-1] 176 | last_path_part = path_parts[-1] 177 | last_path_ext = last_path_part.split('.')[-1] 178 | return last_path_ext == pattern_part_ext 179 | 180 | path_part = path_parts[idx] 181 | 182 | if path_part != pattern_part: 183 | return False 184 | 185 | if len(path_parts) > len(pattern_parts): 186 | return False 187 | 188 | return True 189 | 190 | 191 | def first_key(dikt): 192 | return next(key for key in dikt) 193 | 194 | 195 | def first_value(dikt): 196 | return next(value for value in dikt.values()) 197 | 198 | 199 | def strip_prefix(prefix, path): 200 | # Copied from models.py::remove_prefix 201 | return path[len(prefix):] if path.startswith(prefix) else path 202 | 203 | 204 | def prepend_slash(path): 205 | return path if path.startswith('/') else ('/' + path) 206 | -------------------------------------------------------------------------------- /src/runner/__init__.py: -------------------------------------------------------------------------------- 1 | import grp 2 | import os 3 | import pwd 4 | import shlex 5 | import subprocess # nosec 6 | from io import StringIO 7 | 8 | NVM_PATH = '~/.nvm/nvm.sh' 9 | RVM_PATH = '/usr/local/rvm/scripts/rvm' 10 | 11 | 12 | def setuser(): 13 | os.setgid(grp.getgrnam('rvm').gr_gid) 14 | os.setuid(pwd.getpwnam('customer').pw_uid) 15 | 16 | 17 | def run(logger, command, cwd=None, env=None, shell=False, check=True, node=False, ruby=False, skip_log=False): # noqa: E501 18 | ''' 19 | Run an OS command with provided cwd or env, stream logs to logger, and return the exit code. 20 | 21 | Errors that occur BEFORE the command is actually executed are caught and handled here. 22 | 23 | Errors encountered by the executed command are caught unless `check=False`. In these cases a 24 | non-zero exit code will be returned to be handled by the caller. 25 | 26 | See https://docs.python.org/3/library/subprocess.html#popen-constructor for details. 
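    When `check` is true the aggregated stdout is returned and a non-zero exit raises CalledProcessError; with `check=False` the command's return code is returned, and 1 is returned for errors raised while invoking the command.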
27 | ''' 28 | 29 | if ruby: 30 | command = f'source {RVM_PATH} && {command}' 31 | shell = True 32 | 33 | if node: 34 | command = f'source {NVM_PATH} && {command}' 35 | shell = True 36 | 37 | if isinstance(command, str) and not shell: 38 | command = shlex.split(command) 39 | 40 | # When a shell is needed, use `bash` instead of `sh` 41 | executable = '/bin/bash' if shell else None 42 | 43 | # aggregate stdout in case we need to return 44 | output = StringIO() 45 | 46 | try: 47 | p = subprocess.Popen( # nosec 48 | command, 49 | cwd=cwd, 50 | env=env, 51 | shell=shell, 52 | executable=executable, 53 | stderr=subprocess.STDOUT, 54 | stdout=subprocess.PIPE, 55 | bufsize=1, 56 | encoding='utf-8', 57 | text=True, 58 | preexec_fn=setuser 59 | ) 60 | while p.poll() is None: 61 | line = p.stdout.readline().strip() 62 | if not skip_log: 63 | logger.info(line) 64 | output.write(line) 65 | 66 | line = p.stdout.readline().strip() 67 | if not skip_log: 68 | logger.info(line) 69 | output.write(line) 70 | 71 | if check: 72 | if p.returncode: 73 | raise subprocess.CalledProcessError(p.returncode, command) 74 | return output.getvalue() 75 | 76 | return p.returncode 77 | 78 | # This occurs when Popen itself is called with invalid arguments 79 | except ValueError as err: 80 | logger.error('Encountered a problem invoking Popen.') 81 | logger.error(str(err)) 82 | 83 | if check: 84 | raise err 85 | 86 | return 1 87 | 88 | # This occurs when the command given to Popen cannot be executed. 89 | # Ex. the file doesn't exist, there was a typo, etc... 90 | except OSError as err: 91 | logger.error('Encountered a problem executing `' + ' '.join(command) + '`.') 92 | logger.error(str(err)) 93 | 94 | if check: 95 | raise err 96 | 97 | return 1 98 | -------------------------------------------------------------------------------- /src/steps/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import ( 2 | build_hugo, build_jekyll, build_static, 3 | download_hugo, run_build_script, run_step, setup_bundler, 4 | setup_node, setup_ruby, 5 | ) 6 | from .exceptions import StepException 7 | from .fetch import fetch_repo, update_repo, fetch_commit_sha 8 | from .publish import publish 9 | 10 | 11 | __all__ = [ 12 | 'build_hugo', 13 | 'build_jekyll', 14 | 'build_static', 15 | 'download_hugo', 16 | 'fetch_repo', 17 | 'publish', 18 | 'run_build_script', 19 | 'run_step', 20 | 'setup_bundler', 21 | 'setup_node', 22 | 'setup_ruby', 23 | 'StepException', 24 | 'update_repo', 25 | 'fetch_commit_sha', 26 | ] 27 | -------------------------------------------------------------------------------- /src/steps/build.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | from os import path 5 | from pathlib import Path 6 | import re 7 | import requests 8 | import shlex 9 | import subprocess # nosec 10 | import time 11 | import yaml 12 | 13 | from common import (CLONE_DIR_PATH, SITE_BUILD_DIR, SITE_BUILD_DIR_PATH, WORKING_DIR_PATH) 14 | from log_utils import get_logger 15 | from runner import run, setuser 16 | from .cache import CacheFolder 17 | from .exceptions import StepException 18 | 19 | HUGO_BIN = 'hugo' 20 | HUGO_VERSION = '.hugo-version' 21 | NVMRC = '.nvmrc' 22 | PACKAGE_JSON = 'package.json' 23 | PACKAGE_LOCK = 'package-lock.json' 24 | NODE_MODULES = 'node_modules' 25 | RUBY_VERSION = '.ruby-version' 26 | GEMFILE = 'Gemfile' 27 | GEMFILELOCK = 'Gemfile.lock' 28 | JEKYLL_CONFIG_YML = '_config.yml' 29 | 
BUNDLER_VERSION = '.bundler-version' 30 | 31 | CERTS_PATH = Path('/etc/ssl/certs/ca-certificates.crt') 32 | RVM_PATH = Path('/usr/local/rvm/scripts/rvm') 33 | 34 | 35 | def build_env(branch, owner, repository, site_prefix, base_url, 36 | user_env_vars=[]): 37 | '''Creates a dict of environment variables to pass into a build context''' 38 | env = { 39 | 'BRANCH': branch, 40 | 'OWNER': owner, 41 | 'REPOSITORY': repository, 42 | 'SITE_PREFIX': site_prefix, 43 | 'BASEURL': base_url, 44 | # necessary to make sure build engines use utf-8 encoding 45 | 'LANG': 'en_US.UTF-8', 46 | 'GATSBY_TELEMETRY_DISABLED': '1', 47 | # Not that folks should really be using `pry` on Pages but 48 | # https://github.com/pry/pry/pull/2165 49 | 'HOME': '/home/customer', 50 | } 51 | 52 | for uev in user_env_vars: 53 | name = uev['name'] 54 | value = uev['value'] 55 | if name in env or name.upper() in env: 56 | print( 57 | f'user environment variable name `{name}` conflicts ' 58 | 'with system environment variable, it will be ignored.' 59 | ) 60 | else: 61 | env[name] = value 62 | 63 | return env 64 | 65 | 66 | def build_static(): 67 | '''Moves all files from CLONE_DIR into SITE_BUILD_DIR''' 68 | logger = get_logger('build-static') 69 | 70 | dir = path.join(CLONE_DIR_PATH, '.git') 71 | logger.info(f'Cleaning {dir}') 72 | shutil.rmtree(dir, ignore_errors=True) 73 | 74 | logger.info(f'Moving files to {SITE_BUILD_DIR}') 75 | 76 | # Make the site build directory first 77 | SITE_BUILD_DIR_PATH.mkdir(exist_ok=True) 78 | 79 | files = os.listdir(CLONE_DIR_PATH) 80 | 81 | for file in files: 82 | # don't move the SITE_BUILD_DIR dir into itself 83 | if file is not SITE_BUILD_DIR: 84 | shutil.move(str(CLONE_DIR_PATH / file), 85 | str(SITE_BUILD_DIR_PATH)) 86 | 87 | 88 | def has_build_script(script_name): 89 | ''' 90 | Checks for existence of the script (ie: "federalist", "pages") in the 91 | cloned repo's package.json. 92 | ''' 93 | PACKAGE_JSON_PATH = CLONE_DIR_PATH / PACKAGE_JSON 94 | if PACKAGE_JSON_PATH.is_file(): 95 | with PACKAGE_JSON_PATH.open() as json_file: 96 | package_json = json.load(json_file) 97 | return script_name in package_json.get('scripts', {}) 98 | 99 | return False 100 | 101 | 102 | def check_supported_ruby_version(version): 103 | ''' 104 | Checks if the version defined in .ruby-version is supported 105 | Raises a generic exception if not 106 | ''' 107 | is_supported = 0 108 | 109 | if version: 110 | logger = get_logger('setup-ruby') 111 | 112 | RUBY_VERSION_MIN = os.getenv('RUBY_VERSION_MIN') 113 | is_supported = run( 114 | logger, 115 | f'ruby -e "exit Gem::Version.new(\'{shlex.split(version)[0]}\') >= Gem::Version.new(\'{RUBY_VERSION_MIN}\') ? 1 : 0"', # noqa: E501 116 | cwd=CLONE_DIR_PATH, 117 | env={}, 118 | ruby=True, 119 | check=False, 120 | ) 121 | 122 | upgrade_msg = 'Please upgrade to an actively supported version, see https://www.ruby-lang.org/en/downloads/branches/ for details.' # noqa: E501 123 | 124 | if version == RUBY_VERSION_MIN: 125 | logger.warning( 126 | f'WARNING: Ruby {RUBY_VERSION_MIN} will soon reach end-of-life, at which point Pages will no longer support it.') # noqa: E501 127 | logger.warning(upgrade_msg) 128 | 129 | if not is_supported: 130 | error = 'ERROR: Unsupported ruby version specified in .ruby-version.' 131 | logger.error(error) 132 | logger.error(upgrade_msg) 133 | raise Exception(error) 134 | 135 | 136 | def setup_node(should_cache: bool, bucket, s3_client, post_metrics): 137 | ''' 138 | Sets up node and installs dependencies. 
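    When caching is enabled and a package-lock.json is present, node_modules is restored from (or saved back to) the S3 build cache, and `npm ci` is skipped if the cache already exists.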
139 | 140 | Uses the node version specified in the cloned repo's .nvmrc 141 | file if it is present. 142 | ''' 143 | logger = get_logger('setup-node') 144 | 145 | def runp(cmd, skip_log=False): 146 | return run(logger, cmd, cwd=CLONE_DIR_PATH, env={}, node=True, skip_log=skip_log) 147 | 148 | NVMRC_PATH = CLONE_DIR_PATH / NVMRC 149 | if NVMRC_PATH.is_file(): 150 | # nvm will output the node and npm versions used 151 | logger.info('Checking node version specified in .nvmrc') 152 | runp(""" 153 | RAW_VERSION=$(nvm version-remote $(cat .nvmrc)) 154 | MAJOR_VERSION=$(echo $RAW_VERSION | cut -d. -f 1 | cut -dv -f 2) 155 | if [[ "$MAJOR_VERSION" =~ ^(18|20|22)$ ]]; then 156 | echo "Switching to node version $RAW_VERSION specified in .nvmrc" 157 | 158 | if [[ "$MAJOR_VERSION" -eq 18 ]]; then 159 | echo "WARNING: Node $RAW_VERSION will reach end-of-life on 2025-04-30, at which point Pages will no longer support it." 160 | echo "Please upgrade to LTS major version 20 or 22, see https://nodejs.org/en/about/releases/ for details." 161 | fi 162 | 163 | nvm install $RAW_VERSION 164 | nvm alias default $RAW_VERSION 165 | else 166 | echo "Unsupported node major version '$MAJOR_VERSION' specified in .nvmrc." 167 | echo "Please upgrade to LTS major version 20 or 22, see https://nodejs.org/en/about/releases/ for details." 168 | exit 1 169 | fi 170 | """) # noqa: E501 171 | else: 172 | # output node and npm versions if the defaults are used 173 | logger.info('Using default node version') 174 | runp('nvm alias default $(nvm version)') 175 | runp('echo Node version: $(node --version)') 176 | runp('echo NPM version: $(npm --version)') 177 | 178 | # capture version and cache 179 | node_version = runp('node --version', skip_log=True) 180 | post_metrics({ 181 | "engines": { 182 | "node": dict(version=node_version, cache=should_cache) 183 | } 184 | }) 185 | 186 | cache_folder = None 187 | PACKAGE_LOCK_PATH = CLONE_DIR_PATH / PACKAGE_LOCK 188 | if PACKAGE_LOCK_PATH.is_file(): 189 | if should_cache: 190 | logger.info(f'{PACKAGE_LOCK} found. Attempting to download cache') 191 | NM_FOLDER = CLONE_DIR_PATH / NODE_MODULES 192 | cache_folder = CacheFolder(PACKAGE_LOCK_PATH, NM_FOLDER, bucket, s3_client, logger) 193 | cache_folder.download_unzip() 194 | 195 | if PACKAGE_LOCK_PATH.is_file(): 196 | if should_cache and cache_folder.exists(): 197 | logger.info('skipping npm ci and using cache') 198 | else: 199 | logger.info('Installing dependencies in package-lock.json') 200 | runp('npm set audit false') 201 | runp('npm ci') 202 | 203 | if PACKAGE_LOCK_PATH.is_file() and should_cache: 204 | if not cache_folder.exists(): 205 | cache_folder.zip_upload_folder_to_s3() 206 | 207 | 208 | def run_build_script(branch, owner, repository, site_prefix, 209 | base_url='', user_env_vars=[]): 210 | ''' 211 | Runs the npm build (ie: "federalist","pages", ...) 
script if it is defined 212 | ''' 213 | 214 | scripts = ["pages", "federalist"] 215 | for script_name in scripts: 216 | if has_build_script(script_name): 217 | logger = get_logger(f'run-{script_name}-script') 218 | logger.info(f'Running {script_name} build script in package.json') 219 | env = build_env(branch, owner, repository, site_prefix, base_url, user_env_vars) 220 | run(logger, f'npm run {script_name}', cwd=CLONE_DIR_PATH, env=env, node=True) 221 | return 222 | 223 | 224 | def run_step(step, msg, *args, **kwargs): 225 | try: 226 | step(*args, **kwargs) 227 | except Exception: 228 | raise StepException(msg) 229 | 230 | 231 | def download_hugo(post_metrics): 232 | logger = get_logger('download-hugo') 233 | 234 | HUGO_VERSION_PATH = CLONE_DIR_PATH / HUGO_VERSION 235 | if HUGO_VERSION_PATH.is_file(): 236 | logger.info('.hugo-version found') 237 | hugo_version = '' 238 | with HUGO_VERSION_PATH.open() as hugo_vers_file: 239 | try: 240 | hugo_version = hugo_vers_file.readline().strip() 241 | hugo_version = shlex.quote(hugo_version) 242 | regex = r'^(extended_)?[\d]+(\.[\d]+)*$' 243 | hugo_version = re.search(regex, hugo_version).group(0) 244 | except Exception: 245 | raise RuntimeError('Invalid .hugo-version') 246 | 247 | if hugo_version: 248 | logger.info(f'Using hugo version in .hugo-version: {hugo_version}') 249 | post_metrics({ 250 | "engines": { 251 | "hugo": dict(version=hugo_version) 252 | } 253 | }) 254 | else: 255 | raise RuntimeError(".hugo-version not found") 256 | ''' 257 | Downloads the specified version of Hugo 258 | ''' 259 | logger.info(f'Downloading hugo version {hugo_version}') 260 | failed_attempts = 0 261 | while (failed_attempts < 5): 262 | try: 263 | dl_url = ('https://github.com/gohugoio/hugo/releases/download/v' 264 | + hugo_version.split('_')[-1] + 265 | f'/hugo_{hugo_version}_Linux-64bit.tar.gz') 266 | response = requests.get(dl_url, verify=CERTS_PATH, timeout=10) 267 | 268 | hugo_tar_path = WORKING_DIR_PATH / 'hugo.tar.gz' 269 | with hugo_tar_path.open('wb') as hugo_tar: 270 | for chunk in response.iter_content(chunk_size=128): 271 | hugo_tar.write(chunk) 272 | 273 | HUGO_BIN_PATH = WORKING_DIR_PATH / HUGO_BIN 274 | run(logger, f'tar -xzf {hugo_tar_path} -C {WORKING_DIR_PATH}', env={}) 275 | run(logger, f'chmod +x {HUGO_BIN_PATH}', env={}) 276 | return 277 | except Exception: 278 | failed_attempts += 1 279 | logger.info( 280 | f'Failed attempt #{failed_attempts} to download hugo version: {hugo_version}' 281 | ) 282 | if failed_attempts == 5: 283 | raise RuntimeError(f'Unable to download hugo version: {hugo_version}') 284 | time.sleep(2) # try again in 2 seconds 285 | 286 | 287 | def build_hugo(branch, owner, repository, site_prefix, 288 | base_url='', user_env_vars=[]): 289 | ''' 290 | Builds the cloned site with Hugo 291 | ''' 292 | logger = get_logger('build-hugo') 293 | 294 | HUGO_BIN_PATH = WORKING_DIR_PATH / HUGO_BIN 295 | 296 | run(logger, f'echo hugo version: $({HUGO_BIN_PATH} version)', env={}) 297 | 298 | logger.info('Building site with hugo') 299 | 300 | hugo_args = f'--source {CLONE_DIR_PATH} --destination {SITE_BUILD_DIR_PATH}' 301 | if base_url: 302 | hugo_args += f' --baseURL {base_url}' 303 | 304 | env = build_env(branch, owner, repository, site_prefix, base_url, user_env_vars) 305 | run(logger, f'{HUGO_BIN_PATH} {hugo_args}', cwd=CLONE_DIR_PATH, env=env, node=True) 306 | 307 | 308 | def setup_ruby(should_cache, post_metrics): 309 | ''' 310 | Sets up RVM and installs ruby 311 | Uses the ruby version specified in .ruby-version if present 312 | ''' 
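    # Illustrative example: a repo whose .ruby-version contains "3.1.4" results in
    # `rvm install 3.1.4` being run in the RVM shell, after
    # check_supported_ruby_version() has verified it against RUBY_VERSION_MIN;
    # the resulting `ruby -v` output is then reported via post_metrics.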
313 | 314 | logger = get_logger('setup-ruby') 315 | 316 | def runp(cmd, skip_log=False): 317 | return run(logger, cmd, cwd=CLONE_DIR_PATH, env={}, ruby=True, skip_log=skip_log) 318 | 319 | RUBY_VERSION_PATH = CLONE_DIR_PATH / RUBY_VERSION 320 | if RUBY_VERSION_PATH.is_file(): 321 | logger.info('Using ruby version in .ruby-version') 322 | with RUBY_VERSION_PATH.open() as ruby_vers_file: 323 | ruby_version = ruby_vers_file.readline().strip() 324 | # escape-quote the value in case there's anything weird 325 | # in the .ruby-version file 326 | ruby_version = shlex.quote(ruby_version) 327 | check_supported_ruby_version(ruby_version) 328 | runp(f'rvm install {ruby_version}') 329 | 330 | ruby_version = runp('ruby -v', skip_log=True) 331 | post_metrics({ 332 | "engines": { 333 | "ruby": dict(version=ruby_version, cache=should_cache) 334 | } 335 | }) 336 | runp('echo Ruby version: $(ruby -v)') 337 | 338 | 339 | def setup_bundler(should_cache: bool, bucket, s3_client): 340 | logger = get_logger('setup-bundler') 341 | 342 | def runp(cmd): 343 | return run(logger, cmd, cwd=CLONE_DIR_PATH, env={}, ruby=True) 344 | 345 | GEMFILE_PATH = CLONE_DIR_PATH / GEMFILE 346 | GEMFILELOCK_PATH = CLONE_DIR_PATH / GEMFILELOCK 347 | 348 | if not GEMFILE_PATH.is_file(): 349 | logger.info('No Gemfile found, installing Jekyll.') 350 | return runp('gem install jekyll -v 4.2.2 --no-document') 351 | 352 | logger.info('Gemfile found, setting up bundler') 353 | 354 | version = '<2' 355 | 356 | BUNDLER_VERSION_PATH = CLONE_DIR_PATH / BUNDLER_VERSION 357 | 358 | if BUNDLER_VERSION_PATH.is_file(): 359 | with BUNDLER_VERSION_PATH.open() as bundler_vers_file: 360 | try: 361 | bundler_vers = bundler_vers_file.readline().strip() 362 | # escape-quote the value in case there's anything weird 363 | # in the .bundler-version file 364 | bundler_vers = shlex.quote(bundler_vers) 365 | regex = r'^[\d]+(\.[\d]+)*$' 366 | bundler_vers = re.search(regex, bundler_vers).group(0) 367 | if bundler_vers: 368 | logger.info('Using bundler version in .bundler-version') 369 | version = bundler_vers 370 | except Exception: 371 | raise RuntimeError('Invalid .bundler-version') 372 | 373 | runp(f'gem install bundler --version "{version}"') 374 | 375 | cache_folder = None 376 | if GEMFILELOCK_PATH.is_file() and should_cache: 377 | logger.info(f'{GEMFILELOCK} found. 
Attempting to download cache') 378 | GEMFOLDER = subprocess.run( # nosec 379 | f'source {RVM_PATH} && rvm gemdir', 380 | cwd=CLONE_DIR_PATH, 381 | shell=True, 382 | executable='/bin/bash', 383 | capture_output=True, 384 | preexec_fn=setuser 385 | ) 386 | GEMFOLDER = GEMFOLDER.stdout.decode('utf-8').strip() 387 | cache_folder = CacheFolder(GEMFILELOCK_PATH, GEMFOLDER, bucket, s3_client, logger) 388 | cache_folder.download_unzip() 389 | 390 | logger.info('Installing dependencies in Gemfile') 391 | runp('bundle install') 392 | 393 | if GEMFILELOCK_PATH.is_file() and should_cache: 394 | # we also need to check for cache_folder here because we shouldn't cache if they didn't 395 | # initially have a lockfile (bundle install creates one) 396 | if cache_folder and not cache_folder.exists(): 397 | cache_folder.zip_upload_folder_to_s3() 398 | 399 | 400 | def update_jekyll_config(federalist_config={}, custom_config_path=''): 401 | logger = get_logger('build-jekyll') 402 | 403 | JEKYLL_CONF_YML_PATH = CLONE_DIR_PATH / JEKYLL_CONFIG_YML 404 | 405 | config_yml = {} 406 | with JEKYLL_CONF_YML_PATH.open('r') as jekyll_conf_file: 407 | config_yml = yaml.safe_load(jekyll_conf_file) 408 | 409 | custom_config = {} 410 | if custom_config_path: 411 | try: 412 | custom_config = json.loads(custom_config_path) 413 | except json.JSONDecodeError: 414 | error = 'Could not load/parse custom yaml config.' 415 | logger.error(error) 416 | raise Exception(error) 417 | 418 | config_yml = {**config_yml, **custom_config, **federalist_config} 419 | 420 | with JEKYLL_CONF_YML_PATH.open('w') as jekyll_conf_file: 421 | yaml.dump(config_yml, jekyll_conf_file, default_flow_style=False) 422 | 423 | 424 | def build_jekyll(branch, owner, repository, site_prefix, 425 | base_url='', config='', user_env_vars=[]): 426 | ''' 427 | Builds the cloned site with Jekyll 428 | ''' 429 | logger = get_logger('build-jekyll') 430 | 431 | update_jekyll_config( 432 | dict(baseurl=base_url, branch=branch), 433 | config 434 | ) 435 | 436 | jekyll_cmd = 'jekyll' 437 | 438 | GEMFILE_PATH = CLONE_DIR_PATH / GEMFILE 439 | if GEMFILE_PATH.is_file(): 440 | jekyll_cmd = f'bundle exec {jekyll_cmd}' 441 | 442 | run( 443 | logger, 444 | f'echo Building using Jekyll version: $({jekyll_cmd} -v)', 445 | cwd=CLONE_DIR_PATH, 446 | env={}, 447 | ruby=True 448 | ) 449 | 450 | env = build_env(branch, owner, repository, site_prefix, base_url, user_env_vars) 451 | env['JEKYLL_ENV'] = 'production' 452 | 453 | run( 454 | logger, 455 | f'{jekyll_cmd} build --destination {SITE_BUILD_DIR_PATH}', 456 | cwd=CLONE_DIR_PATH, 457 | env=env, 458 | node=True, 459 | ruby=True 460 | ) 461 | -------------------------------------------------------------------------------- /src/steps/cache.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | import os 3 | import hashlib 4 | import shutil 5 | import botocore 6 | 7 | # Cache expiration time 8 | NEXT_MONTH = datetime.now() + timedelta(days=30) 9 | ARCHIVE_METHOD = 'tar' 10 | 11 | 12 | def get_checksum(filename): 13 | m = hashlib.md5() # nosec 14 | with open(filename, 'rb') as f: 15 | while chunk := f.read(4096): 16 | m.update(chunk) 17 | return m.hexdigest() 18 | 19 | 20 | class CacheFolder(): 21 | ''' 22 | An abstract class for a cache folder in S3 23 | ''' 24 | 25 | def __init__(self, checksum_file, local_folder, bucket, s3_client, logger): 26 | self.checksum_file = checksum_file 27 | self.key = get_checksum(checksum_file) 28 | self.local_folder = local_folder 
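        # The cache key above is the md5 checksum of the dependency lockfile
        # (package-lock.json or Gemfile.lock in practice), so any change to the
        # lockfile maps to a new `_cache/<key>` archive in S3.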
29 | self.bucket = bucket 30 | self.s3_client = s3_client 31 | self.logger = logger 32 | 33 | def exists(self): 34 | '''Check if a given cache key exists''' 35 | try: 36 | self.s3_client.head_object( 37 | Bucket=self.bucket, 38 | Key=f'_cache/{self.key}' 39 | ) 40 | return True 41 | except botocore.exceptions.ClientError as error: 42 | if error.response['Error']['Message'] == 'Not Found': 43 | return False 44 | else: 45 | self.logger.error(error.response['Error']) 46 | raise error 47 | 48 | def zip_upload_folder_to_s3(self): 49 | self.logger.info(f'Caching dependencies from {self.local_folder}.') 50 | tmp_file = f'{self.key}.{ARCHIVE_METHOD}' 51 | shutil.make_archive(self.key, ARCHIVE_METHOD, self.local_folder) 52 | self.logger.info(f'Created archive {tmp_file}') 53 | self.s3_client.upload_file( 54 | Filename=tmp_file, 55 | Bucket=self.bucket, 56 | Key=f'_cache/{self.key}', 57 | ExtraArgs=dict(Expires=NEXT_MONTH) 58 | ) 59 | os.unlink(tmp_file) 60 | 61 | def download_unzip(self): 62 | if self.exists(): 63 | self.logger.info(f'Dependency cache found, downloading to {self.local_folder}.') 64 | tmp_file = f'{self.key}.{ARCHIVE_METHOD}' 65 | self.s3_client.download_file( 66 | Filename=tmp_file, 67 | Bucket=self.bucket, 68 | Key=f'_cache/{self.key}' 69 | ) 70 | shutil.unpack_archive(tmp_file, self.local_folder, ARCHIVE_METHOD) 71 | os.unlink(tmp_file) 72 | else: 73 | self.logger.info('No cache file found.') 74 | -------------------------------------------------------------------------------- /src/steps/exceptions.py: -------------------------------------------------------------------------------- 1 | class StepException(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /src/steps/fetch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Fetch tasks and helpers 3 | ''' 4 | import shlex 5 | import subprocess # nosec 6 | 7 | from log_utils import get_logger 8 | from runner import run 9 | from common import (REPO_BASE_URL, CLONE_DIR_PATH) 10 | from steps import StepException 11 | 12 | 13 | def fetch_url(owner, repository, access_token=''): # nosec 14 | ''' 15 | Creates a URL to a remote git repository. 16 | If `access_token` is specified, it will be included in the authentication 17 | section of the returned URL. 18 | 19 | >>> fetch_url('owner', 'repo') 20 | 'https://github.com/owner/repo.git' 21 | 22 | >>> fetch_url('owner2', 'repo2', 'secret-token') 23 | 'https://secret-token@github.com/owner2/repo2.git' 24 | ''' 25 | repo_url = f'{REPO_BASE_URL}/{owner}/{repository}.git' 26 | if access_token: 27 | repo_url = f'{access_token}@{repo_url}' 28 | 29 | return f'https://{repo_url}' 30 | 31 | 32 | def fetch_repo(owner, repository, branch, github_token=''): # nosec 33 | ''' 34 | Clones the GitHub repository specified by owner and repository 35 | into CLONE_DIR_PATH. 
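    For example (assuming REPO_BASE_URL is github.com, as in the fetch_url
    doctests above), fetch_repo('owner', 'repo', 'main') runs roughly:

        git clone -b main --single-branch --depth 1 https://github.com/owner/repo.git <CLONE_DIR_PATH>

    where <CLONE_DIR_PATH> comes from common.CLONE_DIR_PATH and a github_token,
    when supplied, is embedded in the clone URL by fetch_url.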
36 | ''' 37 | logger = get_logger('clone') 38 | 39 | owner = shlex.quote(owner) 40 | repository = shlex.quote(repository) 41 | branch = shlex.quote(branch) 42 | 43 | clone_env = { 44 | 'HOME': '/home' 45 | } 46 | 47 | command = ( 48 | f'git clone -b {branch} --single-branch --depth 1 ' 49 | f'{fetch_url(owner, repository, github_token)} ' 50 | f'{CLONE_DIR_PATH}' 51 | ) 52 | 53 | return run(logger, command, env=clone_env, check=False) 54 | 55 | 56 | def update_repo(clone_dir): 57 | ''' 58 | Updates the repo with the full git history 59 | ''' 60 | logger = get_logger('update') 61 | 62 | logger.info('Fetching full git history') 63 | 64 | command = 'git pull --unshallow' 65 | 66 | return run(logger, command, cwd=clone_dir) 67 | 68 | 69 | def fetch_commit_sha(clone_dir): 70 | ''' 71 | fetch the last commitSHA 72 | ''' 73 | try: 74 | logger = get_logger('clone') 75 | logger.info('Fetching commit details ...') 76 | # prior to running commands on the repo, make sure it isn't "dubious" 77 | # "detected dubious ownership in repository" 78 | git_command = shlex.split(f'git config --global --add safe.directory {clone_dir}') 79 | subprocess.run( # nosec 80 | git_command, 81 | shell=False, 82 | check=True, 83 | stdout=subprocess.PIPE, 84 | universal_newlines=True, 85 | cwd=clone_dir 86 | ) 87 | command = shlex.split('git log -1') # get last commit only 88 | process = subprocess.run( # nosec 89 | command, 90 | shell=False, 91 | check=True, 92 | stdout=subprocess.PIPE, 93 | universal_newlines=True, 94 | cwd=clone_dir 95 | ) 96 | commit_log = process.stdout 97 | commit_sha = commit_log.split()[1] 98 | logger.info(f'commit {commit_sha}') 99 | return commit_sha 100 | except Exception: 101 | raise StepException('There was a problem fetching the commit hash for this build') 102 | -------------------------------------------------------------------------------- /src/steps/publish.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Publish tasks and helpers 3 | ''' 4 | from datetime import datetime 5 | 6 | from publishing import s3publisher 7 | 8 | from log_utils import delta_to_mins_secs, get_logger 9 | from common import SITE_BUILD_DIR_PATH 10 | 11 | 12 | def publish(base_url, site_prefix, bucket, federalist_config, 13 | s3_client, dry_run=False): 14 | ''' 15 | Publish the built site to S3. 
16 | ''' 17 | logger = get_logger('publish') 18 | 19 | logger.info('Publishing to S3') 20 | 21 | start_time = datetime.now() 22 | 23 | s3publisher.publish_to_s3( 24 | directory=str(SITE_BUILD_DIR_PATH), 25 | base_url=base_url, 26 | site_prefix=site_prefix, 27 | bucket=bucket, 28 | federalist_config=federalist_config, 29 | s3_client=s3_client, 30 | dry_run=dry_run 31 | ) 32 | 33 | delta_string = delta_to_mins_secs(datetime.now() - start_time) 34 | logger.info(f'Total time to publish: {delta_string}') 35 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloud-gov/pages-build-container/00b490e9f858f10b52eb0875b637aea61a913438/test/__init__.py -------------------------------------------------------------------------------- /test/publishing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloud-gov/pages-build-container/00b490e9f858f10b52eb0875b637aea61a913438/test/publishing/__init__.py -------------------------------------------------------------------------------- /test/publishing/test_models.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | 3 | from unittest.mock import Mock 4 | 5 | import pytest 6 | 7 | from publishing.models import SiteObject, SiteFile, SiteRedirect 8 | from ..support import generate_file_hash 9 | 10 | 11 | class TestSiteObject(): 12 | def test_constructor(self): 13 | model = SiteObject( 14 | filename='boop', 15 | md5='md5', 16 | dir_prefix='dir_prefix', 17 | site_prefix='site_prefix' 18 | ) 19 | assert model is not None 20 | 21 | # default params are used 22 | model = SiteObject(filename='boop2', md5='abc') 23 | assert model is not None 24 | assert model.dir_prefix == '' 25 | assert model.site_prefix == '' 26 | 27 | def test_s3_key(self): 28 | model = SiteObject('abc', 'md5', site_prefix='site') 29 | assert model.s3_key == 'site/abc' 30 | 31 | model = SiteObject('/dir/abc', 'md5', 32 | dir_prefix='/dir', site_prefix='site') 33 | assert model.s3_key == 'site/abc' 34 | 35 | model = SiteObject('/not_dir/abc', 'md5', 36 | dir_prefix='/dir', site_prefix='site') 37 | assert model.s3_key == 'site//not_dir/abc' 38 | 39 | def test_delete_from_s3(self): 40 | s3_client = Mock() 41 | 42 | model = SiteObject('/dir/abc', 'md5', 43 | dir_prefix='/dir', site_prefix='site') 44 | model.delete_from_s3('test-bucket', s3_client) 45 | s3_client.delete_object.assert_called_once_with( 46 | Bucket='test-bucket', 47 | Key='site/abc') 48 | 49 | def test_upload_to_s3(self): 50 | model = SiteObject('abc', 'md5') 51 | # Base SiteObject should not have this method implemented 52 | # because it is specific to file and redirect objects 53 | with pytest.raises(NotImplementedError): 54 | model.upload_to_s3('bucket', None) 55 | 56 | 57 | class TestSiteFile(): 58 | @pytest.mark.parametrize('filename, is_compressible', [ 59 | ('test_file.html', True), 60 | ('test_file.css', True), 61 | ('test_file.js', True), 62 | ('test_file.json', True), 63 | ('test_file.svg', True), 64 | ('test_file.txt', False), 65 | ('test_file.exe', False), 66 | ]) 67 | def test_is_compressible(self, tmpdir, filename, is_compressible): 68 | test_dir = tmpdir.mkdir('a_dir') 69 | test_file = test_dir.join(filename) 70 | test_file.write('something something') 71 | model = SiteFile( 72 | filename=str(test_file), 73 | 
dir_prefix=str(test_dir), 74 | site_prefix='/site', 75 | cache_control='max-age=60') 76 | assert model.is_compressible == is_compressible 77 | 78 | def test_non_compressible_file(self, tmpdir): 79 | test_dir = tmpdir.mkdir('boop') 80 | test_file = test_dir.join('test_file.txt') 81 | test_file.write('content') 82 | model = SiteFile( 83 | filename=str(test_file), 84 | dir_prefix=str(test_dir), 85 | site_prefix='/site', 86 | cache_control='max-age=60') 87 | 88 | assert model is not None 89 | 90 | # hardcoded md5 hash of 'content' 91 | assert model.md5 == '9a0364b9e99bb480dd25e1f0284c8555' 92 | assert model.s3_key == '/site/test_file.txt' 93 | assert model.dir_prefix == str(test_dir) 94 | assert model.content_encoding is None 95 | assert model.content_type == 'text/plain' 96 | 97 | # Make sure uploads is called correctly 98 | s3_client = Mock() 99 | model.upload_to_s3('test-bucket', s3_client) 100 | s3_client.upload_file.assert_called_once_with( 101 | Filename=str(test_file), 102 | Bucket='test-bucket', 103 | Key='/site/test_file.txt', 104 | ExtraArgs={ 105 | 'CacheControl': 'max-age=60', 106 | 'ServerSideEncryption': 'AES256', 107 | 'ContentType': 'text/plain', 108 | }, 109 | ) 110 | 111 | def test_compressible_file(self, tmpdir): 112 | test_dir = tmpdir.mkdir('boop') 113 | 114 | # .html files are compressible 115 | test_file = test_dir.join('test_file.html') 116 | test_file.write('content') 117 | model = SiteFile( 118 | filename=str(test_file), 119 | dir_prefix=str(test_dir), 120 | site_prefix='/site', 121 | cache_control='max-age=60') 122 | 123 | assert model is not None 124 | assert model.is_compressible is True 125 | assert model.is_compressed is True 126 | 127 | assert model.md5 == generate_file_hash(test_file) 128 | assert model.s3_key == '/site/test_file.html' 129 | assert model.dir_prefix == str(test_dir) 130 | assert model.content_encoding == 'gzip' 131 | assert model.content_type == 'text/html' 132 | 133 | # Make sure upload is called correctly 134 | s3_client = Mock() 135 | model.upload_to_s3('test-bucket', s3_client) 136 | s3_client.upload_file.assert_called_once_with( 137 | Filename=str(test_file), 138 | Bucket='test-bucket', 139 | Key='/site/test_file.html', 140 | ExtraArgs={ 141 | 'CacheControl': 'max-age=60', 142 | 'ServerSideEncryption': 'AES256', 143 | 'ContentType': 'text/html', 144 | 'ContentEncoding': 'gzip', 145 | }, 146 | ) 147 | 148 | 149 | class TestSiteRedirect(): 150 | def test_constructor_and_props(self, tmpdir): 151 | base_test_dir = tmpdir.mkdir('boop') 152 | test_dir = base_test_dir.mkdir('sub_dir') 153 | 154 | model = SiteRedirect( 155 | filename=str(test_dir), 156 | dir_prefix=str(base_test_dir), 157 | site_prefix='prefix', 158 | base_url='/preview', 159 | cache_control='max-age=60' 160 | ) 161 | 162 | assert model is not None 163 | 164 | expected_dest = '/preview/sub_dir/' 165 | assert model.md5 == hashlib.md5(expected_dest.encode()).hexdigest() 166 | assert model.destination == expected_dest 167 | assert model.s3_key == 'prefix/sub_dir' 168 | 169 | # try with empty dir_prefix 170 | model.dir_prefix = '' 171 | assert model.destination == f'/preview/{test_dir}/' 172 | assert model.s3_key == f'prefix/{test_dir}' 173 | 174 | # and when we're dealing with the "root" redirect object 175 | # ie, filename and dir_prefix are the same 176 | model.filename = str(base_test_dir) 177 | model.dir_prefix = str(base_test_dir) 178 | assert model.destination == '/preview/' 179 | assert model.s3_key == 'prefix' 180 | 181 | def test_upload_to_s3(self, tmpdir): 182 | 
base_test_dir = tmpdir.mkdir('boop') 183 | test_dir = base_test_dir.mkdir('wherever') 184 | 185 | model = SiteRedirect( 186 | filename=str(test_dir), 187 | dir_prefix=str(base_test_dir), 188 | site_prefix='site-prefix', 189 | base_url='/site/test', 190 | cache_control='max-age=60' 191 | ) 192 | 193 | s3_client = Mock() 194 | model.upload_to_s3('test-bucket', s3_client) 195 | 196 | expected_dest = '/site/test/wherever/' 197 | 198 | s3_client.put_object.assert_called_once_with( 199 | Body=expected_dest, 200 | Bucket='test-bucket', 201 | Key='site-prefix/wherever', 202 | ServerSideEncryption='AES256', 203 | WebsiteRedirectLocation=expected_dest, 204 | CacheControl="max-age=60", 205 | ) 206 | -------------------------------------------------------------------------------- /test/publishing/test_s3publisher.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import pytest 3 | import requests_mock 4 | 5 | from moto import mock_aws 6 | 7 | from publishing.s3publisher import list_remote_objects, publish_to_s3 8 | from publishing.models import SiteObject 9 | 10 | import repo_config 11 | 12 | TEST_BUCKET = 'test-bucket' 13 | TEST_REGION = 'test-region' 14 | TEST_ACCESS_KEY = 'fake-access-key' 15 | TEST_SECRET_KEY = 'fake-secret-key' 16 | 17 | 18 | @pytest.fixture 19 | def s3_client(monkeypatch): 20 | monkeypatch.setenv('AWS_ACCESS_KEY_ID', TEST_ACCESS_KEY) 21 | monkeypatch.setenv('AWS_SECRET_ACCESS_KEY', TEST_SECRET_KEY) 22 | 23 | with mock_aws(): 24 | conn = boto3.resource('s3', region_name=TEST_REGION) 25 | 26 | conn.create_bucket( 27 | Bucket=TEST_BUCKET, 28 | CreateBucketConfiguration={"LocationConstraint": "test-bucket"} 29 | ) 30 | 31 | s3_client = boto3.client( 32 | service_name='s3', 33 | region_name=TEST_REGION, 34 | aws_access_key_id=TEST_ACCESS_KEY, 35 | aws_secret_access_key=TEST_SECRET_KEY, 36 | ) 37 | 38 | yield s3_client 39 | 40 | 41 | def test_list_remote_objects(monkeypatch, s3_client): 42 | # Check that nothing is returned if nothing is in the bucket 43 | results = list_remote_objects(TEST_BUCKET, '/test-site', s3_client) 44 | assert results == [] 45 | 46 | # Add a few objects with different prefixes 47 | s3_client.put_object(Key='test-site/a', Body='a', Bucket=TEST_BUCKET) 48 | s3_client.put_object(Key='wrong-prefix/b', Body='b', Bucket=TEST_BUCKET) 49 | 50 | # Check that only one object matching the prefix is returned 51 | results = list_remote_objects(TEST_BUCKET, 'test-site', s3_client) 52 | assert len(results) == 1 53 | assert type(results[0]) == SiteObject 54 | assert results[0].s3_key == 'test-site/a' 55 | 56 | # Add a few more objects 57 | for i in range(0, 10): 58 | s3_client.put_object(Key=f'test-site/sub/{i}.html', 59 | Body=f'{i}', Bucket=TEST_BUCKET) 60 | 61 | # Monkeypatch max keys so we can ensure ContinuationTokens are used 62 | monkeypatch.setattr('publishing.s3publisher.MAX_S3_KEYS_PER_REQUEST', 5) 63 | 64 | # Check that we get all expected objects back 65 | results = list_remote_objects(TEST_BUCKET, 'test-site', s3_client) 66 | assert len(results) == 11 # 10 keys from the loop, 1 from previous put 67 | 68 | 69 | def _make_fake_files(dir, filenames): 70 | for f_name in filenames: 71 | file = dir.join(f_name) 72 | file.write(f'fake content for {f_name}') 73 | 74 | 75 | def test_publish_to_s3(tmpdir, s3_client): 76 | # Use tmpdir to create a fake directory 77 | # full of directories and files to be published/deleted/updated 78 | test_dir = tmpdir.mkdir('test_dir') 79 | 80 | # make a subdirectory 81 | 
test_dir.mkdir('sub_dir') 82 | 83 | site_prefix = 'test_dir' 84 | 85 | filenames = ['index.html', 86 | 'boop.txt', 87 | 'sub_dir/index.html'] 88 | 89 | _make_fake_files(test_dir, filenames) 90 | 91 | federalist_config = repo_config.from_object( 92 | { 93 | 'headers': [ 94 | {'/index.html': {'cache-control': 'no-cache'}}, 95 | {'/*.txt': {'cache-control': 'max-age=1000'}} 96 | ], 97 | 'excludePaths': [ 98 | '/excluded-file' 99 | ] 100 | }, 101 | { 102 | 'headers': { 103 | 'cache-control': 'max-age=60' 104 | }, 105 | 'excludePaths': [ 106 | '*/Dockerfile', 107 | '*/docker-compose.yml' 108 | ], 109 | 'includePaths': [ 110 | '/.well-known/security.txt' 111 | ] 112 | } 113 | ) 114 | 115 | publish_kwargs = { 116 | 'directory': str(test_dir), 117 | 'base_url': '/base_url', 118 | 'site_prefix': site_prefix, 119 | 'bucket': TEST_BUCKET, 120 | 'federalist_config': federalist_config, 121 | 's3_client': s3_client, 122 | } 123 | 124 | # Create mock for default 404 page request 125 | with requests_mock.mock() as m: 126 | m.get(('https://raw.githubusercontent.com' 127 | '/cloud-gov/pages-404-page/main/' 128 | '404-pages-client.html'), 129 | text='default 404 page') 130 | 131 | publish_to_s3(**publish_kwargs) 132 | 133 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 134 | 135 | keys = [r['Key'] for r in results['Contents']] 136 | 137 | assert results['KeyCount'] == 6 # 4 files, 3 redirects & 404.html 138 | 139 | assert f'{site_prefix}/index.html' in keys 140 | assert f'{site_prefix}/boop.txt' in keys 141 | assert f'{site_prefix}/sub_dir' in keys 142 | assert f'{site_prefix}/sub_dir/index.html' in keys 143 | assert f'{site_prefix}/404.html' in keys 144 | assert f'{site_prefix}' in keys # main redirect object 145 | 146 | # Check the cache control headers 147 | cache_control_checks = [ 148 | ('index.html', 'no-cache'), 149 | ('boop.txt', 'max-age=1000'), 150 | ('404.html', 'max-age=60') 151 | ] 152 | for filename, expected in cache_control_checks: 153 | result = s3_client.get_object( 154 | Bucket=TEST_BUCKET, 155 | Key=f'{site_prefix}/{filename}')['CacheControl'] 156 | assert result == expected 157 | 158 | # Add another file to the directory 159 | more_filenames = ['new_index.html'] 160 | _make_fake_files(test_dir, more_filenames) 161 | publish_to_s3(**publish_kwargs) 162 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 163 | 164 | assert results['KeyCount'] == 7 165 | 166 | # Delete some files and check that the published files count 167 | # is correct 168 | test_dir.join('new_index.html').remove() 169 | test_dir.join('boop.txt').remove() 170 | publish_to_s3(**publish_kwargs) 171 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 172 | assert results['KeyCount'] == 5 173 | 174 | # Write an existing file with different content so that it 175 | # needs to get updated 176 | index_key = f'{site_prefix}/index.html' 177 | orig_etag = s3_client.get_object( 178 | Bucket=TEST_BUCKET, 179 | Key=index_key)['ETag'] 180 | test_dir.join('index.html').write('totally new content!!!') 181 | publish_to_s3(**publish_kwargs) 182 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 183 | 184 | # number of keys should be the same 185 | assert results['KeyCount'] == 5 186 | 187 | # make sure content in changed file is updated 188 | new_etag = s3_client.get_object( 189 | Bucket=TEST_BUCKET, 190 | Key=index_key)['ETag'] 191 | assert new_etag != orig_etag 192 | 193 | # test hidden files and directories 194 | test_dir.mkdir('.well-known') 195 | test_dir.mkdir('.not-well-known') 196 | more_filenames = 
['.well-known/security.txt', 197 | '.well-known/not-security.txt', 198 | '.well-known/.security', 199 | '.not-well-known/security.txt', 200 | '.security'] 201 | _make_fake_files(test_dir, more_filenames) 202 | publish_to_s3(**publish_kwargs) 203 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 204 | assert results['KeyCount'] == 6 205 | 206 | # make sure default excluded files are excluded by default 207 | more_filenames = ['Dockerfile', 208 | 'docker-compose.yml'] 209 | _make_fake_files(test_dir, more_filenames) 210 | publish_to_s3(**publish_kwargs) 211 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 212 | assert results['KeyCount'] == 6 213 | 214 | # make sure files can be excluded in configuration 215 | more_filenames = ['excluded-file'] 216 | _make_fake_files(test_dir, more_filenames) 217 | publish_to_s3(**publish_kwargs) 218 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 219 | assert results['KeyCount'] == 6 220 | -------------------------------------------------------------------------------- /test/repo_config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloud-gov/pages-build-container/00b490e9f858f10b52eb0875b637aea61a913438/test/repo_config/__init__.py -------------------------------------------------------------------------------- /test/repo_config/test_repo_config.py: -------------------------------------------------------------------------------- 1 | from repo_config.repo_config import (RepoConfig, contains_dotpath, match_path, 2 | find_first_matching_cfg) 3 | 4 | 5 | def test_match_path(): 6 | 7 | # (, , ) 8 | configs = [ 9 | # static paths 10 | ('/', '/', True), 11 | ('/', '/hello', False), 12 | ('/hello', '/hello', True), 13 | ('/hello', '/hello/world', False), 14 | ('/hello/world', '/hello', False), 15 | 16 | # wildcard paths 17 | ('/*', '/', True), 18 | ('/*', '/hello', True), 19 | ('/*', '/hello.js', True), 20 | ('/*', '/hello/world', True), 21 | ('/hello/*', '/hello/world', True), 22 | ('/hello/*', '/world', False), 23 | ('/hello/*', '/hello/sdhgfsjdh/dkjhsfhsdfkj', True), 24 | 25 | # wildcard extension paths 26 | ('/*.html', '/', False), 27 | ('/*.html', '/foo', False), 28 | ('/*.html', '/foo.js', False), 29 | ('/*.html', '/foo.html', True), 30 | ('/bar/*.html', '/foo.html', False), 31 | ('/bar/*.html', '/bar/foo.html', True), 32 | ('/bar/*.html', '/bar/foo.js', False), 33 | ('/bar/*.html', '/bar/baz/foo.html', True), 34 | ('/bar/*.html', '/bar/baz/foo.js', False), 35 | ('/bar/*.map', '/bar/foo.js.map', True), 36 | 37 | # segment wildcard paths 38 | ('/:hello', '/', True), 39 | ('/:hello', '/booyah', True), 40 | ('/:hello', '/booyah/world', False), 41 | ('/:hello/world', '/booyah/world', True), 42 | ('/:hello/world', '/booyah/hello', False), 43 | ('/:hello/world', '/booyah', False), 44 | 45 | # crazy town 46 | ('/hello/*/foo', '/hello/sdhgfsjdh/dkjhsfhsdfkj', True), 47 | ('/:hello/world/*', '/booyah/world', True), 48 | ('/:hello/world/*', '/booyah/world/foo', True), 49 | ('/hi/:hello/world/*', '/hi/booyah/nope', False), 50 | ('/hi/:hello/world/*', '/hi/booyah/world', True), 51 | ('/hi/:hello/world/*', '/hi/booyah/world/crazy', True), 52 | 53 | # even when missing leading '/' 54 | (':hello/world/*', '/booyah/world/foo', True), 55 | (':hello/world/*', 'booyah/world/foo', True), 56 | ('/:hello/world/*', 'booyah/world', True), 57 | ] 58 | 59 | for cfg_path, path_to_match, expected_result in configs: 60 | assert match_path(cfg_path, path_to_match) == 
expected_result 61 | 62 | 63 | def test_find_first_matching_cfg(): 64 | headers = [ 65 | {'/index.html': {'cache-control': 'no-cache'}}, 66 | {'/:foo/*.html': {'cache-control': 'max-age=2000'}}, 67 | {'/*.html': {'cache-control': 'max-age=4000'}}, 68 | {'/*': {'cache-control': 'max-age=6000'}} 69 | ] 70 | 71 | configs = [ 72 | (headers, '/index.html', headers[0]), 73 | (headers, '/foo/bar.html', headers[1]), 74 | (headers, '/foo.html', headers[2]), 75 | (headers, '/', headers[3]), 76 | (headers, '/bar.js', headers[3]), 77 | ({}, '/bar.js', {}) 78 | ] 79 | 80 | for cfg_headers, path_to_match, expected_result in configs: 81 | assert find_first_matching_cfg( 82 | cfg_headers, path_to_match) == expected_result 83 | 84 | 85 | def test_get_headers_for_path(): 86 | config = { 87 | 'headers': [ 88 | {'/index.html': {'cache-control': 'no-cache'}}, 89 | {'/*.html': {'cache-control': 'max-age=4000'}}, 90 | {'/*': {'cache-control': 'max-age=6000'}} 91 | ] 92 | } 93 | 94 | defaults = { 95 | 'headers': { 96 | 'cache-control': 'max-age=60', 97 | 'foo-header': 'special-stuff:with-a-colon!' 98 | } 99 | } 100 | 101 | repo_config = RepoConfig(config=config, defaults=defaults) 102 | 103 | # When multiple paths match, return the first 104 | path_to_match = '/index.html' 105 | value = repo_config.get_headers_for_path(path_to_match) 106 | assert value == { 107 | 'cache-control': 'no-cache', 108 | 'foo-header': 'special-stuff:with-a-colon!' 109 | } 110 | 111 | # Match the partial wildcard 112 | path_to_match = '/foo.html' 113 | value = repo_config.get_headers_for_path(path_to_match) 114 | assert value == { 115 | 'cache-control': 'max-age=4000', 116 | 'foo-header': 'special-stuff:with-a-colon!' 117 | } 118 | 119 | # Match the total wildcard 120 | path_to_match = '/foo.js' 121 | value = repo_config.get_headers_for_path(path_to_match) 122 | assert value == { 123 | 'cache-control': 'max-age=6000', 124 | 'foo-header': 'special-stuff:with-a-colon!' 125 | } 126 | 127 | # Match default 128 | config = { 129 | 'headers': [ 130 | {'/index.html': {'cache-control': 'max-age=3000'}} 131 | ] 132 | } 133 | repo_config = RepoConfig(config=config, defaults=defaults) 134 | 135 | path_to_match = '/foo.js' 136 | value = repo_config.get_headers_for_path(path_to_match) 137 | assert value == defaults['headers'] 138 | 139 | # Match no headers! 
140 | config = {} 141 | repo_config = RepoConfig(config=config, defaults=defaults) 142 | 143 | path_to_match = '/foo.js' 144 | value = repo_config.get_headers_for_path(path_to_match) 145 | assert value == defaults['headers'] 146 | 147 | 148 | def test_exclude_paths_always_returns_a_list(): 149 | repo_config = RepoConfig(config={}, defaults={}) 150 | value = repo_config.exclude_paths() 151 | assert value == [] 152 | 153 | 154 | def test_exclude_paths_returns_union_of_config_and_defaults(): 155 | repo_config = RepoConfig(config=test_config(), defaults=test_defaults()) 156 | value = repo_config.exclude_paths() 157 | assert value == [ 158 | '/excluded-file', 159 | '/excluded-folder', 160 | '/excluded-folder/*', 161 | '*/Dockerfile', 162 | '/docker-compose.yml' 163 | ] 164 | 165 | 166 | def test_include_paths_always_returns_a_list(): 167 | repo_config = RepoConfig(config={}, defaults={}) 168 | value = repo_config.include_paths() 169 | assert value == [] 170 | 171 | 172 | def test_include_paths_returns_union_of_config_and_defaults(): 173 | repo_config = RepoConfig(config=test_config(), defaults=test_defaults()) 174 | value = repo_config.include_paths() 175 | assert value == [ 176 | '/foo/Dockerfile', 177 | '*/.foo', 178 | '/.well-known/security.txt' 179 | ] 180 | 181 | 182 | def test_is_exclude_path_match(): 183 | repo_config = RepoConfig(config=test_config(), defaults=test_defaults()) 184 | 185 | # Excludes default file anywhere 186 | value = repo_config.is_exclude_path_match('/Dockerfile') 187 | assert value is True 188 | 189 | value = repo_config.is_exclude_path_match('/foo/Dockerfile') 190 | assert value is True 191 | 192 | value = repo_config.is_exclude_path_match('/foo/bar/baz/Dockerfile') 193 | assert value is True 194 | 195 | # Excludes default file only at root 196 | value = repo_config.is_exclude_path_match('/docker-compose.yml') 197 | assert value is True 198 | 199 | value = repo_config.is_exclude_path_match('/foo/docker-compose.yml') 200 | assert value is False 201 | 202 | # Excludes a file explicitly excluded 203 | value = repo_config.is_exclude_path_match('/excluded-file') 204 | assert value is True 205 | 206 | # Doesn't exclude a file not explicitly excluded 207 | value = repo_config.is_exclude_path_match('/index.html') 208 | assert value is False 209 | 210 | 211 | def test_is_include_path_match(): 212 | repo_config = RepoConfig(config=test_config(), defaults=test_defaults()) 213 | 214 | # Includes default file only in root 215 | value = repo_config.is_include_path_match('/.well-known/security.txt') 216 | assert value is True 217 | 218 | # Includes Dockerfile when that default is overridden by configuration 219 | value = repo_config.is_include_path_match('/foo/Dockerfile') 220 | assert value is True 221 | 222 | # Includes dot file 223 | value = repo_config.is_include_path_match('/foo/bar/.foo') 224 | assert value is True 225 | 226 | 227 | def test_is_path_excluded(): 228 | repo_config = RepoConfig(config=test_config(), defaults=test_defaults()) 229 | 230 | # Excludes dotfiles 231 | value = repo_config.is_path_excluded('/.bar') 232 | assert value is True 233 | 234 | value = repo_config.is_path_excluded('/foo/.bar') 235 | assert value is True 236 | 237 | # Includes dotfiles when specified 238 | value = repo_config.is_path_excluded('/.well-known/security.txt') 239 | assert value is False 240 | 241 | value = repo_config.is_path_excluded('/bar/.foo') 242 | assert value is False 243 | 244 | # Excludes defaults 245 | value = repo_config.is_path_excluded('/Dockerfile') 246 | assert value 
is True 247 | 248 | value = repo_config.is_path_excluded('/bar/Dockerfile') 249 | assert value is True 250 | 251 | value = repo_config.is_path_excluded('/docker-compose.yml') 252 | assert value is True 253 | 254 | value = repo_config.is_path_excluded('/foo/docker-compose.yml') 255 | assert value is False 256 | 257 | # Excludes configured files 258 | value = repo_config.is_path_excluded('/excluded-file') 259 | assert value is True 260 | 261 | value = repo_config.is_path_excluded('/foo/excluded-file') 262 | assert value is False 263 | 264 | # Excludes configured folders 265 | value = repo_config.is_path_excluded('/excluded-folder') 266 | assert value is True 267 | 268 | value = repo_config.is_path_excluded('/excluded-folder/') 269 | assert value is True 270 | 271 | value = repo_config.is_path_excluded('/excluded-folder/foo.txt') 272 | assert value is True 273 | 274 | value = repo_config.is_path_excluded('/foo/excluded-folder/foo.txt') 275 | assert value is False 276 | 277 | # Includes configured that overrides default 278 | value = repo_config.is_path_excluded('/foo/Dockerfile') 279 | assert value is False 280 | 281 | # Prepends slashes 282 | value = repo_config.is_path_excluded('excluded-file') 283 | assert value is True 284 | 285 | value = repo_config.is_path_excluded('foo/excluded-file') 286 | assert value is False 287 | 288 | 289 | def test_is_path_included_is_not_is_path_excluded(): 290 | repo_config = RepoConfig(config=test_config(), defaults=test_defaults()) 291 | path = '/bar/.foo' 292 | included_value = repo_config.is_path_included(path) 293 | excluded_value = repo_config.is_path_excluded(path) 294 | assert included_value is not excluded_value 295 | 296 | 297 | def test_contains_dotpath(): 298 | value = contains_dotpath('/.foo') 299 | assert value is True 300 | 301 | value = contains_dotpath('/.foo/bar') 302 | assert value is True 303 | 304 | value = contains_dotpath('/foo/.bar') 305 | assert value is True 306 | 307 | value = contains_dotpath('/foo/.bar/baz') 308 | assert value is True 309 | 310 | value = contains_dotpath('/foo/bar') 311 | assert value is False 312 | 313 | 314 | def test_config(): 315 | return { 316 | 'excludePaths': [ 317 | '/excluded-file', 318 | '/excluded-folder', 319 | '/excluded-folder/*' 320 | ], 321 | 'includePaths': [ 322 | '/foo/Dockerfile', 323 | '*/.foo' 324 | ] 325 | } 326 | 327 | 328 | def test_defaults(): 329 | return { 330 | 'excludePaths': [ 331 | '*/Dockerfile', 332 | '/docker-compose.yml' 333 | ], 334 | 'includePaths': [ 335 | '/.well-known/security.txt' 336 | ] 337 | } 338 | -------------------------------------------------------------------------------- /test/support.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import hashlib 3 | 4 | from pathlib import Path 5 | 6 | 7 | def patch_dir(monkeypatch, module, dir_constant): 8 | with tempfile.TemporaryDirectory() as tmpdir: 9 | tmpdir_path = Path(tmpdir) 10 | monkeypatch.setattr(module, dir_constant, tmpdir_path) 11 | yield tmpdir_path 12 | 13 | 14 | def create_file(file_path, contents='', mode='w'): 15 | with file_path.open(mode) as f: 16 | f.write(contents) 17 | 18 | 19 | def generate_file_hash(filename): 20 | hash_md5 = hashlib.md5() # nosec 21 | 22 | with open(filename, 'rb') as file: 23 | for chunk in iter(lambda: file.read(4096), b""): 24 | hash_md5.update(chunk) 25 | 26 | return hash_md5.hexdigest() 27 | -------------------------------------------------------------------------------- /test/test_build.py: 
-------------------------------------------------------------------------------- 1 | from pytest import raises 2 | import json 3 | import os 4 | from io import StringIO 5 | from unittest.mock import call, patch, Mock 6 | from subprocess import CalledProcessError # nosec 7 | 8 | import pytest 9 | import requests_mock 10 | import requests 11 | import yaml 12 | 13 | import steps 14 | from steps import ( 15 | build_hugo, build_jekyll, build_static, download_hugo, 16 | run_build_script, run_step, setup_bundler, setup_node, setup_ruby, StepException 17 | ) 18 | from steps.build import ( 19 | build_env, check_supported_ruby_version, BUNDLER_VERSION, GEMFILE, 20 | GEMFILELOCK, HUGO_BIN, HUGO_VERSION, JEKYLL_CONFIG_YML, 21 | NVMRC, PACKAGE_JSON, PACKAGE_LOCK, RUBY_VERSION 22 | ) 23 | 24 | from .support import create_file, patch_dir 25 | 26 | 27 | @pytest.fixture 28 | def patch_clone_dir(monkeypatch): 29 | yield from patch_dir(monkeypatch, steps.build, 'CLONE_DIR_PATH') 30 | 31 | 32 | @pytest.fixture 33 | def patch_working_dir(monkeypatch): 34 | yield from patch_dir(monkeypatch, steps.build, 'WORKING_DIR_PATH') 35 | 36 | 37 | @pytest.fixture 38 | def patch_site_build_dir(monkeypatch): 39 | yield from patch_dir(monkeypatch, steps.build, 'SITE_BUILD_DIR_PATH') 40 | 41 | 42 | @pytest.fixture 43 | def patch_ruby_min_version(monkeypatch): 44 | monkeypatch.setenv('RUBY_VERSION_MIN', '3.0.0') 45 | 46 | 47 | @patch('steps.build.run') 48 | @patch('steps.build.get_logger') 49 | class TestSetupNode(): 50 | def test_it_uses_nvmrc_file_if_it_exists(self, mock_get_logger, mock_run, patch_clone_dir): 51 | create_file(patch_clone_dir / NVMRC, contents='6') 52 | 53 | mock_post_metrics = Mock() 54 | setup_node(False, None, None, mock_post_metrics) 55 | 56 | mock_get_logger.assert_called_once_with('setup-node') 57 | 58 | mock_logger = mock_get_logger.return_value 59 | 60 | mock_logger.info.assert_called_with( 61 | 'Checking node version specified in .nvmrc' 62 | ) 63 | 64 | mock_post_metrics.assert_called_once() 65 | 66 | def test_installs_deps(self, mock_get_logger, mock_run, patch_clone_dir): 67 | create_file(patch_clone_dir / PACKAGE_LOCK) 68 | 69 | mock_post_metrics = Mock() 70 | setup_node(False, None, None, mock_post_metrics) 71 | 72 | mock_get_logger.assert_called_once_with('setup-node') 73 | 74 | mock_logger = mock_get_logger.return_value 75 | 76 | mock_logger.info.assert_has_calls([ 77 | call('Using default node version'), 78 | call('Installing dependencies in package-lock.json') 79 | ]) 80 | 81 | def callp(cmd, skip_log=False): 82 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, node=True, skip_log=skip_log) 83 | 84 | mock_run.assert_has_calls([ 85 | callp('echo Node version: $(node --version)'), 86 | callp('echo NPM version: $(npm --version)'), 87 | callp('node --version', skip_log=True), 88 | callp('npm set audit false'), 89 | callp('npm ci'), 90 | ]) 91 | 92 | mock_post_metrics.assert_called_once() 93 | 94 | def test_returns_code_when_err(self, mock_get_logger, mock_run): 95 | mock_run.side_effect = CalledProcessError(1, 'command') 96 | 97 | mock_post_metrics = Mock() 98 | with pytest.raises(CalledProcessError): 99 | setup_node(False, None, None, mock_post_metrics) 100 | 101 | 102 | @patch('steps.build.run') 103 | @patch('steps.build.get_logger') 104 | class TestRunBuildScript(): 105 | def test_it_runs_federalist_script_when_it_exists(self, mock_get_logger, mock_run, 106 | patch_clone_dir): 107 | package_json_contents = json.dumps({ 108 | 'scripts': { 109 | 'federalist': 'echo federalist', 110 
| }, 111 | }) 112 | create_file(patch_clone_dir / PACKAGE_JSON, package_json_contents) 113 | 114 | kwargs = dict( 115 | branch='branch', 116 | owner='owner', 117 | repository='repo', 118 | site_prefix='site/prefix', 119 | base_url='/site/prefix' 120 | ) 121 | 122 | run_build_script(**kwargs) 123 | 124 | mock_get_logger.assert_called_once_with('run-federalist-script') 125 | 126 | mock_logger = mock_get_logger.return_value 127 | 128 | mock_logger.info.assert_called_with( 129 | 'Running federalist build script in package.json' 130 | ) 131 | 132 | mock_run.assert_called_once_with( 133 | mock_logger, 134 | 'npm run federalist', 135 | cwd=patch_clone_dir, 136 | env=build_env(*kwargs.values()), 137 | node=True 138 | ) 139 | 140 | def test_it_runs_pages_script_when_it_exists(self, mock_get_logger, mock_run, 141 | patch_clone_dir): 142 | package_json_contents = json.dumps({ 143 | 'scripts': { 144 | 'pages': 'echo pages', 145 | }, 146 | }) 147 | create_file(patch_clone_dir / PACKAGE_JSON, package_json_contents) 148 | 149 | kwargs = dict( 150 | branch='branch', 151 | owner='owner', 152 | repository='repo', 153 | site_prefix='site/prefix', 154 | base_url='/site/prefix' 155 | ) 156 | 157 | run_build_script(**kwargs) 158 | 159 | mock_get_logger.assert_called_once_with('run-pages-script') 160 | 161 | mock_logger = mock_get_logger.return_value 162 | 163 | mock_logger.info.assert_called_with( 164 | 'Running pages build script in package.json' 165 | ) 166 | 167 | mock_run.assert_called_once_with( 168 | mock_logger, 169 | 'npm run pages', 170 | cwd=patch_clone_dir, 171 | env=build_env(*kwargs.values()), 172 | node=True 173 | ) 174 | 175 | def test_it_only_runs_pages_script_when_both_exist(self, mock_get_logger, mock_run, 176 | patch_clone_dir): 177 | package_json_contents = json.dumps({ 178 | 'scripts': { 179 | 'pages': 'echo pages', 180 | 'federalist': 'echo federalist', 181 | }, 182 | }) 183 | create_file(patch_clone_dir / PACKAGE_JSON, package_json_contents) 184 | 185 | kwargs = dict( 186 | branch='branch', 187 | owner='owner', 188 | repository='repo', 189 | site_prefix='site/prefix', 190 | base_url='/site/prefix' 191 | ) 192 | 193 | run_build_script(**kwargs) 194 | 195 | mock_get_logger.assert_called_once_with('run-pages-script') 196 | 197 | mock_logger = mock_get_logger.return_value 198 | 199 | mock_logger.info.assert_called_with( 200 | 'Running pages build script in package.json' 201 | ) 202 | 203 | mock_run.assert_called_once_with( 204 | mock_logger, 205 | 'npm run pages', 206 | cwd=patch_clone_dir, 207 | env=build_env(*kwargs.values()), 208 | node=True 209 | ) 210 | 211 | def test_it_does_not_run_otherwise(self, mock_get_logger, mock_run): 212 | run_build_script('b', 'o', 'r', 'sp') 213 | 214 | mock_get_logger.assert_not_called() 215 | mock_run.assert_not_called() 216 | 217 | 218 | class TestRunStep(): 219 | def test_it_should_raise_an_exception_from_step(self): 220 | msg = 'testing-msg' 221 | arg1 = 'arg1' 222 | kwarg1 = 'kwarg1' 223 | mock_step = Mock(side_effect=KeyError) 224 | 225 | with raises(StepException): 226 | run_step(mock_step, msg, arg1, kwarg1=kwarg1) 227 | 228 | mock_step.assert_called_once_with(arg1, kwarg1=kwarg1) 229 | 230 | def test_it_should_run_step_successfully(self): 231 | msg = 'testing-msg' 232 | arg1 = 'arg1' 233 | kwarg1 = 'kwarg1' 234 | mock_step = Mock() 235 | mock_step.return_value() 236 | 237 | run_step(mock_step, msg, arg1, kwarg1=kwarg1) 238 | 239 | mock_step.assert_called_once_with(arg1, kwarg1=kwarg1) 240 | 241 | 242 | @patch('steps.build.run') 243 | 
@patch('steps.build.get_logger') 244 | @patch('steps.build.check_supported_ruby_version') 245 | class TestSetupRuby(): 246 | def test_no_ruby_version_file(self, mock_check_supported_ruby_version, 247 | mock_get_logger, mock_run, patch_clone_dir): 248 | 249 | mock_post_metrics = Mock() 250 | setup_ruby(False, mock_post_metrics) 251 | 252 | mock_get_logger.assert_called_once_with('setup-ruby') 253 | mock_logger = mock_get_logger.return_value 254 | 255 | mock_post_metrics.assert_called_once() 256 | 257 | def callp(cmd, skip_log=False): 258 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, ruby=True, skip_log=skip_log) 259 | 260 | mock_run.assert_has_calls([ 261 | callp('ruby -v', skip_log=True), 262 | callp('echo Ruby version: $(ruby -v)') 263 | ]) 264 | 265 | def test_it_uses_ruby_version_if_it_exists(self, 266 | mock_check_supported_ruby_version, 267 | mock_get_logger, mock_run, 268 | patch_clone_dir): 269 | 270 | version = '3.1' 271 | 272 | create_file(patch_clone_dir / RUBY_VERSION, version) 273 | 274 | mock_post_metrics = Mock() 275 | setup_ruby(False, mock_post_metrics) 276 | 277 | mock_get_logger.assert_called_once_with('setup-ruby') 278 | 279 | mock_logger = mock_get_logger.return_value 280 | 281 | def callp(cmd, skip_log=False): 282 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, ruby=True, skip_log=skip_log) 283 | 284 | mock_run.assert_has_calls([ 285 | callp(f'rvm install {version}'), 286 | callp('ruby -v', skip_log=True), 287 | callp('echo Ruby version: $(ruby -v)') 288 | ]) 289 | 290 | def test_it_strips_and_quotes_ruby_version(self, 291 | mock_check_supported_ruby_version, 292 | mock_get_logger, mock_run, 293 | patch_clone_dir): 294 | 295 | version = ' $3.1 ' 296 | create_file(patch_clone_dir / RUBY_VERSION, version) 297 | 298 | mock_post_metrics = Mock() 299 | setup_ruby(False, mock_post_metrics) 300 | 301 | mock_get_logger.assert_called_once_with('setup-ruby') 302 | 303 | mock_logger = mock_get_logger.return_value 304 | 305 | def callp(cmd, skip_log=False): 306 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, ruby=True, skip_log=skip_log) 307 | 308 | mock_logger.info.assert_has_calls([ 309 | call('Using ruby version in .ruby-version'), 310 | ]) 311 | 312 | mock_run.assert_has_calls([ 313 | callp("rvm install '$3.1'"), 314 | callp('ruby -v', skip_log=True), 315 | callp('echo Ruby version: $(ruby -v)'), 316 | ]) 317 | 318 | def test_it_errors_when_rvm_install_fails(self, 319 | mock_check_supported_ruby_version, 320 | mock_get_logger, mock_run, 321 | patch_clone_dir): 322 | 323 | version = '3.1' 324 | create_file(patch_clone_dir / RUBY_VERSION, version) 325 | 326 | error = 'error installing ruby' 327 | mock_run.side_effect = Exception(error) 328 | 329 | mock_post_metrics = Mock() 330 | with pytest.raises(Exception) as einfo: 331 | setup_ruby(False, mock_post_metrics) 332 | 333 | mock_get_logger.assert_called_once_with('setup-ruby') 334 | 335 | assert str(einfo.value).strip() == error 336 | 337 | def test_it_outputs_warning_if_eol_approaching(self, 338 | mock_check_supported_ruby_version, 339 | mock_get_logger, mock_run, 340 | patch_ruby_min_version): 341 | 342 | min_ruby_version = os.getenv('RUBY_VERSION_MIN') 343 | check_supported_ruby_version(min_ruby_version) 344 | 345 | mock_logger = mock_get_logger.return_value 346 | 347 | mock_logger.warning.assert_has_calls([ 348 | call( 349 | f'WARNING: Ruby {min_ruby_version} will soon reach end-of-life, at which point Pages will no longer support it.'), # noqa: E501 350 | call('Please upgrade to an 
actively supported version, see https://www.ruby-lang.org/en/downloads/branches/ for details.') # noqa: E501 351 | ]) 352 | 353 | def test_it_outputs_warning_if_not_supported(self, 354 | mock_check_supported_ruby_version, 355 | mock_get_logger, mock_run, patch_ruby_min_version): 356 | version = '2.3' 357 | mock_run.return_value = 0 358 | 359 | with pytest.raises(Exception) as einfo: 360 | check_supported_ruby_version(version) 361 | 362 | error = 'ERROR: Unsupported ruby version specified in .ruby-version.' 363 | assert str(einfo.value).strip() == error 364 | 365 | mock_logger = mock_get_logger.return_value 366 | 367 | mock_logger.error.assert_has_calls([ 368 | call('ERROR: Unsupported ruby version specified in .ruby-version.'), 369 | call('Please upgrade to an actively supported version, see https://www.ruby-lang.org/en/downloads/branches/ for details.') # noqa: E501 370 | ]) 371 | 372 | 373 | @patch('steps.build.run') 374 | @patch('steps.build.get_logger') 375 | class TestSetupBundler(): 376 | def test_when_no_gemfile_just_load_jekyll(self, mock_get_logger, mock_run, patch_clone_dir): 377 | setup_bundler(False, None, None) 378 | 379 | mock_get_logger.assert_called_once_with('setup-bundler') 380 | 381 | mock_logger = mock_get_logger.return_value 382 | 383 | mock_logger.info.assert_has_calls([ 384 | call('No Gemfile found, installing Jekyll.') 385 | ]) 386 | 387 | mock_run.assert_called_once_with( 388 | mock_logger, 'gem install jekyll -v 4.2.2 --no-document', 389 | cwd=patch_clone_dir, env={}, ruby=True 390 | ) 391 | 392 | def test_it_uses_default_version_if_only_gemfile_exits(self, mock_get_logger, 393 | mock_run, patch_clone_dir): 394 | default_version = '<2' 395 | create_file(patch_clone_dir / GEMFILE, 'foo') 396 | 397 | mock_run.return_value = 0 398 | 399 | setup_bundler(False, None, None) 400 | 401 | mock_get_logger.assert_called_once_with('setup-bundler') 402 | 403 | mock_logger = mock_get_logger.return_value 404 | 405 | mock_logger.info.assert_has_calls([ 406 | call('Gemfile found, setting up bundler'), 407 | call('Installing dependencies in Gemfile'), 408 | ]) 409 | 410 | def callp(cmd): 411 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, ruby=True) 412 | 413 | mock_run.assert_has_calls([ 414 | callp(f'gem install bundler --version "{default_version}"'), 415 | callp('bundle install'), 416 | ]) 417 | 418 | def test_it_uses_bundler_version_if_gemfile_and_bundler_file_exists(self, mock_get_logger, 419 | mock_run, patch_clone_dir): 420 | version = '2.0.1' 421 | 422 | create_file(patch_clone_dir / GEMFILE, 'foo') 423 | create_file(patch_clone_dir / BUNDLER_VERSION, version) 424 | 425 | mock_run.return_value = 0 426 | 427 | setup_bundler(False, None, None) 428 | 429 | mock_get_logger.assert_called_once_with('setup-bundler') 430 | 431 | mock_logger = mock_get_logger.return_value 432 | 433 | mock_logger.info.assert_has_calls([ 434 | call('Gemfile found, setting up bundler'), 435 | call('Using bundler version in .bundler-version'), 436 | call('Installing dependencies in Gemfile'), 437 | ]) 438 | 439 | def callp(cmd): 440 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, ruby=True) 441 | 442 | mock_run.assert_has_calls([ 443 | callp(f'gem install bundler --version "{version}"'), 444 | callp('bundle install'), 445 | ]) 446 | 447 | 448 | @patch('steps.build.run') 449 | @patch('steps.build.get_logger') 450 | class TestBuildJekyll(): 451 | def test_with_no_gemfile(self, mock_get_logger, mock_run, patch_clone_dir, 452 | patch_site_build_dir): 453 | command = 'jekyll' 454 | 
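        # With no Gemfile in the cloned repo, build_jekyll is expected to invoke the
        # plain `jekyll` binary rather than `bundle exec jekyll`.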
455 | create_file(patch_clone_dir / JEKYLL_CONFIG_YML, 'hi: test') 456 | 457 | kwargs = dict( 458 | branch='branch', owner='owner', 459 | repository='repo', site_prefix='site/prefix', 460 | base_url='/site/prefix', config=json.dumps(dict(boop='beep')) 461 | ) 462 | 463 | build_jekyll(**kwargs) 464 | 465 | mock_get_logger.assert_has_calls([call('build-jekyll'), call('build-jekyll')]) 466 | 467 | mock_logger = mock_get_logger.return_value 468 | 469 | env = build_env( 470 | kwargs['branch'], kwargs['owner'], kwargs['repository'], 471 | kwargs['site_prefix'], kwargs['base_url'] 472 | ) 473 | env['JEKYLL_ENV'] = 'production' 474 | 475 | mock_run.assert_has_calls([ 476 | call( 477 | mock_logger, 478 | f'echo Building using Jekyll version: $({command} -v)', 479 | cwd=patch_clone_dir, 480 | env={}, 481 | ruby=True, 482 | ), 483 | call( 484 | mock_logger, 485 | f'{command} build --destination {patch_site_build_dir}', 486 | cwd=patch_clone_dir, 487 | env=env, 488 | node=True, 489 | ruby=True, 490 | ) 491 | ]) 492 | 493 | def test_with_gemfile(self, mock_get_logger, mock_run, patch_clone_dir, patch_site_build_dir): 494 | command = 'bundle exec jekyll' 495 | 496 | create_file(patch_clone_dir / GEMFILE, 'foo') 497 | create_file(patch_clone_dir / JEKYLL_CONFIG_YML, 'hi: test') 498 | 499 | kwargs = dict( 500 | branch='branch', owner='owner', 501 | repository='repo', site_prefix='site/prefix', 502 | base_url='/site/prefix', config=json.dumps(dict(boop='beep')) 503 | ) 504 | 505 | build_jekyll(**kwargs) 506 | 507 | mock_get_logger.assert_has_calls([call('build-jekyll'), call('build-jekyll')]) 508 | 509 | mock_logger = mock_get_logger.return_value 510 | 511 | env = build_env( 512 | kwargs['branch'], kwargs['owner'], kwargs['repository'], 513 | kwargs['site_prefix'], kwargs['base_url'] 514 | ) 515 | env['JEKYLL_ENV'] = 'production' 516 | 517 | mock_run.assert_has_calls([ 518 | call( 519 | mock_logger, 520 | f'echo Building using Jekyll version: $({command} -v)', 521 | cwd=patch_clone_dir, 522 | env={}, 523 | ruby=True, 524 | ), 525 | call( 526 | mock_logger, 527 | f'{command} build --destination {patch_site_build_dir}', 528 | cwd=patch_clone_dir, 529 | env=env, 530 | node=True, 531 | ruby=True, 532 | ) 533 | ]) 534 | 535 | def test_config_file_is_updated(self, mock_get_logger, mock_run, patch_clone_dir, 536 | patch_site_build_dir): 537 | conf_path = patch_clone_dir / JEKYLL_CONFIG_YML 538 | create_file(conf_path, 'hi: test') 539 | 540 | kwargs = dict( 541 | branch='branch', owner='owner', 542 | repository='repo', site_prefix='site/prefix', 543 | config=json.dumps(dict(boop='beep')), base_url='/site/prefix' 544 | ) 545 | 546 | build_jekyll(**kwargs) 547 | 548 | with conf_path.open() as f: 549 | config = yaml.safe_load(f) 550 | assert config['hi'] == 'test' 551 | assert config['baseurl'] == kwargs['base_url'] 552 | assert config['branch'] == kwargs['branch'] 553 | 554 | 555 | @patch('steps.build.run') 556 | @patch('steps.build.get_logger') 557 | class TestDownloadHugo(): 558 | def test_it_is_callable(self, mock_get_logger, mock_run, patch_working_dir, patch_clone_dir): 559 | version = '0.44' 560 | tar_cmd = f'tar -xzf {patch_working_dir}/hugo.tar.gz -C {patch_working_dir}' 561 | chmod_cmd = f'chmod +x {patch_working_dir}/hugo' 562 | dl_url = ( 563 | 'https://github.com/gohugoio/hugo/releases/download/v' 564 | f'{version}/hugo_{version}_Linux-64bit.tar.gz' 565 | ) 566 | print(dl_url) 567 | 568 | create_file(patch_clone_dir / HUGO_VERSION, version) 569 | 570 | mock_post_metrics = Mock() 571 | with 
requests_mock.Mocker() as m: 572 | m.get(dl_url, text='fake-data') 573 | download_hugo(mock_post_metrics) 574 | 575 | mock_get_logger.assert_called_once_with('download-hugo') 576 | 577 | mock_logger = mock_get_logger.return_value 578 | 579 | mock_logger.info.assert_has_calls([ 580 | call('.hugo-version found'), 581 | call(f'Using hugo version in .hugo-version: {version}'), 582 | call(f'Downloading hugo version {version}') 583 | ]) 584 | 585 | mock_run.assert_has_calls([ 586 | call(mock_logger, tar_cmd, env={}), 587 | call(mock_logger, chmod_cmd, env={}) 588 | ]) 589 | 590 | def test_it_is_callable_retry(self, mock_get_logger, mock_run, patch_working_dir, 591 | patch_clone_dir): 592 | version = '0.44' 593 | tar_cmd = f'tar -xzf {patch_working_dir}/hugo.tar.gz -C {patch_working_dir}' 594 | chmod_cmd = f'chmod +x {patch_working_dir}/hugo' 595 | dl_url = ( 596 | 'https://github.com/gohugoio/hugo/releases/download/v' 597 | f'{version}/hugo_{version}_Linux-64bit.tar.gz' 598 | ) 599 | 600 | create_file(patch_clone_dir / HUGO_VERSION, version) 601 | 602 | mock_post_metrics = Mock() 603 | with requests_mock.Mocker() as m: 604 | m.get(dl_url, [ 605 | dict(exc=requests.exceptions.ConnectTimeout), 606 | dict(exc=requests.exceptions.ConnectTimeout), 607 | dict(exc=requests.exceptions.ConnectTimeout), 608 | dict(exc=requests.exceptions.ConnectTimeout), 609 | dict(text='fake-data') 610 | ]) 611 | 612 | download_hugo(mock_post_metrics) 613 | 614 | mock_get_logger.assert_called_once_with('download-hugo') 615 | 616 | mock_logger = mock_get_logger.return_value 617 | 618 | mock_logger.info.assert_has_calls([ 619 | call('.hugo-version found'), 620 | call(f'Using hugo version in .hugo-version: {version}'), 621 | call(f'Downloading hugo version {version}'), 622 | call(f'Failed attempt #1 to download hugo version: {version}'), 623 | call(f'Failed attempt #2 to download hugo version: {version}'), 624 | call(f'Failed attempt #3 to download hugo version: {version}'), 625 | call(f'Failed attempt #4 to download hugo version: {version}'), 626 | ]) 627 | 628 | mock_run.assert_has_calls([ 629 | call(mock_logger, tar_cmd, env={}), 630 | call(mock_logger, chmod_cmd, env={}) 631 | ]) 632 | 633 | def test_it_is_exception(self, mock_get_logger, mock_run, patch_working_dir, patch_clone_dir): 634 | version = '0.44' 635 | dl_url = ( 636 | 'https://github.com/gohugoio/hugo/releases/download/v' 637 | f'{version}/hugo_{version}_Linux-64bit.tar.gz' 638 | ) 639 | 640 | create_file(patch_clone_dir / HUGO_VERSION, version) 641 | 642 | mock_post_metrics = Mock() 643 | with pytest.raises(Exception): 644 | with requests_mock.Mocker() as m: 645 | m.get(dl_url, [ 646 | dict(exc=requests.exceptions.ConnectTimeout), 647 | dict(exc=requests.exceptions.ConnectTimeout), 648 | dict(exc=requests.exceptions.ConnectTimeout), 649 | dict(exc=requests.exceptions.ConnectTimeout), 650 | dict(exc=requests.exceptions.ConnectTimeout), 651 | ]) 652 | 653 | download_hugo(mock_post_metrics) 654 | 655 | mock_get_logger.assert_called_once_with('download-hugo') 656 | 657 | mock_logger = mock_get_logger.return_value 658 | 659 | mock_logger.info.assert_has_calls([ 660 | call('.hugo-version found'), 661 | call(f'Using hugo version in .hugo-version: {version}'), 662 | call(f'Downloading hugo version {version}'), 663 | call(f'Failed attempt #1 to download hugo version: {version}'), 664 | call(f'Failed attempt #2 to download hugo version: {version}'), 665 | call(f'Failed attempt #3 to download hugo version: {version}'), 666 | call(f'Failed attempt #4 to download hugo 
version: {version}'), 667 | call(f'Failed attempt #5 to download hugo version: {version}'), 668 | ]) 669 | 670 | mock_run.assert_not_called() 671 | 672 | 673 | @patch('steps.build.run') 674 | @patch('steps.build.get_logger') 675 | class TestBuildHugo(): 676 | def test_it_calls_hugo_as_expected(self, mock_get_logger, mock_run, 677 | patch_working_dir, patch_clone_dir, 678 | patch_site_build_dir): 679 | 680 | hugo_path = patch_working_dir / HUGO_BIN 681 | hugo_call = ( 682 | f'{hugo_path} --source {patch_clone_dir} ' 683 | f'--destination {patch_site_build_dir} ' 684 | '--baseURL /site/prefix' 685 | ) 686 | 687 | kwargs = dict( 688 | branch='branch', 689 | owner='owner', 690 | repository='repo', 691 | site_prefix='site/prefix', 692 | base_url='/site/prefix' 693 | ) 694 | 695 | build_hugo(**kwargs) 696 | 697 | mock_get_logger.assert_called_once_with('build-hugo') 698 | 699 | mock_logger = mock_get_logger.return_value 700 | 701 | mock_logger.info.assert_called_with( 702 | 'Building site with hugo' 703 | ) 704 | 705 | mock_run.assert_has_calls([ 706 | call( 707 | mock_logger, 708 | f'echo hugo version: $({hugo_path} version)', 709 | env={}, 710 | ), 711 | call( 712 | mock_logger, 713 | hugo_call, 714 | cwd=patch_clone_dir, 715 | env=build_env(*kwargs.values()), 716 | node=True, 717 | ) 718 | ]) 719 | 720 | 721 | class TestBuildstatic(): 722 | def test_it_moves_files_correctly(self, patch_site_build_dir, patch_clone_dir): 723 | for i in range(0, 10): 724 | create_file(patch_clone_dir / f'file_{i}.txt', str(i)) 725 | 726 | assert len(os.listdir(patch_clone_dir)) == 10 727 | assert len(os.listdir(patch_site_build_dir)) == 0 728 | 729 | build_static() 730 | 731 | assert len(os.listdir(patch_clone_dir)) == 0 732 | assert len(os.listdir(patch_site_build_dir)) == 10 733 | 734 | 735 | class TestBuildEnv(): 736 | def test_it_includes_default_values(self): 737 | branch = 'branch' 738 | owner = 'owner' 739 | repository = 'repo' 740 | site_prefix = 'prefix' 741 | base_url = 'url' 742 | 743 | result = build_env(branch, owner, repository, site_prefix, base_url) 744 | 745 | assert result == { 746 | 'BRANCH': branch, 747 | 'OWNER': owner, 748 | 'REPOSITORY': repository, 749 | 'SITE_PREFIX': site_prefix, 750 | 'BASEURL': base_url, 751 | 'LANG': 'en_US.UTF-8', 752 | 'GATSBY_TELEMETRY_DISABLED': '1', 753 | 'HOME': '/home/customer', 754 | } 755 | 756 | def test_it_includes_user_env_vars(self): 757 | branch = 'branch' 758 | owner = 'owner' 759 | repository = 'repo' 760 | site_prefix = 'prefix' 761 | base_url = 'url' 762 | user_env_vars = [ 763 | {'name': 'FOO', 'value': 'bar'} 764 | ] 765 | 766 | result = build_env(branch, owner, repository, site_prefix, 767 | base_url, user_env_vars) 768 | 769 | assert result['FOO'] == 'bar' 770 | 771 | @patch('sys.stdout', new_callable=StringIO) 772 | def test_it_ignores_and_warns_duplicate_user_env_vars(self, mock_stdout): 773 | # and it is case insensitive 774 | branch = 'branch' 775 | owner = 'owner' 776 | repository = 'repo' 777 | site_prefix = 'prefix' 778 | base_url = 'url' 779 | user_env_vars = [ 780 | {'name': 'BASEURL', 'value': 'bar'}, 781 | {'name': 'repository', 'value': 'baz'} 782 | ] 783 | 784 | result = build_env(branch, owner, repository, site_prefix, 785 | base_url, user_env_vars) 786 | 787 | assert result['BASEURL'] == base_url 788 | assert result['REPOSITORY'] == repository 789 | assert ('user environment variable name `BASEURL` conflicts' 790 | ' with system environment variable, it will be ignored.' 
791 | ) in mock_stdout.getvalue() 792 | assert ('user environment variable name `repository`' 793 | ' conflicts with system environment variable, it will be' 794 | ' ignored.') in mock_stdout.getvalue() 795 | 796 | 797 | @patch('steps.build.run') 798 | @patch('steps.build.get_logger') 799 | @patch('steps.build.CacheFolder') 800 | @patch('steps.build.subprocess.run') 801 | class TestBuildCache(): 802 | def test_it_uses_ruby_cache_when_gemfile_lock(self, mock_sp_run, mock_cache_folder, 803 | mock_get_logger, mock_run, patch_clone_dir): 804 | default_version = '<2' 805 | create_file(patch_clone_dir / GEMFILE, 'foo') 806 | create_file(patch_clone_dir / GEMFILELOCK, contents='hashable') 807 | 808 | mock_run.return_value = 0 809 | 810 | setup_bundler(True, None, None) 811 | 812 | mock_get_logger.assert_called_once_with('setup-bundler') 813 | 814 | mock_logger = mock_get_logger.return_value 815 | 816 | mock_logger.info.assert_has_calls([ 817 | call('Gemfile found, setting up bundler'), 818 | call(f'{GEMFILELOCK} found. Attempting to download cache'), 819 | call('Installing dependencies in Gemfile'), 820 | ]) 821 | 822 | mock_cache_folder.assert_called_once() 823 | 824 | def callp(cmd, skip_log=False): 825 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, ruby=True) 826 | 827 | mock_run.assert_has_calls([ 828 | callp(f'gem install bundler --version "{default_version}"'), 829 | callp('bundle install'), 830 | ]) 831 | 832 | def test_it_uses_node_cache_when_package_lock(self, mock_sp_run, mock_cache_folder, 833 | mock_get_logger, mock_run, patch_clone_dir): 834 | create_file(patch_clone_dir / PACKAGE_JSON) 835 | create_file(patch_clone_dir / PACKAGE_LOCK, contents='hashable') 836 | 837 | mock_post_metrics = Mock() 838 | setup_node(True, None, None, mock_post_metrics) 839 | 840 | mock_get_logger.assert_called_once_with('setup-node') 841 | 842 | mock_logger = mock_get_logger.return_value 843 | 844 | mock_logger.info.assert_has_calls([ 845 | call('Using default node version'), 846 | call(f'{PACKAGE_LOCK} found. 
Attempting to download cache'), 847 | call('skipping npm ci and using cache') 848 | ]) 849 | 850 | mock_cache_folder.assert_called_once() 851 | 852 | def callp(cmd, skip_log=False): 853 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, node=True, skip_log=skip_log) 854 | 855 | mock_run.assert_has_calls([ 856 | callp('echo Node version: $(node --version)'), 857 | callp('echo NPM version: $(npm --version)'), 858 | ]) 859 | -------------------------------------------------------------------------------- /test/test_cache.py: -------------------------------------------------------------------------------- 1 | import os 2 | import boto3 3 | import pytest 4 | import tempfile 5 | import filecmp 6 | import shutil 7 | 8 | from moto import mock_aws 9 | 10 | from steps.cache import CacheFolder, get_checksum 11 | from log_utils import get_logger 12 | 13 | 14 | @pytest.fixture 15 | def aws_credentials(): 16 | """Mocked AWS Credentials for moto.""" 17 | os.environ["AWS_ACCESS_KEY_ID"] = "testing" 18 | os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" 19 | os.environ["AWS_DEFAULT_REGION"] = "testing" 20 | 21 | 22 | @pytest.fixture 23 | def s3_client(aws_credentials): 24 | with mock_aws(): 25 | conn = boto3.client("s3") 26 | yield conn 27 | 28 | 29 | @pytest.fixture 30 | def bucket(s3_client): 31 | s3_client.create_bucket( 32 | Bucket='testing', 33 | CreateBucketConfiguration={"LocationConstraint": "testing"} 34 | ) 35 | yield 36 | 37 | 38 | @pytest.fixture 39 | def gemfile(): 40 | tmp_file = tempfile.NamedTemporaryFile(delete=False) 41 | tmp_file.write(b'source "https://rubygems.org"') 42 | tmp_file.write(b'gem "jekyll", "~> 4.0"') 43 | yield tmp_file.name 44 | 45 | 46 | @pytest.fixture(autouse=True) 47 | def cache_folder(s3_client, bucket, gemfile, tmpdir): 48 | logger = get_logger('testing') 49 | yield CacheFolder(gemfile, tmpdir, 'testing', s3_client, logger) 50 | 51 | 52 | class TestCache(): 53 | def test_cache_operations(self, cache_folder: CacheFolder): 54 | # first the cache isn't there 55 | assert not cache_folder.exists() 56 | 57 | # add some files and cache them 58 | FILES_TO_CACHE = 5 59 | for _ in range(FILES_TO_CACHE): 60 | tempfile.NamedTemporaryFile(dir=cache_folder.local_folder, delete=False) 61 | cache_folder.zip_upload_folder_to_s3() 62 | 63 | # now the cache exists 64 | assert cache_folder.exists() 65 | 66 | # move the old files to a new directory for comparison 67 | with tempfile.TemporaryDirectory() as download_tmp_dir: 68 | for f in os.listdir(cache_folder.local_folder): 69 | shutil.move( 70 | os.path.join(cache_folder.local_folder, f), 71 | os.path.join(download_tmp_dir, f) 72 | ) 73 | 74 | # download the cache and compare 75 | cache_folder.download_unzip() 76 | dir_comp = filecmp.dircmp(cache_folder.local_folder, download_tmp_dir) 77 | assert len(dir_comp.common) == FILES_TO_CACHE 78 | assert len(dir_comp.diff_files) == 0 79 | 80 | def test_checksum(self, gemfile): 81 | c = get_checksum(gemfile) 82 | assert c == 'd41d8cd98f00b204e9800998ecf8427e' 83 | -------------------------------------------------------------------------------- /test/test_crypto.py: -------------------------------------------------------------------------------- 1 | from crypto.decrypt import decrypt 2 | 3 | 4 | def test_decrypt(): 5 | ciphertext = ('6a7495108a7f8c9ab4d0990854240242:' 6 | 'e05f0d25446be83fa92aa9586610496b:' 7 | '560d3e8ff02f852104417a') 8 | key = 'shhhhhhh' 9 | 10 | expected = 'hello world' 11 | 12 | result = decrypt(ciphertext, key) 13 | 14 | assert result == expected 15 | 
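Note on the TestCache suite in test/test_cache.py above: it drives steps.cache.CacheFolder only through its public surface (get_checksum over a lockfile, exists, zip_upload_folder_to_s3, and download_unzip against a moto-mocked bucket). For orientation, a minimal sketch consistent with that interface follows; it is not the repository's actual steps/cache.py, and the cache/<md5>.zip key layout and plain-zip archive format are assumptions made purely for illustration.

import hashlib
import shutil
import tempfile
from pathlib import Path

from botocore.exceptions import ClientError


def get_checksum(path):
    # Cache key: MD5 of the lockfile (e.g. Gemfile.lock or package-lock.json) contents
    return hashlib.md5(Path(path).read_bytes()).hexdigest()  # nosec - not used for security


class CacheFolder:
    def __init__(self, key_file, local_folder, bucket, s3_client, logger):
        self.key = f'cache/{get_checksum(key_file)}.zip'  # assumed key layout
        self.local_folder = Path(local_folder)
        self.bucket = bucket
        self.s3_client = s3_client
        self.logger = logger

    def exists(self):
        # A cache hit means an object with this checksum-derived key is already in the bucket
        try:
            self.s3_client.head_object(Bucket=self.bucket, Key=self.key)
            return True
        except ClientError:
            return False

    def zip_upload_folder_to_s3(self):
        # Archive the local folder and upload it under the checksum-derived key
        with tempfile.TemporaryDirectory() as tmp:
            archive = shutil.make_archive(f'{tmp}/cache', 'zip', self.local_folder)
            self.s3_client.upload_file(archive, self.bucket, self.key)
        self.logger.info(f'Cache uploaded to s3://{self.bucket}/{self.key}')

    def download_unzip(self):
        # Fetch the archive and unpack it back into the local folder
        with tempfile.TemporaryDirectory() as tmp:
            archive = f'{tmp}/cache.zip'
            self.s3_client.download_file(self.bucket, self.key, archive)
            shutil.unpack_archive(archive, self.local_folder, 'zip')
        self.logger.info(f'Cache downloaded from s3://{self.bucket}/{self.key}')

One detail worth noting when reading test_checksum: the expected digest d41d8cd98f00b204e9800998ecf8427e is the MD5 of empty content. The gemfile fixture writes to a NamedTemporaryFile but never flushes or closes it before yielding the name, so the file on disk is still empty when it is hashed.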
-------------------------------------------------------------------------------- /test/test_fetch.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import unittest 3 | from unittest.mock import patch 4 | # import subprocess # nosec 5 | import pytest 6 | 7 | from steps import fetch_repo, update_repo 8 | from common import CLONE_DIR_PATH 9 | 10 | clone_env = { 11 | 'HOME': '/home' 12 | } 13 | 14 | 15 | @patch('steps.fetch.run') 16 | @patch('steps.fetch.get_logger') 17 | class TestCloneRepo(): 18 | def test_runs_expected_cmds(self, mock_get_logger, mock_run): 19 | owner = 'owner-1' 20 | repository = 'repo-1' 21 | branch = 'main' 22 | 23 | command = (f'git clone -b {branch} --single-branch --depth 1 ' 24 | f'https://github.com/{owner}/{repository}.git ' 25 | f'{CLONE_DIR_PATH}') 26 | 27 | fetch_repo(owner, repository, branch) 28 | 29 | mock_get_logger.assert_called_once_with('clone') 30 | 31 | mock_run.assert_called_once_with(mock_get_logger.return_value, command, env=clone_env, check=False) # noqa: 501 32 | 33 | def test_runs_expected_cmds_with_gh_token(self, mock_get_logger, mock_run): 34 | owner = 'owner-2' 35 | repository = 'repo-2' 36 | branch = 'staging' 37 | github_token = 'ABC123' 38 | 39 | command = (f'git clone -b {branch} --single-branch --depth 1 ' 40 | f'https://{github_token}@github.com/{owner}/{repository}.git ' 41 | f'{CLONE_DIR_PATH}') 42 | 43 | fetch_repo(owner, repository, branch, github_token) 44 | 45 | mock_get_logger.assert_called_once_with('clone') 46 | 47 | mock_run.assert_called_once_with(mock_get_logger.return_value, command, env=clone_env, check=False) # noqa: 501 48 | 49 | 50 | class TestCloneRepoNoMock(unittest.TestCase): 51 | @pytest.fixture(autouse=True) 52 | def inject_fixtures(self, caplog): 53 | self._caplog = caplog 54 | 55 | def test_no_github_permission_warning(self): 56 | owner = 'cloud-gov' 57 | repository = 'cg-site' 58 | branch = 'master' 59 | 60 | # TODO: this is a totally useless test because the CI runner doesn't have git 61 | with self._caplog.at_level(logging.INFO): 62 | fetch_repo(owner, repository, branch) 63 | 64 | assert self._caplog.text 65 | assert 'Permission denied' not in self._caplog.text 66 | 67 | 68 | @patch('steps.fetch.run') 69 | @patch('steps.fetch.get_logger') 70 | class TestUpdateRepo(): 71 | def test_runs_expected_cmds(self, mock_get_logger, mock_run): 72 | clone_dir = 'clone_dir' 73 | 74 | command = 'git pull --unshallow' 75 | 76 | update_repo(clone_dir) 77 | 78 | mock_get_logger.assert_called_once_with('update') 79 | 80 | mock_run.assert_called_once_with(mock_get_logger.return_value, command, cwd=clone_dir) 81 | 82 | 83 | # @patch('steps.fetch.subprocess.run') 84 | # @patch('steps.fetch.get_logger') 85 | # class TestFetchCommitSHA(): 86 | # def test_runs_expected_cmds(self, mock_get_logger, mock_run): 87 | # mock_run.return_value = subprocess.CompletedProcess([], 0, 'commit testSha blah blah') 88 | # clone_dir = 'clone_dir' 89 | 90 | # command = ['git', 'log', '-1'] 91 | # commit_sha = fetch_commit_sha(clone_dir) 92 | 93 | # mock_get_logger.assert_called_once_with('clone') 94 | # mock_run.assert_called_once_with( 95 | # command, 96 | # shell=False, # nosec 97 | # check=True, 98 | # stdout=subprocess.PIPE, 99 | # universal_newlines=True, 100 | # cwd=clone_dir 101 | # ) 102 | # assert commit_sha == 'testSha' 103 | -------------------------------------------------------------------------------- /test/test_log_utils.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | from unittest.mock import patch 3 | from time import sleep 4 | 5 | from log_utils.get_logger import ( 6 | LogFilter, Formatter, get_logger, init_logging, 7 | set_log_attrs, DEFAULT_LOG_LEVEL) 8 | from log_utils.db_handler import DBHandler 9 | from log_utils.monitoring import RepeatTimer 10 | 11 | 12 | class TestLogFilter(): 13 | def test_it_filters_message_with_default_mask(self): 14 | priv_values = ['foobar'] 15 | msg = 'hellofoobar' 16 | 17 | filter = LogFilter(priv_values) 18 | record = logging.makeLogRecord({'msg': msg}) 19 | result = filter.filter(record) 20 | 21 | assert(result is True) 22 | assert(record.getMessage() == f'hello{LogFilter.DEFAULT_MASK}') 23 | 24 | def test_it_filters_message_with_custom_mask(self): 25 | priv_values = ['foobar'] 26 | mask = 'TheNumber42' 27 | msg = 'hellofoobar' 28 | 29 | filter = LogFilter(priv_values, mask) 30 | record = logging.makeLogRecord({'msg': msg}) 31 | result = filter.filter(record) 32 | 33 | assert(result is True) 34 | assert(record.getMessage() == f'hello{mask}') 35 | 36 | def test_it_does_not_log_empty_messages(self): 37 | priv_values = [] 38 | msg = '' 39 | 40 | filter = LogFilter(priv_values) 41 | record = logging.makeLogRecord({'msg': msg}) 42 | result = filter.filter(record) 43 | 44 | assert(result is False) 45 | 46 | def test_it_replaces_message_invalid_access_key(self): 47 | priv_values = [] 48 | msg = f'hello{LogFilter.INVALID_ACCESS_KEY}' 49 | 50 | filter = LogFilter(priv_values) 51 | record = logging.makeLogRecord({'msg': msg}) 52 | result = filter.filter(record) 53 | 54 | assert(result is True) 55 | assert(record.getMessage() == ( 56 | 'Whoops, our S3 keys were rotated during your ' 57 | 'build and became out of date. This was not a ' 58 | 'problem with your site build, but if you restart ' 59 | 'the failed build it should work on the next try. ' 60 | 'Sorry for the inconvenience!' 
61 | )) 62 | 63 | 64 | class TestFormatter(): 65 | @patch('logging.Formatter.format') 66 | def test_it_populates_empty_strings_if_key_is_missing(self, mock_format): 67 | keys = ['foobar'] 68 | 69 | formatter = Formatter(keys) 70 | record = logging.makeLogRecord({}) 71 | 72 | formatter.format(record) 73 | 74 | assert(record.foobar == '') 75 | mock_format.assert_called_once_with(record) 76 | 77 | @patch('logging.Formatter.format') 78 | def test_it_ignores_key_if_present(self, mock_format): 79 | keys = ['foobar'] 80 | 81 | formatter = Formatter(keys) 82 | record = logging.makeLogRecord({'foobar': 'Hello!'}) 83 | 84 | formatter.format(record) 85 | 86 | assert(record.foobar == 'Hello!') 87 | mock_format.assert_called_once_with(record) 88 | 89 | 90 | class TestGetLogger(): 91 | def test_it_returns_a_logger_with_an_adapter_with_extras(self): 92 | name = 'foobar' 93 | attrs = {'foo': 'bar'} 94 | set_log_attrs(attrs) 95 | 96 | adapter = get_logger(name) 97 | 98 | assert(type(adapter) == logging.LoggerAdapter) 99 | assert(adapter.logger.name == name) 100 | assert(adapter.extra == attrs) 101 | 102 | 103 | @patch('psycopg2.connect') 104 | @patch('logging.basicConfig') 105 | class TestInitLogging(): 106 | def test_it_adds_a_stream_and_db_handlers(self, mock_basic_config, _): 107 | init_logging([], {'buildid': 1234}, 'foo') 108 | 109 | _, kwargs = mock_basic_config.call_args 110 | 111 | assert(kwargs['level'] == DEFAULT_LOG_LEVEL) 112 | assert(len(kwargs['handlers']) == 2) 113 | assert(type(kwargs['handlers'][0]) == logging.StreamHandler) 114 | assert(type(kwargs['handlers'][1]) == DBHandler) 115 | 116 | 117 | @patch('log_utils.monitoring.log_monitoring_metrics') 118 | class TestMonitorLogging(): 119 | def test_it_calls_logger_on_schedule(self, mock_metrics_logger): 120 | logger = get_logger('test') 121 | thread = RepeatTimer(1, mock_metrics_logger, [logger]) 122 | thread.start() 123 | sleep(5) 124 | mock_metrics_logger.assert_called_with(logger) 125 | thread.cancel() 126 | -------------------------------------------------------------------------------- /test/test_publish.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | from steps import publish 4 | from common import SITE_BUILD_DIR_PATH 5 | 6 | TEST_BUCKET = 'test-bucket' 7 | 8 | 9 | class TestPublish(): 10 | def test_it_calls_publish_to_s3(self, monkeypatch): 11 | mock_publish_to_s3 = Mock() 12 | monkeypatch.setattr('publishing.s3publisher.publish_to_s3', 13 | mock_publish_to_s3) 14 | 15 | kwargs = dict( 16 | base_url='/site/prefix', 17 | site_prefix='site/prefix', 18 | bucket=TEST_BUCKET, 19 | federalist_config={}, 20 | s3_client=None 21 | ) 22 | 23 | publish(**kwargs) 24 | 25 | mock_publish_to_s3.assert_called_once() 26 | 27 | # check that the `directory` kwarg is a string, not a Path 28 | _, actual_kwargs = mock_publish_to_s3.call_args_list[0] 29 | assert type(actual_kwargs['directory']) == str 30 | assert actual_kwargs['directory'] == str(SITE_BUILD_DIR_PATH) 31 | -------------------------------------------------------------------------------- /test/test_remote_logs.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | from log_utils.remote_logs import ( 4 | b64string, post_build_complete, 5 | post_build_error, post_build_timeout, 6 | post_build_processing) 7 | 8 | from log_utils.common import (STATUS_COMPLETE, STATUS_ERROR, STATUS_PROCESSING) 9 | 10 | MOCK_STATUS_URL = 
'https://status.example.com' 11 | 12 | 13 | class TestPostBuildComplete(): 14 | @patch('requests.post') 15 | @patch('requests.delete') 16 | def test_it_works(self, mock_del, mock_post): 17 | commit_sha = 'testSha1' 18 | post_build_complete(MOCK_STATUS_URL, commit_sha) 19 | mock_post.assert_called_once_with( 20 | MOCK_STATUS_URL, 21 | json={'status': STATUS_COMPLETE, 'message': '', 'commit_sha': commit_sha}, 22 | timeout=10 23 | ) 24 | 25 | 26 | class TestPostBuildProcessing(): 27 | @patch('requests.post') 28 | def test_it_works(self, mock_post): 29 | post_build_processing(MOCK_STATUS_URL) 30 | mock_post.assert_called_once_with( 31 | MOCK_STATUS_URL, 32 | json={'status': STATUS_PROCESSING, 'message': '', 'commit_sha': None}, 33 | timeout=10 34 | ) 35 | 36 | 37 | class TestPostBuildError(): 38 | @patch('requests.post') 39 | @patch('requests.delete') 40 | def test_it_works(self, mock_del, mock_post): 41 | commit_sha = 'testSha2' 42 | post_build_error(MOCK_STATUS_URL, 'error msg', commit_sha) 43 | 44 | assert mock_post.call_count == 1 45 | 46 | mock_post.assert_any_call( 47 | MOCK_STATUS_URL, 48 | json={ 49 | 'status': STATUS_ERROR, 'message': b64string('error msg'), 'commit_sha': commit_sha 50 | }, 51 | timeout=10 52 | ) 53 | 54 | 55 | class TestPostBuildTimeout(): 56 | @patch('requests.post') 57 | @patch('requests.delete') 58 | def test_it_works(self, mock_del, mock_post): 59 | commit_sha = 'testSha3' 60 | post_build_timeout(MOCK_STATUS_URL, commit_sha) 61 | 62 | expected_output = b64string( 63 | 'The build did not complete. It may have timed out.') 64 | 65 | assert mock_post.call_count == 1 66 | mock_post.assert_any_call( 67 | MOCK_STATUS_URL, 68 | json={'status': STATUS_ERROR, 'message': expected_output, 'commit_sha': commit_sha}, 69 | timeout=10 70 | ) 71 | -------------------------------------------------------------------------------- /test/test_repo_config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import repo_config 4 | import pytest 5 | from .support import create_file, patch_dir 6 | import steps 7 | 8 | 9 | @pytest.fixture 10 | def patch_clone_dir(monkeypatch): 11 | yield from patch_dir(monkeypatch, steps.build, 'CLONE_DIR_PATH') 12 | 13 | 14 | class TestRepoConfig(): 15 | def test_it_loads_federalist_json_when_it_exists(self, patch_clone_dir): 16 | filename = 'federalist.json' 17 | json_contents = json.dumps({ 18 | 'name': filename, 19 | }) 20 | create_file(patch_clone_dir / filename, contents=json_contents) 21 | result = repo_config.from_json_file(patch_clone_dir) 22 | assert result.config['name'] == filename 23 | assert len(os.listdir(patch_clone_dir)) == 1 24 | 25 | def test_it_loads_pages_json_when_it_exists(self, patch_clone_dir): 26 | filename = 'pages.json' 27 | json_contents = json.dumps({ 28 | 'name': filename, 29 | }) 30 | create_file(patch_clone_dir / filename, contents=json_contents) 31 | result = repo_config.from_json_file(patch_clone_dir) 32 | assert result.config['name'] == filename 33 | assert len(os.listdir(patch_clone_dir)) == 1 34 | 35 | def test_it_loads_pages_json_when_federalist_json_also_exists(self, patch_clone_dir): 36 | filename = 'federalist.json' 37 | json_contents = json.dumps({ 38 | 'name': filename, 39 | }) 40 | create_file(patch_clone_dir / filename, contents=json_contents) 41 | filename = 'pages.json' 42 | json_contents = json.dumps({ 43 | 'name': filename, 44 | }) 45 | create_file(patch_clone_dir / filename, contents=json_contents) 46 | result = 
repo_config.from_json_file(patch_clone_dir) 47 | assert result.config['name'] == 'pages.json' 48 | assert len(os.listdir(patch_clone_dir)) == 2 49 | -------------------------------------------------------------------------------- /test/test_runner.py: -------------------------------------------------------------------------------- 1 | from pytest import raises 2 | import shlex 3 | import subprocess # nosec 4 | from unittest.mock import Mock, patch 5 | 6 | from runner import run, setuser, NVM_PATH, RVM_PATH 7 | 8 | 9 | @patch('subprocess.Popen', autospec=True) 10 | def test_run(mock_popen): 11 | mock_logger = Mock() 12 | command = 'foobar' 13 | 14 | mock_popen.return_value = Mock(returncode=0, stdout=Mock(readline=Mock(return_value='foobar'))) 15 | 16 | run(mock_logger, command) 17 | 18 | mock_popen.assert_called_once_with( 19 | shlex.split(command), 20 | cwd=None, 21 | env=None, 22 | shell=False, 23 | executable=None, 24 | stderr=subprocess.STDOUT, 25 | stdout=subprocess.PIPE, 26 | bufsize=1, 27 | encoding='utf-8', 28 | text=True, 29 | preexec_fn=setuser, 30 | ) 31 | 32 | mock_logger.info.assert_called_once_with('foobar') 33 | 34 | 35 | @patch('subprocess.Popen', autospec=True) 36 | def test_run_popen_failure(mock_popen): 37 | mock_logger = Mock() 38 | command = 'foobar' 39 | 40 | mock_popen.side_effect = ValueError('ugh') 41 | 42 | with raises(ValueError): 43 | run(mock_logger, command) 44 | 45 | mock_popen.assert_called_once_with( 46 | shlex.split(command), 47 | cwd=None, 48 | env=None, 49 | shell=False, 50 | executable=None, 51 | stderr=subprocess.STDOUT, 52 | stdout=subprocess.PIPE, 53 | bufsize=1, 54 | encoding='utf-8', 55 | text=True, 56 | preexec_fn=setuser, 57 | ) 58 | 59 | mock_logger.error.assert_any_call('Encountered a problem invoking Popen.') 60 | mock_logger.error.assert_any_call('ugh') 61 | 62 | 63 | @patch('subprocess.Popen', autospec=True) 64 | def test_run_popen_failure_check_false(mock_popen): 65 | mock_logger = Mock() 66 | command = 'foobar' 67 | return_code = 1 68 | 69 | mock_popen.side_effect = ValueError('ugh') 70 | 71 | result = run(mock_logger, command, check=False) 72 | 73 | assert result == return_code 74 | 75 | mock_popen.assert_called_once_with( 76 | shlex.split(command), 77 | cwd=None, 78 | env=None, 79 | shell=False, 80 | executable=None, 81 | stderr=subprocess.STDOUT, 82 | stdout=subprocess.PIPE, 83 | bufsize=1, 84 | encoding='utf-8', 85 | text=True, 86 | preexec_fn=setuser, 87 | ) 88 | 89 | mock_logger.error.assert_any_call('Encountered a problem invoking Popen.') 90 | mock_logger.error.assert_any_call('ugh') 91 | 92 | 93 | @patch('subprocess.Popen', autospec=True) 94 | def test_run_popen_output(mock_popen): 95 | mock_logger = Mock() 96 | command = 'foobar' 97 | 98 | string_output = 'string_output' 99 | mock_popen.return_value = Mock(returncode=0, stdout=Mock(readline=Mock(return_value=string_output))) # noqa: E501 100 | 101 | result = run(mock_logger, command) 102 | assert result == string_output 103 | 104 | mock_popen.assert_called_once_with( 105 | shlex.split(command), 106 | cwd=None, 107 | env=None, 108 | shell=False, 109 | executable=None, 110 | stderr=subprocess.STDOUT, 111 | stdout=subprocess.PIPE, 112 | bufsize=1, 113 | encoding='utf-8', 114 | text=True, 115 | preexec_fn=setuser, 116 | ) 117 | 118 | 119 | @patch('subprocess.Popen', autospec=True) 120 | def test_run_os_failure_check_false(mock_popen): 121 | mock_logger = Mock() 122 | command = 'foobar' 123 | 124 | mock_popen.side_effect = OSError('ugh') 125 | 126 | result = run(mock_logger, command, 
check=False) 127 | 128 | mock_popen.assert_called_once_with( 129 | shlex.split(command), 130 | cwd=None, 131 | env=None, 132 | shell=False, 133 | executable=None, 134 | stderr=subprocess.STDOUT, 135 | stdout=subprocess.PIPE, 136 | bufsize=1, 137 | encoding='utf-8', 138 | text=True, 139 | preexec_fn=setuser 140 | ) 141 | 142 | mock_logger.error.assert_any_call( 143 | 'Encountered a problem executing `' + ' '.join(shlex.split(command)) + '`.' 144 | ) 145 | mock_logger.error.assert_any_call('ugh') 146 | 147 | assert result == 1 148 | 149 | 150 | @patch('subprocess.Popen', autospec=True) 151 | def test_run_os_failure_check_true(mock_popen): 152 | mock_logger = Mock() 153 | command = 'foobar' 154 | 155 | mock_popen.side_effect = OSError('ugh') 156 | 157 | with raises(OSError, match='ugh'): 158 | run(mock_logger, command) 159 | 160 | mock_popen.assert_called_once_with( 161 | shlex.split(command), 162 | cwd=None, 163 | env=None, 164 | shell=False, 165 | executable=None, 166 | stderr=subprocess.STDOUT, 167 | stdout=subprocess.PIPE, 168 | bufsize=1, 169 | encoding='utf-8', 170 | text=True, 171 | preexec_fn=setuser 172 | ) 173 | 174 | mock_logger.error.assert_any_call( 175 | 'Encountered a problem executing `' + ' '.join(shlex.split(command)) + '`.' 176 | ) 177 | mock_logger.error.assert_any_call('ugh') 178 | 179 | 180 | @patch('subprocess.Popen', autospec=True) 181 | def test_run_command_failure_check_false(mock_popen): 182 | mock_logger = Mock() 183 | command = 'foobar' 184 | return_code = 2 185 | 186 | mock_popen.return_value = Mock(returncode=return_code, stdout=Mock(readline=Mock(return_value='text'))) # noqa: E501 187 | 188 | result = run(mock_logger, command, check=False) 189 | 190 | mock_popen.assert_called_once_with( 191 | shlex.split(command), 192 | cwd=None, 193 | env=None, 194 | shell=False, 195 | executable=None, 196 | stderr=subprocess.STDOUT, 197 | stdout=subprocess.PIPE, 198 | bufsize=1, 199 | encoding='utf-8', 200 | text=True, 201 | preexec_fn=setuser 202 | ) 203 | 204 | assert result == return_code 205 | 206 | 207 | @patch('subprocess.Popen', autospec=True) 208 | def test_run_command_failure_check_true(mock_popen): 209 | mock_logger = Mock() 210 | command = 'foobar' 211 | return_code = 2 212 | 213 | mock_popen.return_value = Mock(returncode=return_code, stdout=Mock(readline=Mock(return_value='text'))) # noqa: E501 214 | 215 | with raises(subprocess.CalledProcessError): 216 | run(mock_logger, command) 217 | 218 | mock_popen.assert_called_once_with( 219 | shlex.split(command), 220 | cwd=None, 221 | env=None, 222 | shell=False, 223 | executable=None, 224 | stderr=subprocess.STDOUT, 225 | stdout=subprocess.PIPE, 226 | bufsize=1, 227 | encoding='utf-8', 228 | text=True, 229 | preexec_fn=setuser 230 | ) 231 | 232 | 233 | @patch('subprocess.Popen', autospec=True) 234 | def test_run_with_node(mock_popen): 235 | mock_logger = Mock() 236 | command = 'foobar' 237 | cwd = '/foo' 238 | env = {} 239 | 240 | mock_popen.return_value = Mock(returncode=0, stdout=Mock(readline=Mock(return_value='foobar'))) 241 | 242 | run(mock_logger, command, cwd=cwd, env=env, node=True) 243 | 244 | mock_popen.assert_called_once_with( 245 | f'source {NVM_PATH} && {command}', 246 | cwd=cwd, 247 | env=env, 248 | shell=True, # nosec 249 | executable='/bin/bash', 250 | stderr=subprocess.STDOUT, 251 | stdout=subprocess.PIPE, 252 | bufsize=1, 253 | encoding='utf-8', 254 | text=True, 255 | preexec_fn=setuser 256 | ) 257 | 258 | 259 | @patch('subprocess.Popen', autospec=True) 260 | def test_run_with_ruby(mock_popen): 261 | 
mock_logger = Mock() 262 | command = 'foobar' 263 | cwd = '/foo' 264 | env = {} 265 | 266 | mock_popen.return_value = Mock(returncode=0, stdout=Mock(readline=Mock(return_value='foobar'))) 267 | 268 | run(mock_logger, command, cwd=cwd, env=env, ruby=True) 269 | 270 | mock_popen.assert_called_once_with( 271 | f'source {RVM_PATH} && {command}', 272 | cwd=cwd, 273 | env=env, 274 | shell=True, # nosec 275 | executable='/bin/bash', 276 | stderr=subprocess.STDOUT, 277 | stdout=subprocess.PIPE, 278 | bufsize=1, 279 | encoding='utf-8', 280 | text=True, 281 | preexec_fn=setuser 282 | ) 283 | 284 | 285 | def test_access_environ(): 286 | mock_logger = Mock() 287 | command = 'cat /proc/1/environ' 288 | env = {} 289 | 290 | run(mock_logger, command, env=env, check=False) 291 | 292 | mock_logger.info.assert_any_call('cat: /proc/1/environ: Permission denied') 293 | 294 | 295 | @patch('subprocess.Popen', autospec=True) 296 | def test_run_skip_log(mock_popen): 297 | mock_logger = Mock() 298 | command = 'foobar' 299 | 300 | mock_popen.return_value = Mock(returncode=0, stdout=Mock(readline=Mock(return_value='foobar'))) 301 | 302 | run(mock_logger, command, skip_log=True) 303 | 304 | mock_popen.assert_called_once_with( 305 | shlex.split(command), 306 | cwd=None, 307 | env=None, 308 | shell=False, 309 | executable=None, 310 | stderr=subprocess.STDOUT, 311 | stdout=subprocess.PIPE, 312 | bufsize=1, 313 | encoding='utf-8', 314 | text=True, 315 | preexec_fn=setuser, 316 | ) 317 | 318 | mock_logger.info.assert_not_called() 319 | --------------------------------------------------------------------------------
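The test_runner.py assertions above largely pin down how the build container shells out: subprocess.Popen with shlex-split arguments, stdout and stderr merged, line-buffered UTF-8 text, a setuser preexec_fn, bash-sourced nvm/rvm wrappers when node or ruby is requested, logged-and-swallowed errors when check=False, and CalledProcessError on a non-zero exit when check=True. For reference, a simplified sketch of a run helper consistent with those argument shapes follows; it is not the repository's src/runner/__init__.py, and the NVM_PATH/RVM_PATH values, the setuser body, and the output-streaming loop are assumptions made for illustration.

import shlex
import subprocess  # nosec

NVM_PATH = '/usr/local/nvm/nvm.sh'       # assumed location
RVM_PATH = '/usr/local/rvm/scripts/rvm'  # assumed location


def setuser():
    # Placeholder: the real helper drops privileges to the unprivileged build user
    pass


def run(logger, command, cwd=None, env=None, check=True,
        node=False, ruby=False, skip_log=False):
    shell = node or ruby
    if shell:
        # Source the requested runtime manager(s) so node/ruby are on PATH
        sources = []
        if node:
            sources.append(f'source {NVM_PATH}')
        if ruby:
            sources.append(f'source {RVM_PATH}')
        args = ' && '.join(sources + [command])
        executable = '/bin/bash'
    else:
        args = shlex.split(command)
        executable = None

    try:
        proc = subprocess.Popen(  # nosec - commands come from trusted build steps
            args,
            cwd=cwd,
            env=env,
            shell=shell,
            executable=executable,
            stderr=subprocess.STDOUT,
            stdout=subprocess.PIPE,
            bufsize=1,
            encoding='utf-8',
            text=True,
            preexec_fn=setuser,
        )
    except ValueError as err:
        logger.error('Encountered a problem invoking Popen.')
        logger.error(str(err))
        if check:
            raise
        return 1
    except OSError as err:
        logger.error('Encountered a problem executing `' + ' '.join(shlex.split(command)) + '`.')
        logger.error(str(err))
        if check:
            raise
        return 1

    # Stream child output line by line; skip_log suppresses per-line logging
    last_line = ''
    for line in iter(proc.stdout.readline, ''):
        last_line = line.strip()
        if not skip_log:
            logger.info(last_line)
    proc.wait()

    if proc.returncode != 0:
        if check:
            raise subprocess.CalledProcessError(proc.returncode, command)
        return proc.returncode
    return last_line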