├── .cloudgov ├── manifest.yml └── vars │ ├── pages-dev.yml │ ├── pages-production.yml │ └── pages-staging.yml ├── .codeclimate.yml ├── .coveragerc ├── .cz.json ├── .dockerignore ├── .flake8 ├── .github ├── dependabot.yml ├── pull_request_template.md └── workflows │ └── codeql-analysis.yml ├── .gitignore ├── .local.sample.json ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Dockerfile ├── Dockerfile-db ├── Dockerfile-exp ├── Dockerfile-test ├── LICENSE.md ├── README.md ├── bin ├── migrate.sql └── push-docker-image.sh ├── ci ├── partials │ ├── audit.yml │ ├── build.yml │ ├── deploy.yml │ └── test.yml ├── pipeline-dev.yml ├── pipeline-production.yml ├── pipeline-staging.yml ├── pipeline.yml └── tasks │ ├── deploy.sh │ ├── pip-audit.sh │ └── test.sh ├── docker-compose.yml ├── docker └── ua-attach-config.sh ├── echo-server ├── Dockerfile └── run.py ├── pytest.ini ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── src ├── build.py ├── common.py ├── crypto │ ├── __init__.py │ └── decrypt.py ├── log_utils │ ├── __init__.py │ ├── common.py │ ├── db_handler.py │ ├── delta_to_mins_secs.py │ ├── get_logger.py │ ├── monitoring.py │ └── remote_logs.py ├── main.py ├── publishing │ ├── __init__.py │ ├── models.py │ └── s3publisher.py ├── repo_config │ ├── __init__.py │ └── repo_config.py ├── runner │ └── __init__.py └── steps │ ├── __init__.py │ ├── build.py │ ├── cache.py │ ├── exceptions.py │ ├── fetch.py │ └── publish.py └── test ├── __init__.py ├── publishing ├── __init__.py ├── test_models.py └── test_s3publisher.py ├── repo_config ├── __init__.py └── test_repo_config.py ├── support.py ├── test_build.py ├── test_cache.py ├── test_crypto.py ├── test_fetch.py ├── test_log_utils.py ├── test_publish.py ├── test_remote_logs.py ├── test_repo_config.py └── test_runner.py /.cloudgov/manifest.yml: -------------------------------------------------------------------------------- 1 | --- 2 | applications: 3 | - name: ((product))-build-container((env_postfix)) 4 | no-route: true 5 | health-check-type: process 6 | instances: 0 7 | services: 8 | - federalist-((env))-rds 9 | - federalist-((env))-uev-key 10 | - pages-((env))-encryption 11 | metadata: 12 | labels: 13 | type: build-container 14 | name: default 15 | annotations: 16 | command: cd app && python main.py -p 17 | - name: ((product))-build-container-exp((env_postfix)) 18 | no-route: true 19 | health-check-type: process 20 | instances: 0 21 | services: 22 | - federalist-((env))-rds 23 | - federalist-((env))-uev-key 24 | metadata: 25 | labels: 26 | type: build-container 27 | name: exp 28 | annotations: 29 | command: cd app && ./build -p 30 | -------------------------------------------------------------------------------- /.cloudgov/vars/pages-dev.yml: -------------------------------------------------------------------------------- 1 | env: dev 2 | env_postfix: -dev 3 | product: pages 4 | -------------------------------------------------------------------------------- /.cloudgov/vars/pages-production.yml: -------------------------------------------------------------------------------- 1 | env: production 2 | env_postfix: '-production' 3 | product: pages 4 | -------------------------------------------------------------------------------- /.cloudgov/vars/pages-staging.yml: -------------------------------------------------------------------------------- 1 | env: staging 2 | env_postfix: -staging 3 | product: pages -------------------------------------------------------------------------------- /.codeclimate.yml: 
-------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | plugins: 4 | pep8: 5 | enabled: true 6 | 7 | exclude_patterns: 8 | - "bin" 9 | - "main.py" 10 | - "manifests" 11 | - "test" -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = */.local/* -------------------------------------------------------------------------------- /.cz.json: -------------------------------------------------------------------------------- 1 | { 2 | "commitizen": { 3 | "name": "cz_customize", 4 | "version_scheme": "semver", 5 | "version_provider": "scm", 6 | "update_changelog_on_bump": true, 7 | "major_version_zero": false, 8 | "bump_message": "chore: release $new_version", 9 | "gpg_sign": true, 10 | "changelog_incremental": true, 11 | "customize": { 12 | "message_template": "{{change_type}}:{% if show_message %} {{message}}{% endif %}", 13 | "example": "feat: this feature enable customize through config file", 14 | "schema": ": ", 15 | "schema_pattern": "^(build|chore|ci|docs|feat|fix|perf|refactor|revert|style|test){1}(\\([\\w\\-\\.]+\\))?(!)?: ([\\w \\-'])+([\\s\\S]*)", 16 | "bump_pattern": "^(.+!|BREAKING CHANGE|chore|docs|feat|fix|perf|refactor|revert|style|test)(\\([\\w\\-\\.]+\\))?:", 17 | "bump_map": { 18 | ".+!": "MAJOR", 19 | "BREAKING CHANGE": "MAJOR", 20 | "feat": "MINOR", 21 | "fix": "PATCH", 22 | "chore": "PATCH", 23 | "docs": "PATCH", 24 | "perf": "PATCH", 25 | "refactor": "PATCH", 26 | "revert": "MINOR", 27 | "style": "PATCH", 28 | "test": "PATCH" 29 | }, 30 | "change_type_order": ["Breaking Changes", "Added", "Fixed", "Performance", "Reverted", "Maintenance", "Documentation"], 31 | "commit_parser": "^((?Pchore|docs|feat|fix|perf|refactor|revert|style|test|BREAKING CHANGE)(?:\\((?P[^()\r\n]*)\\)|\\()?(?P!)?|\\w+!):\\s(?P.*)?", 32 | "changelog_pattern": "^(.+!|BREAKING CHANGE|chore|docs|feat|fix|perf|refactor|revert|style|test)(\\([\\w\\-\\.]+\\))?:", 33 | "change_type_map": { 34 | "BREAKING CHANGE": "Breaking Changes", 35 | "chore": "Maintenance", 36 | "docs": "Documentation", 37 | "feat": "Added", 38 | "fix": "Fixed", 39 | "perf": "Performance", 40 | "refactor": "Maintenance", 41 | "revert": "Reverted", 42 | "style": "Maintenance", 43 | "test": "Maintenance" 44 | } 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | # Disable version updates for pip dependencies 8 | # This still allows for security updates but is pretty sloppy configuration from Github 9 | # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#open-pull-requests-limit 10 | open-pull-requests-limit: 0 11 | commit-message: 12 | prefix: '[ci skip] ' 13 | 
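For context, the `.cz.json` commitizen configuration shown above is what maps conventional-commit messages to semver bumps and changelog sections. The sketch below is a rough, hand-rolled approximation of how `bump_pattern` and `bump_map` interact; it is not commitizen's actual algorithm, and the helper name is hypothetical:

```python
import re

# Patterns copied from the .cz.json above (JSON escaping removed).
BUMP_PATTERN = r"^(.+!|BREAKING CHANGE|chore|docs|feat|fix|perf|refactor|revert|style|test)(\([\w\-\.]+\))?:"
BUMP_MAP = {
    ".+!": "MAJOR",
    "BREAKING CHANGE": "MAJOR",
    "feat": "MINOR",
    "fix": "PATCH",
    "chore": "PATCH",
    "docs": "PATCH",
    "perf": "PATCH",
    "refactor": "PATCH",
    "revert": "MINOR",
    "style": "PATCH",
    "test": "PATCH",
}


def bump_for(message: str):
    """Return the semver increment a commit message would trigger, or None (sketch)."""
    match = re.match(BUMP_PATTERN, message)
    if not match:
        return None  # message does not trigger a version bump
    prefix = match.group(1)
    # First bump_map key whose pattern matches the commit-type prefix wins.
    for key_pattern, increment in BUMP_MAP.items():
        if re.match(key_pattern, prefix):
            return increment
    return None


print(bump_for("feat(publish): upload files in threads"))  # MINOR
print(bump_for("fix: add timeout to requests"))            # PATCH
print(bump_for("refactor!: drop node 16 support"))         # MAJOR (matches '.+!')
```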
-------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Changes proposed in this pull request: 2 | - 3 | - 4 | - 5 | 6 | ## security considerations 7 | [Note the any security considerations here, or make note of why there are none] 8 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ staging, main ] 6 | pull_request: 7 | branches: [ staging, main ] 8 | schedule: 9 | - cron: '28 17 * * 4' 10 | 11 | jobs: 12 | analyze: 13 | name: Analyze 14 | runs-on: ubuntu-latest 15 | permissions: 16 | actions: read 17 | contents: read 18 | security-events: write 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | language: [ 'python' ] 24 | 25 | steps: 26 | - name: Checkout repository 27 | uses: actions/checkout@v3 28 | 29 | - name: Initialize CodeQL 30 | uses: github/codeql-action/init@v2 31 | with: 32 | languages: ${{ matrix.language }} 33 | 34 | - name: Autobuild 35 | uses: github/codeql-action/autobuild@v2 36 | 37 | - name: Perform CodeQL Analysis 38 | uses: github/codeql-action/analyze@v2 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .cache 2 | .coverage 3 | .env 4 | .local 5 | .pytest_cache/ 6 | .python-version 7 | .vscode 8 | tmp 9 | __pycache__/ 10 | coverage/ 11 | ci/vars/.* -------------------------------------------------------------------------------- /.local.sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "aws_access_key_id": "ACCESS KEY ID", 3 | "aws_default_region": "REGION", 4 | "aws_secret_access_key": "SECRET ACCESS KEY", 5 | "bucket": "BUCKET", 6 | "github_token": "TOKEN", 7 | "status_callback": "http://echoserver:8989/status", 8 | "baseurl": "/", 9 | "config": "", 10 | "build_id": "12345", 11 | "generator": "GENERATOR", 12 | "owner": "OWNER", 13 | "branch": "BRANCH", 14 | "repository": "REPOSITORY", 15 | "site_prefix": "preview/OWNER/REPOSITORY", 16 | "user_environment_variables": [] 17 | } -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.1.4 (2025-06-10) 2 | 3 | ### Fixed 4 | 5 | - Sets proper headings on CONTRIBUTING.md 6 | 7 | ### Maintenance 8 | 9 | - Update CONTRIBUTING.md 10 | - Update dependency requests to v2.32.4 11 | - update dependencies 12 | 13 | ## 0.1.3 (2024-12-05) 14 | 15 | ### Maintenance 16 | 17 | - Install and set default node to v20 18 | 19 | ## 0.1.2 (2024-10-29) 20 | 21 | ### Maintenance 22 | 23 | - support node v22, warn on v18 (#4644) 24 | - Bump cryptography from 42.0.7 to 43.0.1 in the pip 25 | 26 | ## 0.1.1 (2024-06-28) 27 | 28 | ### Maintenance 29 | 30 | - use correct audit step name 31 | - switch to ci boot 32 | - fix production release 33 | - add release workflow, wait for certain passes 34 | 35 | ## 0.1.0 (2024-06-21) 36 | 37 | ### Added 38 | 39 | - save build container metrics to API 40 | - Add dev deployment env for PRs to staging 41 | - Switch to using harden container for cf-image 42 | 43 | ### Fixed 44 | 45 | - Decrypt predefined keys in build params 46 | - Run build 
CalledProcessError exception 47 | - remove puts to git-resource in ci pipeline 48 | - install required usg dependencies 49 | - node 16 temporarily allowed 50 | - Add additional lib deps for site builds 51 | - Remove stack param since it is docker image 52 | - Update tests to account for request timeout kwarg addition 53 | - Add timeout to requests based on bandit findings 54 | - CI slack emoji for successful nightly restage 55 | - CI pipeline to properly use src input for nightly rebuild 56 | - Update tests with additional mock calls 57 | - Remove f-string for gem update command 58 | 59 | ### Performance 60 | 61 | - run uploader task in threads 62 | 63 | ### Maintenance 64 | 65 | - update docs, drop nightly restage 66 | - use correct input pipeline names 67 | - use pipeline tasks, use pr/main/tag release 68 | - Bump requests to v2.32.3 69 | - Add decrypt to build site params 70 | - Enable Dependabot security scanning (#458) 71 | - container hardening 72 | - Add dependency auditing with pip-audit (#458) 73 | - **ci**: Switch to general-task and registry-image for CI jobs 74 | - Add hardened git resource 75 | - Simplify CI notifications from task hooks 76 | - Update resource types and python deps to use hardened images 77 | - Adjust for Github GPG token expiration 78 | - don't build node 16 79 | - default to node 18 (#437) 80 | - Update app stack to cflinuxfs4 81 | - Add gem update --system for Jekyll builds 82 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Policy 2 | 3 | Cloud.gov is an open source project operated by the U.S. General Services Administration (GSA) to support federal agency missions. While we value transparency and collaboration, we must balance openness with the responsibilities of operating a secure, compliant, and trusted federal platform. 4 | 5 | ## ✅ Who can contribute 6 | We welcome contributions from: 7 | 8 | - Employees of U.S. federal agencies 9 | - Contractors working under a current agreement with a U.S. government entity 10 | - GSA-approved contributors as part of official interagency collaboration 11 | 12 | ## ❌ Who we cannot accept contributions from 13 | To avoid the appearance of government endorsement, manage supply chain risk, and maintain the integrity of our compliance posture, we do **not** accept unsolicited contributions from: 14 | 15 | - Individuals unaffiliated with the U.S. government 16 | - International contributors or organizations 17 | - Unvetted accounts or first-time contributors submitting minor changes 18 | 19 | If you're unsure whether your contribution fits, feel free to open an issue first so we can discuss it. 
20 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax = docker/dockerfile:1.2 2 | FROM ubuntu:22.04 3 | 4 | # Install general dependencies 5 | RUN apt-get update \ 6 | && apt-get install -y --no-install-recommends \ 7 | apt-utils build-essential git curl libssl-dev \ 8 | libreadline-dev zlib1g-dev libffi-dev libgl1-mesa-glx \ 9 | sudo gnupg ca-certificates ubuntu-advantage-tools \ 10 | autoconf automake libgdbm-dev libncurses5-dev \ 11 | libsqlite3-dev libtool libyaml-dev pkg-config libgmp-dev \ 12 | libpq-dev libxi6 libjpeg-dev libpng-dev libtiff-dev libgif-dev \ 13 | libwebp-dev wget python3 python3-dev python3-pip\ 14 | # Ruby deps 15 | gawk bison sqlite3 16 | 17 | # Deps for container hardening 18 | RUN ln -sf "/usr/share/zoneinfo/$SYSTEM_TIMEZONE" /etc/localtime 19 | COPY docker/ua-attach-config.sh . 20 | RUN --mount=type=secret,id=UA_TOKEN ./ua-attach-config.sh && \ 21 | ua attach --attach-config ua-attach-config.yaml && \ 22 | rm ua-attach-config.yaml 23 | RUN apt-get -y -q install usg 24 | 25 | # Install and setup en_US.UTF-8 locale 26 | # This is necessary so that output from node/ruby/python 27 | # won't break or have weird indecipherable characters 28 | RUN apt-get update && \ 29 | apt-get install --reinstall -y locales && \ 30 | sed -i 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \ 31 | locale-gen en_US.UTF-8 32 | 33 | # Install headless chrome 34 | ARG DEBIAN_FRONTEND=noninteractive 35 | RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ 36 | && echo 'deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main' >> /etc/apt/sources.list.d/google.list \ 37 | && apt-get update \ 38 | && apt-get install -y google-chrome-unstable --no-install-recommends \ 39 | && rm -rf /var/lib/apt/lists/* 40 | 41 | ENV LANG en_US.UTF-8 42 | ENV LANGUAGE en_US 43 | ENV LC_ALL en_US.UTF-8 44 | 45 | RUN dpkg-reconfigure --frontend noninteractive locales 46 | 47 | SHELL ["/bin/bash", "-c"] 48 | 49 | # Disable ipv6 to enable fetching gpg keys for rvm 50 | # http://rvm.io/rvm/security#ipv6-issues 51 | RUN mkdir -p /root/.gnupg \ 52 | && echo 'disable-ipv6' >> /root/.gnupg/dirmngr.conf \ 53 | && echo 'rvm_silence_path_mismatch_check_flag=1' >> /etc/rvmrc \ 54 | && echo 'install: --no-document\nupdate: --no-document' >> /etc/.gemrc 55 | 56 | RUN useradd --no-log-init --system --create-home --groups sudo system \ 57 | && echo 'system ALL=(ALL:ALL) NOPASSWD:ALL' >> /etc/sudoers.d/system 58 | 59 | RUN useradd --no-log-init --system --create-home customer 60 | 61 | ############################################################### 62 | # Run these steps as the 'system' user 63 | # 64 | USER system 65 | 66 | # Install rvm 67 | RUN set -ex \ 68 | && for key in \ 69 | 409B6B1796C275462A1703113804BB82D39DC0E3 \ 70 | 7D2BAF1CF37B13E2069D6956105BD0E739499BDB \ 71 | ; do \ 72 | sudo gpg --batch --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys "$key" || \ 73 | sudo gpg --batch --keyserver hkp://pool.sks-keyservers.net:80 --recv-keys "$key" || \ 74 | sudo gpg --batch --keyserver hkp://ipv4.pool.sks-keyservers.net --recv-keys "$key" || \ 75 | sudo gpg --batch --keyserver hkp://pgp.mit.edu:80 --recv-keys "$key" || \ 76 | sudo gpg --batch --keyserver hkp://keyserver.pgp.com --recv-keys "$key" ; \ 77 | done \ 78 | # We use 'sudo' here to support multi-user install 79 | # 
http://rvm.io/rvm/install#1-download-and-run-the-rvm-installation-script 80 | && \curl -sSL https://get.rvm.io | sudo -n bash -s stable 81 | 82 | # Add 'customer' user to rvm group 83 | RUN sudo usermod --append --groups rvm customer 84 | 85 | ############################################################### 86 | # Run these steps as the customer user 87 | # 88 | USER customer 89 | 90 | # Configure rvm and install default Ruby 91 | ENV RUBY_VERSION 3.1.4 92 | ENV RUBY_VERSION_MIN 3.0.6 93 | RUN source /usr/local/rvm/scripts/rvm \ 94 | # Fail if deps are missing, won't prompt for sudo 95 | && rvm autolibs read-fail \ 96 | && rvm install --no-docs $RUBY_VERSION \ 97 | && rvm use --default $RUBY_VERSION \ 98 | # Make rvm available in non-login bash shells 99 | && echo 'source /usr/local/rvm/scripts/rvm' >> ~/.bashrc 100 | 101 | # Update to the latest RubyGems 102 | RUN source /usr/local/rvm/scripts/rvm && \ 103 | rvm rubygems 3.4.22 104 | 105 | # Default to Node 20 106 | ENV NODE_VERSION lts/iron 107 | RUN curl https://raw.githubusercontent.com/nvm-sh/nvm/v0.37.2/install.sh | bash \ 108 | && \. "$HOME/.nvm/nvm.sh" \ 109 | && nvm install $NODE_VERSION 110 | 111 | 112 | ############################################################### 113 | # Run these steps and the container as the 'root' user 114 | # 115 | # This is necessary because the build code needs to have 116 | # rights to switch to 'customer' user 117 | # 118 | USER root 119 | 120 | WORKDIR /app 121 | 122 | COPY ./requirements.txt ./requirements.txt 123 | 124 | RUN pip3 install -r requirements.txt \ 125 | && rm ./requirements.txt 126 | 127 | RUN ln -s /usr/bin/python3 /usr/bin/python 128 | 129 | COPY ./src ./ 130 | 131 | # Container Hardening 132 | RUN usg fix cis_level1_server 133 | RUN apt-get purge --auto-remove -y ubuntu-advantage-tools 134 | -------------------------------------------------------------------------------- /Dockerfile-db: -------------------------------------------------------------------------------- 1 | FROM postgres:11 2 | 3 | COPY ./bin/migrate.sql /docker-entrypoint-initdb.d/migrate.sql -------------------------------------------------------------------------------- /Dockerfile-exp: -------------------------------------------------------------------------------- 1 | ################# 2 | # Build Image # 3 | ################# 4 | FROM python:3.8-buster AS builder 5 | WORKDIR /app 6 | RUN pip install pyinstaller staticx patchelf-wrapper 7 | COPY ./src ./requirements.txt ./ 8 | RUN pip install -r requirements.txt 9 | RUN \ 10 | pyinstaller -F -n tmp-build --distpath ./dist --hidden-import='pkg_resources.py2_warn' ./main.py \ 11 | && staticx ./dist/tmp-build ./dist/build 12 | 13 | ################# 14 | # Final Image # 15 | ################# 16 | FROM ruby:2.7-slim 17 | 18 | RUN \ 19 | apt-get update && apt-get install -y --no-install-recommends \ 20 | curl \ 21 | git \ 22 | gnupg \ 23 | dirmngr \ 24 | wget \ 25 | sudo \ 26 | gawk bison sqlite3 patch g++ gcc autoconf automake libgdbm-dev \ 27 | libncurses5-dev libsqlite3-dev libtool make patch pkg-config \ 28 | libreadline-dev \ 29 | && rm -rf /var/lib/apt/lists/* 30 | 31 | # Install headless chrome 32 | RUN \ 33 | wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ 34 | && sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \ 35 | && apt-get update \ 36 | && apt-get install -y google-chrome-unstable --no-install-recommends \ 37 | && rm -rf /var/lib/apt/lists/* 38 | 
39 | SHELL ["/bin/bash", "-c"] 40 | 41 | # Disable ipv6 to enable fetching gpg keys for rvm 42 | # http://rvm.io/rvm/security#ipv6-issues 43 | RUN mkdir -p /root/.gnupg \ 44 | && echo 'disable-ipv6' >> /root/.gnupg/dirmngr.conf \ 45 | && echo 'rvm_silence_path_mismatch_check_flag=1' >> /etc/rvmrc \ 46 | && echo 'install: --no-document\nupdate: --no-document' >> /etc/.gemrc 47 | 48 | RUN useradd --no-log-init --system --create-home --groups sudo system \ 49 | && echo 'system ALL=(ALL:ALL) NOPASSWD:ALL' >> /etc/sudoers.d/system 50 | 51 | RUN useradd --no-log-init --system --create-home customer 52 | 53 | ############################################################### 54 | # Run these steps as the 'system' user 55 | # 56 | USER system 57 | 58 | # Install rvm 59 | RUN set -ex \ 60 | && for key in \ 61 | 7D2BAF1CF37B13E2069D6956105BD0E739499BDB \ 62 | 409B6B1796C275462A1703113804BB82D39DC0E3 \ 63 | ; do \ 64 | sudo gpg --batch --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys "$key" || \ 65 | sudo gpg --batch --keyserver hkp://pool.sks-keyservers.net:80 --recv-keys "$key" || \ 66 | sudo gpg --batch --keyserver hkp://ipv4.pool.sks-keyservers.net --recv-keys "$key" || \ 67 | sudo gpg --batch --keyserver hkp://pgp.mit.edu:80 --recv-keys "$key" || \ 68 | sudo gpg --batch --keyserver hkp://keyserver.pgp.com --recv-keys "$key" ; \ 69 | done \ 70 | # We use 'sudo' here to support multi-user install 71 | # http://rvm.io/rvm/install#1-download-and-run-the-rvm-installation-script 72 | && \curl -sSL https://get.rvm.io | sudo -n bash -s stable 73 | 74 | # Add 'customer' user to rvm group 75 | RUN sudo usermod --append --groups rvm customer 76 | 77 | 78 | ############################################################### 79 | # Run these steps as the customer user 80 | # 81 | USER customer 82 | 83 | # Configure rvm and install default Ruby 84 | ENV RUBY_VERSION 2.7.5 85 | ENV RUBY_VERSION_MIN 2.6.6 86 | RUN source /usr/local/rvm/scripts/rvm \ 87 | # Fail if deps are missing, won't prompt for sudo 88 | && rvm autolibs read-fail \ 89 | && rvm install --no-docs $RUBY_VERSION \ 90 | && rvm use --default $RUBY_VERSION \ 91 | # Make rvm available in non-login bash shells 92 | && echo 'source /usr/local/rvm/scripts/rvm' >> ~/.bashrc 93 | 94 | # Default to Node 20 95 | ENV NODE_VERSION lts/iron 96 | RUN curl https://raw.githubusercontent.com/nvm-sh/nvm/v0.37.2/install.sh | bash \ 97 | && \. "$HOME/.nvm/nvm.sh" \ 98 | && nvm install $NODE_VERSION 99 | 100 | 101 | ############################################################### 102 | # Run these steps and the container as the 'root' user 103 | # 104 | # This is necessary because the build code needs to have 105 | # rights to switch to 'customer' user 106 | # 107 | USER root 108 | 109 | WORKDIR /app 110 | 111 | COPY --from=builder /app/dist/build . 
112 | 113 | CMD ["./build"] 114 | -------------------------------------------------------------------------------- /Dockerfile-test: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | SHELL ["/bin/bash", "-c"] 4 | 5 | RUN groupadd -r rvm \ 6 | && useradd --no-log-init --system --create-home --groups rvm customer 7 | 8 | WORKDIR /app 9 | 10 | COPY ./requirements.txt ./requirements.txt 11 | COPY ./requirements-dev.txt ./requirements-dev.txt 12 | 13 | RUN pip install -r requirements-dev.txt -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | As a work of the United States Government, this project is in the 2 | public domain within the United States. 3 | 4 | Additionally, we waive copyright and related rights in the work 5 | worldwide through the CC0 1.0 Universal public domain dedication. 6 | 7 | ## CC0 1.0 Universal Summary 8 | 9 | This is a human-readable summary of the [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode). 10 | 11 | ### No Copyright 12 | 13 | The person who associated a work with this deed has dedicated the work to 14 | the public domain by waiving all of his or her rights to the work worldwide 15 | under copyright law, including all related and neighboring rights, to the 16 | extent allowed by law. 17 | 18 | You can copy, modify, distribute and perform the work, even for commercial 19 | purposes, all without asking permission. 20 | 21 | ### Other Information 22 | 23 | In no way are the patent or trademark rights of any person affected by CC0, 24 | nor are the rights that other persons may have in the work or in how the 25 | work is used, such as publicity or privacy rights. 26 | 27 | Unless expressly stated otherwise, the person who associated a work with 28 | this deed makes no warranties about the work, and disclaims liability for 29 | all uses of the work, to the fullest extent permitted by applicable law. 30 | When using or citing the work, you should not imply endorsement by the 31 | author or the affirmer. 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pages Build Container 2 | 3 | Docker image for building and publishing static sites as part of the cloud.gov Pages platform. 4 | 5 | Generally, site builds work in three stages: clone, build, and publish. Each stage is broken down into a number of steps. First, the container checks out the site from GitHub. Then it builds the site with the specified build engine. Then it gzip compresses text files and sets cache control headers. Finally, it uploads the built site to S3, and also creates redirect objects for directories, such as `/path` => `/path/`. 
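The publish step (see `src/publishing/s3publisher.py` and `src/steps/publish.py`) applies the compression, cache headers, and redirect objects described above. As a minimal illustrative sketch, and not the repository's actual implementation, publishing one text file and one directory redirect with `boto3` might look like the following (the bucket name, prefix, and helper names here are hypothetical; real values come from the build arguments and the `CACHE_CONTROL` environment variable):

```python
import gzip
import mimetypes

import boto3

TEXT_TYPES = ("text/", "application/json", "application/javascript")


def publish_file(s3, bucket, site_prefix, local_path, rel_path, cache_control="max-age=60"):
    """Upload one built file, gzip-compressing text-like content (sketch)."""
    content_type = mimetypes.guess_type(rel_path)[0] or "application/octet-stream"
    with open(local_path, "rb") as f:
        body = f.read()

    extra = {}
    if content_type.startswith(TEXT_TYPES[0]) or content_type in TEXT_TYPES[1:]:
        body = gzip.compress(body)          # compress text files before upload
        extra["ContentEncoding"] = "gzip"   # browsers transparently decompress

    s3.put_object(
        Bucket=bucket,
        Key=f"{site_prefix}/{rel_path}",
        Body=body,
        ContentType=content_type,
        CacheControl=cache_control,         # e.g. the CACHE_CONTROL env var
        **extra,
    )


def publish_redirect(s3, bucket, site_prefix, dir_path):
    """Create an object so that /path redirects to /path/ (sketch)."""
    s3.put_object(
        Bucket=bucket,
        Key=f"{site_prefix}/{dir_path}",
        Body=b"",
        WebsiteRedirectLocation=f"/{site_prefix}/{dir_path}/",
    )


if __name__ == "__main__":
    client = boto3.client("s3")  # credentials come from the build arguments
    publish_file(client, "my-bucket", "preview/owner/repo", "_site/docs/index.html", "docs/index.html")
    publish_redirect(client, "my-bucket", "preview/owner/repo", "docs")
```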
6 | 7 | ## Usage 8 | 9 | ### Command 10 | ``` 11 | python main.py [options] 12 | ``` 13 | 14 | ### Command options 15 | One of the following flags *must* be specified: 16 | 17 | | Flag | Example | Description | 18 | | ---- | ------- | ----------- | 19 | | `-p`, `--params` | `-p '{"foo": "bar"}'` | An encrypted JSON encoded string containing the [build arguments](#build-arguments) | 20 | | `-f`, `--file` | `--file ./.local/my-build.json` | A path to a JSON file containing the [build arguments](#build-arguments) | 21 | 22 | ### Using cloud.gov tasks 23 | ``` 24 | cf run-task "cd app && python main.py [options]" 25 | ``` 26 | 27 | ### Using `docker-compose` 28 | ``` 29 | docker-compose run --rm app python main.py [options] 30 | ``` 31 | 32 | ### Full examples 33 | ``` 34 | # build arguments provided as a JSON encoded string 35 | 36 | cf run-task pages-build-container "python main.py -p '{\"foo\": \"bar\"}'" --name "build-123" 37 | ``` 38 | 39 | ``` 40 | # build arguments provided in a JSON encoded file 41 | 42 | docker-compose run --rm app python main.py -f /tmp/local/my-build.json 43 | ``` 44 | 45 | ## Environment variables 46 | 47 | | Name | Optional? | VCAP Service | Description | 48 | | ---- | :-------: | ------------ | ----------- | 49 | | `CACHE_CONTROL` | Y | | Default value to set for the `Cache-Control` header of all published files, default is `max-age=60` | 50 | | `DATABASE_URL` | N | | The URL of the database for database logging | 51 | | `USER_ENVIRONMENT_VARIABLE_KEY` | N | `federalist-{space}-uev-key` | Encryption key to decrypt user environment variables | 52 | | `MAX_WORKERS` | N | | Maximum number of workers/threads to use when uploading files to S3 | 53 | 54 | When running locally, environment variables are configured in `docker-compose.yml` under the `app` service. 55 | 56 | ## Connected CF service 57 | 58 | | Name | Type | Description | 59 | | ---- | ---- | ----------- | 60 | | `federalist-((env))-rds` | Brokered | The RDS db credentials | 61 | | `federalist-((env))-uev-key` | User Provided | The site environment variable encryption key | 62 | | `pages-((env))-encryption` | User Provided | The site build params encryption key | 63 | 64 | ## Build arguments 65 | 66 | | Name | Optional? | Default | Description | 67 | | ---- | :-------: | ------- | ----------- | 68 | | `aws_access_key_id` | N | | AWS access key for the destination S3 bucket | 69 | | `aws_secret_access_key` | N | | AWS secret key for the destination S3 bucket | 70 | | `aws_default_region` | N | | AWS region for the destination S3 bucket | 71 | | `bucket` | N | | AWS S3 bucket name for the destination S3 bucket | 72 | | `github_token` | Y | `None` | GitHub auth token for cloning the repository | 73 | | `status_callback` | N | | The URL the container should use to report the status of the completed build (ie, success or failure) | 74 | | `config` | Y | `None` | A yaml block of configuration to add to `_config.yml` before building. Currently only used in `jekyll` site builds | 75 | | `generator` | N | | The engine to use to build the site (`'jekyll'`, `'hugo'`, `'node.js'`, or `'static'`) | 76 | | `owner` | N | | The GitHub organization of the source repository | 77 | | `repository` | N | | The name of source the repository | 78 | | `branch` | N | | The branch of the source repository to build | 79 | | `site_prefix` | N | | The S3 bucket "path" that the site files will be published to. It should **not** have a trailing or prefix slash (Ex. 
`preview/<owner>/<repository>`) | 80 | | `baseurl` | Y | `None` | The base URL that will be used by the build engine to determine the absolute path for site assets (blank for custom domains, the `site_prefix` with a preceding `/` for preview domains) | 81 | | `user_environment_variables` | Y | | Array of objects containing the name and encrypted values of user-provided environment variables (Ex. `[{ name: "MY ENV VAR", ciphertext: "ABC123" }]`) | 82 | 83 | 84 | ### Encrypted params argument 85 | 86 | When build parameters are passed to the build script using the `-p / --params` flag, they are an encrypted JSON encoded string created by the pages-core queue worker and decrypted using a shared key stored as the CF user provided service `pages-((env))-encryption` and the [decrypt cipher](./src/crypto/decrypt.py). 87 | 88 | ## Environment variables provided during builds 89 | 90 | The following environment variables are available during site builds and when running the `federalist` npm script. They may be useful for customizing the display of certain information in the published site, for example, to display the current published branch name. 91 | 92 | * `OWNER` 93 | * `REPOSITORY` 94 | * `BRANCH` 95 | * `SITE_PREFIX` 96 | * `BASEURL` 97 | 98 | ## Development 99 | 100 | ### Getting started 101 | 102 | #### Requirements 103 | - [Docker](https://www.docker.com/) and [Docker Compose](https://docs.docker.com/compose/) 104 | - AWS S3 bucket name and associated credentials (key, secret, region) 105 | - A Github repository with a Pages-compatible site 106 | - A Github Personal Access Token if building a private repository; see [creating a new personal token for your GitHub account](https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/) for more information. 107 | 108 | #### Clone the repository 109 | ```sh 110 | git clone git@github.com:cloud-gov/pages-build-container.git 111 | cd pages-build-container 112 | ``` 113 | 114 | #### Create build arguments 115 | ```sh 116 | mkdir -p .local 117 | cp .local.sample.json .local/my-build.json 118 | ``` 119 | 120 | #### Update build arguments 121 | Update the appropriate fields to contain the desired values for your build; see [build arguments](#build-arguments) for options. The `.local` folder should not be checked into version control (it is in `.gitignore`) and will be mounted into the Docker container at `/tmp/local`. 122 | 123 | #### Initialize the database 124 | This only needs to be done once. To force a reinitialization of the database, remove the `tmp/db` folder in the project root and run the command below again. 125 | 126 | ```sh 127 | docker-compose run --rm db 128 | ``` 129 | Then kill the process when it is done. 130 | 131 | #### Run the build 132 | ```sh 133 | docker-compose build 134 | docker-compose run --rm app python main.py -f /tmp/local/my-build.json 135 | ``` 136 | If the database is not ready when running a build (despite the healthcheck), just try running the build again. 137 | 138 | #### Interact with the build environment 139 | ```sh 140 | docker-compose run --rm app bash 141 | python main.py -f /tmp/local/my-build.json 142 | ``` 143 | 144 | ### Inspecting the database 145 | 146 | 1. Ensure the database is running (in the background) 147 | ``` 148 | docker-compose up -d --no-deps db 149 | ``` 150 | 151 | 2.
Run psql in the container 152 | ``` 153 | docker-compose exec db psql -U postgres -d pages 154 | ``` 155 | 156 | ### Inspecting logs 157 | During or after builds, the echoserver and database logs can be viewed with: 158 | ```sh 159 | # all logs 160 | docker-compose logs 161 | 162 | # only the echo server 163 | docker-compose logs echoserver 164 | 165 | # only the db 166 | docker-compose logs db 167 | ``` 168 | 169 | ### Testing 170 | 1. Build the test image 171 | ```sh 172 | docker-compose build test 173 | ``` 174 | 175 | 2. Run any testing steps 176 | ```sh 177 | # unit tests 178 | docker-compose run --rm test pytest 179 | 180 | # unit tests with code coverage 181 | docker-compose run --rm test pytest --cov-report xml:./coverage/coverage.xml --cov-report html:./coverage --cov-report term --cov=src 182 | 183 | # lint 184 | docker-compose run --rm test flake8 185 | 186 | # static analysis 187 | docker-compose run --rm test bandit -r src 188 | ``` 189 | 190 | ### Continuous Integration 191 | We use Concourse CI for our CI/CD system. To use Concourse, one must have appropriate permissions in UAA as administered by the cloud.gov operators. Access to Concourse also requires using the GSA VPN. 192 | 193 | 1. To get started, install and authenticate with the `fly` CLI: 194 | - `brew install --cask fly` 195 | - `fly -t <target> login -n pages -c <concourse-url>` 196 | 197 | 2. Update local credential files (see ci/vars/example.yml) 198 | 199 | #### CI deployments 200 | This repository contains three distinct deployment pipelines in Concourse: 201 | - [__build-container production__](./ci/pipeline-production.yml); starts on a new tag pushed to the `main` branch. 202 | - [__build-container staging__](./ci/pipeline-staging.yml); starts on a new commit pushed to the `main` branch. 203 | - [__build-container dev__](./ci/pipeline-dev.yml); starts when a PR is opened against the `main` branch. 204 | 205 | Each pipeline runs tests, creates the appropriate site build container image, pushes it to ECR, and then deploys the image for the build container app. 206 | 207 | ##### Pipeline instance variables 208 | Three instances of the pipeline are set for the `pages dev`, `pages staging` and `pages production` environments. Instance variables are used to fill in Concourse pipeline parameter variables bearing the same name as the instance variable. See more on [Concourse vars](https://concourse-ci.org/vars.html). Each instance of the pipeline has instance variables associated with it, such as `deploy-env` and `git-branch`. 209 | 210 | |Instance Variable|Pages Dev|Pages Staging|Pages Production| 211 | | --- | --- | --- | --- | 212 | |**`deploy-env`**|`dev`|`staging`|`production`| 213 | 214 | ## Public domain 215 | 216 | This project is in the worldwide [public domain](LICENSE.md). As stated in [CONTRIBUTING](CONTRIBUTING.md): 217 | 218 | > This project is in the public domain within the United States, and copyright and related rights in the work worldwide are waived through the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/). 219 | > 220 | > All contributions to this project will be released under the CC0 dedication. By submitting a pull request, you are agreeing to comply with this waiver of copyright interest.
221 | 222 | [Federalist]: https://federalist.18f.gov 223 | [cloud.gov Pages]: https://cloud.gov/pages 224 | [Docker Compose]: https://docs.docker.com/compose/install/ 225 | [Docker]: https://docs.docker.com/engine/installation/ 226 | [pages-builder]: https://github.com/cloud-gov/pages-builder 227 | -------------------------------------------------------------------------------- /bin/migrate.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS buildlog (id serial PRIMARY KEY, build integer, source varchar, output varchar); 2 | ALTER TABLE buildlog ADD COLUMN IF NOT EXISTS "createdAt" timestamp; 3 | ALTER TABLE buildlog ADD COLUMN IF NOT EXISTS "updatedAt" timestamp; 4 | -------------------------------------------------------------------------------- /bin/push-docker-image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eo pipefail 3 | 4 | # federalist-garden-build-dev-task 5 | TAG=$1 6 | 7 | # Make sure local registry is running on localhost:5000 8 | docker build --tag $TAG . 9 | docker tag $TAG localhost:5000/$TAG 10 | docker push localhost:5000/$TAG 11 | -------------------------------------------------------------------------------- /ci/partials/audit.yml: -------------------------------------------------------------------------------- 1 | platform: linux 2 | inputs: [name: src] 3 | outputs: [name: src] 4 | run: 5 | dir: src 6 | path: ci/tasks/pip-audit.sh 7 | -------------------------------------------------------------------------------- /ci/partials/build.yml: -------------------------------------------------------------------------------- 1 | platform: linux 2 | inputs: 3 | - name: src 4 | path: . 5 | outputs: 6 | - name: image 7 | run: 8 | path: build 9 | params: 10 | BUILDKIT_SECRETTEXT_UA_TOKEN: ((ua-token)) 11 | -------------------------------------------------------------------------------- /ci/partials/deploy.yml: -------------------------------------------------------------------------------- 1 | platform: linux 2 | inputs: 3 | - name: src 4 | - name: image-repository 5 | run: 6 | dir: src 7 | path: ci/tasks/deploy.sh 8 | -------------------------------------------------------------------------------- /ci/partials/test.yml: -------------------------------------------------------------------------------- 1 | platform: linux 2 | inputs: [name: src] 3 | outputs: [name: src] 4 | run: 5 | dir: src 6 | path: ci/tasks/test.sh 7 | -------------------------------------------------------------------------------- /ci/pipeline-dev.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #@ load("funcs.lib.yml", "slack_hook") 3 | #@ load("@ytt:data", "data") 4 | #@ load("@ytt:template", "template") 5 | 6 | #! 
JOBS 7 | 8 | jobs: 9 | - name: set-pipeline 10 | plan: 11 | - get: src 12 | resource: pr-((deploy-env)) 13 | trigger: true 14 | - get: pipeline-tasks 15 | - get: general-task 16 | - task: init 17 | image: general-task 18 | file: pipeline-tasks/tasks/init.yml 19 | params: 20 | PIPELINE_YML: src/ci/pipeline-dev.yml 21 | - set_pipeline: build-container 22 | file: compiled/set-pipeline.yml 23 | instance_vars: 24 | deploy-env: ((deploy-env)) 25 | 26 | - name: test-((deploy-env)) 27 | plan: 28 | - get: src 29 | resource: pr-((deploy-env)) 30 | trigger: true 31 | passed: [set-pipeline] 32 | 33 | - put: src 34 | resource: pr-((deploy-env)) 35 | params: 36 | path: src 37 | status: pending 38 | base_context: concourse 39 | context: test-pages-build-container-((deploy-env)) 40 | - get: python 41 | - task: test 42 | image: python 43 | file: src/ci/partials/test.yml 44 | 45 | on_success: 46 | put: src 47 | resource: pr-((deploy-env)) 48 | params: 49 | path: src 50 | status: success 51 | base_context: concourse 52 | context: test-pages-build-container-((deploy-env)) 53 | 54 | on_failure: 55 | in_parallel: 56 | - put: src 57 | resource: pr-((deploy-env)) 58 | params: 59 | path: src 60 | status: failure 61 | base_context: concourse 62 | context: test-pages-build-container-((deploy-env)) 63 | - #@ slack_hook("failure", "tests") 64 | 65 | - name: deploy-((deploy-env)) 66 | plan: 67 | - get: src 68 | resource: pr-((deploy-env)) 69 | trigger: true 70 | passed: [test-((deploy-env))] 71 | - get: general-task 72 | - get: oci-build-task 73 | - task: build 74 | privileged: true 75 | image: oci-build-task 76 | file: src/ci/partials/build.yml 77 | 78 | - put: image-repository 79 | params: 80 | image: image/image.tar 81 | - task: deploy 82 | image: general-task 83 | file: src/ci/partials/deploy.yml 84 | params: 85 | _: #@ template.replace(data.values.env_cf) 86 | CF_APP_NAME: pages-build-container-((deploy-env)) 87 | CF_MANIFEST: .cloudgov/manifest.yml 88 | CF_VARS_FILE: .cloudgov/vars/pages-((deploy-env)).yml 89 | IMAGE_REPOSITORY: ../image-repository/repository 90 | IMAGE_TAG: pages-((deploy-env)) 91 | CF_DOCKER_USERNAME: ((ecr-aws-key)) 92 | CF_DOCKER_PASSWORD: ((ecr-aws-secret)) 93 | 94 | on_failure: #@ slack_hook("failure", "deployment") 95 | 96 | - name: audit-dependencies 97 | plan: 98 | - get: src 99 | resource: pr-((deploy-env)) 100 | trigger: true 101 | passed: [set-pipeline] 102 | 103 | - put: src 104 | resource: pr-((deploy-env)) 105 | params: 106 | path: src 107 | status: pending 108 | base_context: concourse 109 | context: audit-dependencies 110 | 111 | - get: python 112 | - task: pip-audit 113 | image: python 114 | file: src/ci/partials/audit.yml 115 | 116 | on_failure: 117 | in_parallel: 118 | - put: src 119 | resource: pr-((deploy-env)) 120 | params: 121 | path: src 122 | status: failure 123 | base_context: concourse 124 | context: audit-dependencies 125 | - #@ slack_hook("failure", "dependency audit") 126 | 127 | on_success: 128 | in_parallel: 129 | - put: src 130 | resource: pr-((deploy-env)) 131 | params: 132 | path: src 133 | status: success 134 | base_context: concourse 135 | context: audit-dependencies 136 | - #@ slack_hook("success", "dependency audit") 137 | 138 | #! 
RESOURCES 139 | 140 | resources: 141 | - name: pr-((deploy-env)) 142 | type: pull-request 143 | check_every: 1m 144 | source: 145 | repository: ((build-container-repository-path)) 146 | access_token: ((gh-access-token)) 147 | base_branch: main 148 | disable_forks: true 149 | ignore_drafts: false 150 | 151 | - name: image-repository 152 | type: registry-image 153 | source: 154 | aws_access_key_id: ((ecr-aws-key)) 155 | aws_secret_access_key: ((ecr-aws-secret)) 156 | repository: pages-build-container 157 | aws_region: us-gov-west-1 158 | tag: pages-((deploy-env)) 159 | 160 | - name: slack 161 | - name: pipeline-tasks 162 | - name: python 163 | - name: general-task 164 | - name: oci-build-task 165 | 166 | #! RESOURCE TYPES 167 | 168 | resource_types: 169 | - name: git 170 | - name: slack-notification 171 | - name: pull-request 172 | - name: registry-image 173 | -------------------------------------------------------------------------------- /ci/pipeline-production.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #@ load("funcs.lib.yml", "slack_hook") 3 | #@ load("@ytt:data", "data") 4 | #@ load("@ytt:template", "template") 5 | 6 | #! JOBS 7 | 8 | jobs: 9 | - name: set-pipeline 10 | plan: 11 | - get: src 12 | resource: src-((deploy-env))-tagged 13 | params: { depth: 1 } 14 | trigger: true 15 | - get: pipeline-tasks 16 | - get: general-task 17 | - task: init 18 | image: general-task 19 | file: pipeline-tasks/tasks/init.yml 20 | params: 21 | PIPELINE_YML: src/ci/pipeline-production.yml 22 | - set_pipeline: build-container 23 | file: compiled/set-pipeline.yml 24 | instance_vars: 25 | deploy-env: ((deploy-env)) 26 | 27 | - name: test-((deploy-env)) 28 | plan: 29 | - get: src 30 | resource: src-((deploy-env))-tagged 31 | trigger: true 32 | params: { depth: 1 } 33 | passed: [set-pipeline] 34 | - get: python 35 | - task: test 36 | image: python 37 | file: src/ci/partials/test.yml 38 | 39 | on_failure: #@ slack_hook("failure", "tests") 40 | 41 | - name: deploy-((deploy-env)) 42 | plan: 43 | - get: src 44 | resource: src-((deploy-env))-tagged 45 | trigger: true 46 | params: { depth: 1 } 47 | passed: [test-((deploy-env)), audit-dependencies] 48 | - get: general-task 49 | - get: oci-build-task 50 | - task: build 51 | privileged: true 52 | image: oci-build-task 53 | file: src/ci/partials/build.yml 54 | - put: image-repository 55 | params: 56 | image: image/image.tar 57 | - task: deploy 58 | image: general-task 59 | file: src/ci/partials/deploy.yml 60 | params: 61 | _: #@ template.replace(data.values.env_cf) 62 | CF_APP_NAME: pages-build-container-((deploy-env)) 63 | CF_MANIFEST: .cloudgov/manifest.yml 64 | CF_VARS_FILE: .cloudgov/vars/pages-((deploy-env)).yml 65 | IMAGE_REPOSITORY: ../image-repository/repository 66 | IMAGE_TAG: pages-((deploy-env)) 67 | CF_DOCKER_USERNAME: ((ecr-aws-key)) 68 | CF_DOCKER_PASSWORD: ((ecr-aws-secret)) 69 | 70 | on_success: #@ slack_hook("success", "deployment") 71 | on_failure: #@ slack_hook("failure", "deployment") 72 | 73 | - name: audit-dependencies 74 | plan: 75 | - get: src 76 | resource: src-((deploy-env))-tagged 77 | trigger: true 78 | passed: [set-pipeline] 79 | 80 | - get: python 81 | - task: pip-audit 82 | image: python 83 | file: src/ci/partials/audit.yml 84 | 85 | on_failure: #@ slack_hook("failure", "dependency audit") 86 | on_success: #@ slack_hook("success", "dependency audit") 87 | 88 | - name: release 89 | plan: 90 | - get: src 91 | resource: src-((deploy-env))-tagged 92 | params: { depth: 1 } 93 | trigger: 
true 94 | passed: [deploy-((deploy-env))] 95 | - #@ template.replace(data.values.release_steps) 96 | 97 | #! RESOURCES 98 | 99 | resources: 100 | - name: src-((deploy-env))-tagged 101 | type: git 102 | icon: github 103 | source: 104 | uri: ((git-base-url))/((build-container-repository-path)) 105 | branch: main 106 | commit_verification_keys: ((cloud-gov-pages-gpg-keys)) 107 | tag_filter: 0.*.* 108 | fetch_tags: true 109 | 110 | - name: image-repository 111 | type: registry-image 112 | source: 113 | aws_access_key_id: ((ecr-aws-key)) 114 | aws_secret_access_key: ((ecr-aws-secret)) 115 | repository: pages-build-container 116 | aws_region: us-gov-west-1 117 | tag: pages-((deploy-env)) 118 | 119 | - name: pages-release 120 | type: github-release 121 | source: 122 | owner: cloud-gov 123 | repository: pages-build-container 124 | access_token: ((gh-access-token)) 125 | 126 | - name: slack 127 | - name: pipeline-tasks 128 | - name: python 129 | - name: general-task 130 | - name: oci-build-task 131 | 132 | #! RESOURCE TYPES 133 | 134 | resource_types: 135 | - name: git 136 | - name: slack-notification 137 | - name: pull-request 138 | - name: registry-image 139 | - name: github-release 140 | -------------------------------------------------------------------------------- /ci/pipeline-staging.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #@ load("funcs.lib.yml", "slack_hook") 3 | #@ load("@ytt:data", "data") 4 | #@ load("@ytt:template", "template") 5 | 6 | #! JOBS 7 | 8 | jobs: 9 | - name: set-pipeline 10 | plan: 11 | - get: src 12 | resource: src-((deploy-env)) 13 | params: { depth: 1 } 14 | trigger: true 15 | - get: pipeline-tasks 16 | - get: general-task 17 | - task: init 18 | image: general-task 19 | file: pipeline-tasks/tasks/init.yml 20 | params: 21 | PIPELINE_YML: src/ci/pipeline-staging.yml 22 | - set_pipeline: build-container 23 | file: compiled/set-pipeline.yml 24 | instance_vars: 25 | deploy-env: ((deploy-env)) 26 | 27 | - name: update-release-branch 28 | plan: 29 | - get: src 30 | resource: src-((deploy-env)) 31 | trigger: true 32 | - get: general-task 33 | - get: pipeline-tasks 34 | - task: update-release-branch 35 | image: general-task 36 | file: pipeline-tasks/tasks/update-release-branch.yml 37 | 38 | - name: test-((deploy-env)) 39 | plan: 40 | - get: src 41 | resource: src-((deploy-env)) 42 | trigger: true 43 | params: { depth: 1 } 44 | passed: [set-pipeline] 45 | - get: python 46 | - task: test 47 | image: python 48 | file: src/ci/partials/test.yml 49 | 50 | on_failure: #@ slack_hook("failure", "tests") 51 | 52 | - name: deploy-((deploy-env)) 53 | plan: 54 | - get: src 55 | resource: src-((deploy-env)) 56 | trigger: true 57 | params: { depth: 1 } 58 | passed: [test-((deploy-env)), audit-dependencies] 59 | - get: general-task 60 | - get: oci-build-task 61 | - task: build 62 | privileged: true 63 | image: oci-build-task 64 | file: src/ci/partials/build.yml 65 | - put: image-repository 66 | params: 67 | image: image/image.tar 68 | - task: deploy 69 | image: general-task 70 | file: src/ci/partials/deploy.yml 71 | params: 72 | _: #@ template.replace(data.values.env_cf) 73 | CF_APP_NAME: pages-build-container-((deploy-env)) 74 | CF_MANIFEST: .cloudgov/manifest.yml 75 | CF_VARS_FILE: .cloudgov/vars/pages-((deploy-env)).yml 76 | IMAGE_REPOSITORY: ../image-repository/repository 77 | IMAGE_TAG: pages-((deploy-env)) 78 | CF_DOCKER_USERNAME: ((ecr-aws-key)) 79 | CF_DOCKER_PASSWORD: ((ecr-aws-secret)) 80 | 81 | on_success: #@ 
slack_hook("success", "deployment") 82 | on_failure: #@ slack_hook("failure", "deployment") 83 | 84 | - name: audit-dependencies 85 | plan: 86 | - get: src 87 | resource: src-((deploy-env)) 88 | trigger: true 89 | passed: [set-pipeline] 90 | 91 | - get: python 92 | - task: pip-audit 93 | image: python 94 | file: src/ci/partials/audit.yml 95 | 96 | on_failure: #@ slack_hook("failure", "dependency audit") 97 | on_success: #@ slack_hook("success", "dependency audit") 98 | 99 | #! RESOURCES 100 | 101 | resources: 102 | - name: src-((deploy-env)) 103 | type: git 104 | icon: github 105 | source: 106 | uri: git@github.com:/((build-container-repository-path)) 107 | branch: main 108 | commit_verification_keys: ((cloud-gov-pages-gpg-keys)) 109 | private_key: ((pages-gpg-operations-github-sshkey.private_key)) 110 | 111 | - name: image-repository 112 | type: registry-image 113 | source: 114 | aws_access_key_id: ((ecr-aws-key)) 115 | aws_secret_access_key: ((ecr-aws-secret)) 116 | repository: pages-build-container 117 | aws_region: us-gov-west-1 118 | tag: pages-((deploy-env)) 119 | 120 | - name: slack 121 | - name: pipeline-tasks 122 | - name: python 123 | - name: general-task 124 | - name: oci-build-task 125 | 126 | #! RESOURCE TYPES 127 | 128 | resource_types: 129 | - name: git 130 | - name: slack-notification 131 | - name: pull-request 132 | - name: registry-image 133 | -------------------------------------------------------------------------------- /ci/pipeline.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #@ load("funcs.lib.yml", "slack_hook", "pr_hook") 3 | #@ load("@ytt:data", "data") 4 | #@ load("@ytt:template", "template") 5 | 6 | #@ env = data.values.env 7 | 8 | #! JOBS 9 | 10 | jobs: 11 | - name: set-pipeline 12 | plan: 13 | - get: src 14 | trigger: true 15 | params: 16 | #@ if/end env != 'dev': 17 | depth: 1 18 | #@ if/end env == 'dev': 19 | integration_tool: checkout 20 | - get: pipeline-tasks 21 | - get: general-task 22 | - task: boot 23 | image: general-task 24 | file: pipeline-tasks/tasks/boot.yml 25 | params: 26 | ENV_OVERRIDE: ((deploy-env)) 27 | - set_pipeline: self 28 | file: compiled/set-pipeline.yml 29 | instance_vars: 30 | deploy-env: ((deploy-env)) 31 | 32 | #@ if/end env == 'staging': 33 | - name: update-release-branch 34 | plan: 35 | - get: src 36 | trigger: true 37 | - get: general-task 38 | - get: pipeline-tasks 39 | - task: update-release-branch 40 | image: general-task 41 | file: pipeline-tasks/tasks/update-release-branch.yml 42 | 43 | - name: test 44 | plan: 45 | - get: src 46 | trigger: true 47 | passed: [set-pipeline] 48 | params: 49 | #@ if/end env != 'dev': 50 | depth: 1 51 | #@ if/end env == 'dev': 52 | integration_tool: checkout 53 | #@ if/end env == 'dev': 54 | - #@ pr_hook("pending", "test-pages-build-container") 55 | - get: python 56 | - task: test 57 | image: python 58 | file: src/ci/partials/test.yml 59 | 60 | on_success: 61 | in_parallel: 62 | - #@ slack_hook("success", "tests") 63 | #@ if/end env == 'dev': 64 | - #@ pr_hook("success", "test-pages-build-container") 65 | 66 | on_failure: 67 | in_parallel: 68 | - #@ slack_hook("failure", "tests") 69 | #@ if/end env == 'dev': 70 | - #@ pr_hook("failure", "test-pages-build-container") 71 | 72 | - name: deploy 73 | plan: 74 | - get: src 75 | trigger: true 76 | passed: 77 | - test 78 | #@ if/end env != 'dev': 79 | - audit-dependencies 80 | params: 81 | #@ if/end env != 'dev': 82 | depth: 1 83 | #@ if/end env == 'dev': 84 | integration_tool: checkout 85 | - get: 
general-task 86 | - get: oci-build-task 87 | - task: build 88 | privileged: true 89 | image: oci-build-task 90 | file: src/ci/partials/build.yml 91 | 92 | - put: image-repository 93 | params: 94 | image: image/image.tar 95 | - task: deploy 96 | image: general-task 97 | file: src/ci/partials/deploy.yml 98 | params: 99 | _: #@ template.replace(data.values.env_cf) 100 | CF_APP_NAME: pages-build-container-((deploy-env)) 101 | CF_MANIFEST: .cloudgov/manifest.yml 102 | CF_VARS_FILE: .cloudgov/vars/pages-((deploy-env)).yml 103 | IMAGE_REPOSITORY: ../image-repository/repository 104 | IMAGE_TAG: pages-((deploy-env)) 105 | CF_DOCKER_USERNAME: ((ecr-aws-key)) 106 | CF_DOCKER_PASSWORD: ((ecr-aws-secret)) 107 | 108 | on_success: 109 | in_parallel: 110 | - #@ slack_hook("success", "deployment") 111 | #@ if/end env == 'dev': 112 | - #@ pr_hook("success", "deploy-pages-build-container") 113 | 114 | on_failure: 115 | in_parallel: 116 | - #@ slack_hook("failure", "deployment") 117 | #@ if/end env == 'dev': 118 | - #@ pr_hook("failure", "deploy-pages-build-container") 119 | 120 | - name: audit-dependencies 121 | plan: 122 | - get: src 123 | trigger: true 124 | passed: [set-pipeline] 125 | params: 126 | #@ if/end env != 'dev': 127 | depth: 1 128 | #@ if/end env == 'dev': 129 | integration_tool: checkout 130 | 131 | #@ if/end env == 'dev': 132 | - #@ pr_hook("pending", "audit-dependencies") 133 | 134 | - get: python 135 | - task: pip-audit 136 | image: python 137 | file: src/ci/partials/audit.yml 138 | 139 | on_success: 140 | in_parallel: 141 | - #@ slack_hook("success", "dependency audit") 142 | #@ if/end env == 'dev': 143 | - #@ pr_hook("success", "audit-dependencies") 144 | 145 | on_failure: 146 | in_parallel: 147 | - #@ slack_hook("failure", "dependency audit") 148 | #@ if/end env == 'dev': 149 | - #@ pr_hook("failure", "audit-dependencies") 150 | 151 | #@ if/end env == 'production': 152 | - name: release 153 | plan: 154 | - get: src 155 | params: { depth: 1 } 156 | trigger: true 157 | passed: [deploy] 158 | - #@ template.replace(data.values.release_steps) 159 | 160 | #! 
RESOURCES 161 | 162 | resources: 163 | #@ if/end env == 'dev': 164 | - name: src 165 | type: pull-request 166 | check_every: 1m 167 | source: 168 | repository: ((build-container-repository-path)) 169 | access_token: ((gh-access-token)) 170 | base_branch: main 171 | disable_forks: true 172 | ignore_drafts: false 173 | 174 | #@ if/end env == 'staging': 175 | - name: src 176 | type: git 177 | icon: github 178 | source: 179 | uri: git@github.com:/((build-container-repository-path)) 180 | branch: main 181 | commit_verification_keys: ((cloud-gov-pages-gpg-keys)) 182 | private_key: ((pages-gpg-operations-github-sshkey.private_key)) 183 | 184 | #@ if env == 'production': 185 | - name: src 186 | icon: github 187 | source: 188 | uri: ((git-base-url))/((build-container-repository-path)) 189 | branch: main 190 | commit_verification_keys: ((cloud-gov-pages-gpg-keys)) 191 | tag_filter: 0.*.* 192 | fetch_tags: true 193 | 194 | - name: pages-release 195 | type: github-release 196 | source: 197 | owner: cloud-gov 198 | repository: pages-build-container 199 | access_token: ((gh-access-token)) 200 | #@ end 201 | 202 | - name: image-repository 203 | type: registry-image 204 | source: 205 | aws_access_key_id: ((ecr-aws-key)) 206 | aws_secret_access_key: ((ecr-aws-secret)) 207 | repository: pages-build-container 208 | aws_region: us-gov-west-1 209 | tag: pages-((deploy-env)) 210 | 211 | - name: slack 212 | - name: pipeline-tasks 213 | - name: python 214 | - name: general-task 215 | - name: oci-build-task 216 | 217 | #! RESOURCE TYPES 218 | 219 | resource_types: 220 | - name: git 221 | - name: slack-notification 222 | - name: registry-image 223 | #@ if/end env == 'dev': 224 | - name: pull-request 225 | #@ if/end env == 'production': 226 | - name: github-release 227 | -------------------------------------------------------------------------------- /ci/tasks/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | cf api $CF_API 6 | cf auth 7 | 8 | cf t -o $CF_ORG -s $CF_SPACE 9 | 10 | cf push $CF_APP_NAME \ 11 | -f $CF_MANIFEST \ 12 | --vars-file $CF_VARS_FILE \ 13 | --docker-image "$(cat ${IMAGE_REPOSITORY}):${IMAGE_TAG}" \ 14 | --docker-username ${CF_DOCKER_USERNAME} 15 | -------------------------------------------------------------------------------- /ci/tasks/pip-audit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | pip install pip-audit 6 | 7 | python3 -m pip_audit -r ./requirements.txt 8 | -------------------------------------------------------------------------------- /ci/tasks/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | getent group rvm || groupadd -r rvm 6 | id -u customer &>/dev/null || useradd --no-log-init --system --create-home --groups rvm customer 7 | 8 | pip install -r requirements-dev.txt 9 | flake8 10 | bandit -r src 11 | 12 | pytest --cov-report xml:./coverage/coverage.xml --cov-report html:./coverage --cov-report term --cov=src; status=$? 13 | 14 | exit $status 15 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | exp: 4 | build: 5 | context: . 
6 | dockerfile: Dockerfile-exp 7 | volumes: 8 | - ./.local:/tmp/local:ro 9 | links: 10 | - echoserver 11 | - db 12 | depends_on: 13 | - echoserver 14 | - db 15 | environment: 16 | CACHE_CONTROL: max-age=60 17 | DATABASE_URL: postgresql://postgres:password@db/pages 18 | USER_ENVIRONMENT_VARIABLE_KEY: shhhhhhh 19 | 20 | app: 21 | build: 22 | context: . 23 | volumes: 24 | - ./src:/app:ro 25 | - ./.local:/tmp/local:ro 26 | links: 27 | - echoserver 28 | - db 29 | depends_on: 30 | - echoserver 31 | - db 32 | environment: 33 | CACHE_CONTROL: max-age=60 34 | DATABASE_URL: postgresql://postgres:password@db/pages 35 | USER_ENVIRONMENT_VARIABLE_KEY: shhhhhhh 36 | 37 | echoserver: 38 | # simple python server to log requests during development 39 | build: 40 | context: ./echo-server 41 | dockerfile: Dockerfile 42 | container_name: echoserver 43 | volumes: 44 | - ./echo-server:/code 45 | environment: 46 | PORT: 8989 47 | ports: 48 | - "8989:8989" 49 | 50 | db: 51 | build: 52 | context: . 53 | dockerfile: Dockerfile-db 54 | container_name: db 55 | volumes: 56 | - ./tmp/db:/var/lib/postgresql/data 57 | healthcheck: 58 | test: ["CMD-SHELL", "pg_isready -U postgres -d pages"] 59 | interval: 10s 60 | timeout: 5s 61 | retries: 5 62 | environment: 63 | POSTGRES_PASSWORD: password 64 | POSTGRES_USER: postgres 65 | POSTGRES_DB: pages 66 | 67 | test: 68 | build: 69 | context: . 70 | dockerfile: Dockerfile-test 71 | container_name: test 72 | volumes: 73 | - ./:/app -------------------------------------------------------------------------------- /docker/ua-attach-config.sh: -------------------------------------------------------------------------------- 1 | UA_TOKEN=`cat /run/secrets/UA_TOKEN` 2 | 3 | echo "Configuring ua attach config" 4 | cat <> ua-attach-config.yaml 5 | token: $UA_TOKEN 6 | enable_services: 7 | - usg 8 | - esm-infra -------------------------------------------------------------------------------- /echo-server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6-alpine 2 | 3 | WORKDIR /code 4 | ADD run.py /code 5 | 6 | EXPOSE 8989 7 | 8 | CMD ["python", "run.py"] 9 | -------------------------------------------------------------------------------- /echo-server/run.py: -------------------------------------------------------------------------------- 1 | # Reflects the requests from HTTP methods GET, POST, PUT, and DELETE 2 | # 3 | # Based on https://gist.github.com/1kastner/e083f9e813c0464e6a2ec8910553e632 4 | 5 | import os 6 | import base64 7 | import json 8 | import threading 9 | 10 | from http.server import HTTPServer, BaseHTTPRequestHandler 11 | 12 | 13 | def flush_print(s): 14 | print(s, flush=True) 15 | 16 | 17 | def decodeb64(s): 18 | return str(base64.b64decode(s), 'utf-8') 19 | 20 | 21 | class StoppableHTTPServer(HTTPServer): 22 | def run(self): 23 | try: 24 | self.serve_forever() 25 | except Exception: # pylint: disable=W0703 26 | pass 27 | finally: 28 | self.server_close() 29 | 30 | 31 | class RequestHandler(BaseHTTPRequestHandler): 32 | def do_GET(self): 33 | flush_print(f"\n{self.command} {self.path}") 34 | 35 | self.send_response(200) 36 | self.end_headers() 37 | 38 | def do_POST(self): 39 | flush_print(f"\n{self.command} {self.path}") 40 | 41 | content_length = self.headers.get('Content-Length') 42 | length = int(content_length) if content_length else 0 43 | payload = self.rfile.read(length) 44 | 45 | content_type = self.headers.get('Content-Type') 46 | if content_type == 'application/json': 47 | payload_json = 
json.loads(str(payload, 'utf-8')) 48 | if payload_json.get('output'): 49 | payload_json['output'] = decodeb64(payload_json['output']) 50 | if payload_json.get('message'): 51 | payload_json['message'] = decodeb64(payload_json['message']) 52 | 53 | payload = json.dumps(payload_json) 54 | 55 | flush_print(f" {payload}") 56 | 57 | self.send_response(200) 58 | self.end_headers() 59 | 60 | do_PUT = do_POST 61 | do_DELETE = do_GET 62 | 63 | 64 | def main(): 65 | port = int(os.getenv('PORT', 8080)) 66 | host = os.getenv('HOST', '0.0.0.0') 67 | print(f'Listening on {host}:{port}') 68 | server = StoppableHTTPServer((host, port), RequestHandler) 69 | 70 | # Start processing requests 71 | thread = threading.Thread(None, server.run) 72 | thread.start() 73 | 74 | 75 | if __name__ == "__main__": 76 | main() 77 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | test_paths = src test 3 | addopts = --doctest-modules 4 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # Include the production dependencies 2 | -r requirements.txt 3 | 4 | # Testing and development dependencies 5 | bandit>=1.0,<2.0 6 | flake8==3.8.3 7 | moto==5.0.1 8 | pip-audit==2.7.3 9 | pyfakefs==4.0.2 10 | pyflakes==2.2.0 11 | pylint==2.5.3 12 | pytest-cov==4.1.0 13 | pytest==7.4.4 14 | requests-mock==1.8.0 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.32.4 2 | boto3==1.34.102 3 | stopit==1.1.2 4 | psycopg2==2.9.9 5 | cryptography==44.0.1 6 | pyyaml==6.0.1 7 | psutil==5.9.4 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [pep8] 2 | max-line-length = 100 -------------------------------------------------------------------------------- /src/build.py: -------------------------------------------------------------------------------- 1 | '''Main entrypoint''' 2 | 3 | import os 4 | import sys 5 | from datetime import datetime 6 | from stopit import TimeoutException, SignalTimeout as Timeout 7 | import boto3 8 | from functools import partial 9 | 10 | from common import CLONE_DIR_PATH 11 | 12 | from log_utils import ( 13 | delta_to_mins_secs, get_logger, init_logging, 14 | RepeatTimer, log_monitoring_metrics 15 | ) 16 | from log_utils.remote_logs import ( 17 | post_build_complete, post_build_error, 18 | post_build_timeout, post_build_processing, 19 | post_metrics, 20 | ) 21 | 22 | from crypto.decrypt import decrypt 23 | 24 | import repo_config 25 | 26 | from steps import ( 27 | build_hugo, build_jekyll, build_static, download_hugo, 28 | fetch_repo, publish, run_build_script, run_step, fetch_commit_sha, 29 | setup_bundler, setup_node, setup_ruby, StepException, update_repo 30 | ) 31 | 32 | TIMEOUT_SECONDS = 45 * 60 # 45 minutes 33 | 34 | GENERATORS = ['hugo', 'jekyll', 'node.js', 'static'] 35 | 36 | 37 | def build( 38 | aws_access_key_id, 39 | aws_default_region, 40 | aws_secret_access_key, 41 | status_callback, 42 | baseurl, 43 | branch, 44 | bucket, 45 | build_id, 46 | config, 47 | generator, 48 | github_token, 49 | owner, 50 | repository, 51 | site_prefix, 52 | 
user_environment_variables=[] 53 | ): 54 | ''' 55 | Main task to run a full site build process. 56 | 57 | All values needed for the build are loaded from 58 | environment variables. 59 | ''' 60 | # keep track of total time 61 | start_time = datetime.now() 62 | 63 | logger = None 64 | commit_sha = None 65 | thread = None 66 | 67 | cache_control = os.getenv('CACHE_CONTROL', 'max-age=60') 68 | database_url = os.environ['DATABASE_URL'] 69 | user_environment_variable_key = os.environ['USER_ENVIRONMENT_VARIABLE_KEY'] 70 | 71 | try: 72 | post_build_processing(status_callback) 73 | # throw a timeout exception after TIMEOUT_SECONDS 74 | with Timeout(TIMEOUT_SECONDS, swallow_exc=False): 75 | build_info = f'{owner}/{repository}@id:{build_id}' 76 | 77 | decrypted_uevs = decrypt_uevs(user_environment_variable_key, user_environment_variables) 78 | 79 | priv_vals = [uev['value'] for uev in decrypted_uevs] 80 | priv_vals.append(aws_access_key_id) 81 | priv_vals.append(aws_secret_access_key) 82 | if github_token: 83 | priv_vals.append(github_token) 84 | 85 | logattrs = { 86 | 'branch': branch, 87 | 'buildid': build_id, 88 | 'owner': owner, 89 | 'repository': repository, 90 | } 91 | 92 | init_logging(priv_vals, logattrs, database_url) 93 | 94 | logger = get_logger('main') 95 | 96 | # partially apply the callback url to post_metrics 97 | post_metrics_p = partial(post_metrics, status_callback) 98 | 99 | logger.info(f'Running build for {owner}/{repository}/{branch}') 100 | 101 | if generator not in GENERATORS: 102 | raise ValueError(f'Invalid generator: {generator}') 103 | 104 | # start a separate scheduled thread for memory/cpu monitoring 105 | MONITORING_INTERVAL = 30 106 | monitoring_logger = get_logger('monitor') 107 | thread = RepeatTimer( 108 | MONITORING_INTERVAL, 109 | log_monitoring_metrics, 110 | [monitoring_logger, post_metrics_p], 111 | ) 112 | thread.start() 113 | 114 | # S3 client used in multiple steps 115 | s3_client = boto3.client( 116 | service_name='s3', 117 | aws_access_key_id=aws_access_key_id, 118 | aws_secret_access_key=aws_secret_access_key, 119 | region_name=aws_default_region 120 | ) 121 | 122 | ## 123 | # FETCH 124 | # 125 | run_step( 126 | fetch_repo, 127 | 'There was a problem fetching the repository, see the above logs for details.', 128 | owner, repository, branch, github_token, 129 | ) 130 | 131 | commit_sha = fetch_commit_sha(CLONE_DIR_PATH) 132 | 133 | federalist_config = repo_config.from_json_file( 134 | CLONE_DIR_PATH, 135 | dict( 136 | headers=dict([('cache-control', cache_control)]), 137 | excludePaths=[ 138 | '*/Dockerfile', 139 | '*/docker-compose.yml', 140 | '/federalist.json', 141 | '/pages.json' 142 | ], 143 | includePaths=['/.well-known/security.txt'] 144 | ) 145 | ) 146 | 147 | if federalist_config.full_clone(): 148 | run_step( 149 | update_repo, 150 | 'There was a problem updating the repository, see the above logs for details.', 151 | CLONE_DIR_PATH, 152 | ) 153 | 154 | ## 155 | # BUILD 156 | # 157 | run_step( 158 | setup_node, 159 | 'There was a problem setting up Node, see the above logs for details.', 160 | federalist_config.should_cache(), 161 | bucket, 162 | s3_client, 163 | post_metrics_p, 164 | ) 165 | 166 | # Run the npm `federalist` task (if it is defined) 167 | run_step( 168 | run_build_script, 169 | 'There was a problem running the federalist script, see the above logs for details.', # noqa: E501 170 | branch, owner, repository, site_prefix, baseurl, decrypted_uevs, 171 | ) 172 | 173 | # Run the appropriate build engine based on generator 174 | if 
generator == 'jekyll': 175 | run_step( 176 | setup_ruby, 177 | 'There was a problem setting up Ruby, see the above logs for details.', 178 | federalist_config.should_cache(), post_metrics_p, 179 | ) 180 | 181 | run_step( 182 | setup_bundler, 183 | 'There was a problem setting up Bundler, see the above logs for details.', 184 | federalist_config.should_cache(), bucket, s3_client, 185 | ) 186 | 187 | run_step( 188 | build_jekyll, 189 | 'There was a problem running Jekyll, see the above logs for details.', 190 | branch, owner, repository, site_prefix, baseurl, config, decrypted_uevs, 191 | ) 192 | 193 | elif generator == 'hugo': 194 | # extra: --hugo-version (not yet used) 195 | run_step( 196 | download_hugo, 197 | 'There was a problem downloading Hugo, see the above logs for details.', 198 | post_metrics_p 199 | ) 200 | 201 | run_step( 202 | build_hugo, 203 | 'There was a problem running Hugo, see the above logs for details.', 204 | branch, owner, repository, site_prefix, baseurl, decrypted_uevs, 205 | ) 206 | 207 | elif generator == 'static': 208 | # no build arguments are needed 209 | build_static() 210 | 211 | elif (generator == 'node.js' or generator == 'script only'): 212 | logger.info('build already ran in \'npm run federalist\'') 213 | 214 | else: 215 | raise ValueError(f'Invalid generator: {generator}') 216 | 217 | ## 218 | # PUBLISH 219 | # 220 | publish(baseurl, site_prefix, bucket, federalist_config, s3_client) 221 | 222 | delta_string = delta_to_mins_secs(datetime.now() - start_time) 223 | logger.info(f'Total build time: {delta_string}') 224 | 225 | # Finished! 226 | post_build_complete(status_callback, commit_sha) 227 | 228 | sys.exit(0) 229 | 230 | except StepException as err: 231 | ''' 232 | Thrown when a step itself fails, usually because a command exited 233 | with a non-zero return code 234 | ''' 235 | logger.error(str(err)) 236 | post_build_error(status_callback, str(err), commit_sha) 237 | sys.exit(1) 238 | 239 | except TimeoutException: 240 | logger.warning(f'Build({build_info}) has timed out') 241 | post_build_timeout(status_callback, commit_sha) 242 | 243 | except Exception as err: # pylint: disable=W0703 244 | # Getting here means something really weird has happened 245 | # since all errors caught during tasks should be caught 246 | # in the previous block as `UnexpectedExit` exceptions. 247 | err_string = str(err) 248 | 249 | # log the original exception 250 | msg = f'Unexpected exception raised during build({build_info}): {err_string}' 251 | if logger: 252 | logger.warning(msg) 253 | else: 254 | print(msg) 255 | 256 | err_message = ( 257 | f'Unexpected build({build_info}) error. Please try ' 258 | 'again and contact pages-support if it persists.' 
259 | ) 260 | 261 | post_build_error(status_callback, err_message, commit_sha) 262 | finally: 263 | if thread: 264 | thread.cancel() 265 | 266 | 267 | def decrypt_uevs(key, uevs): 268 | return [{ 269 | 'name': uev['name'], 270 | 'value': decrypt(uev['ciphertext'], key) 271 | } for uev in uevs] 272 | -------------------------------------------------------------------------------- /src/common.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Common variables, tasks, and functions 3 | ''' 4 | 5 | from pathlib import Path 6 | 7 | REPO_BASE_URL = 'github.com' 8 | 9 | WORKING_DIR_PATH = Path('/tmp/work') # nosec 10 | 11 | CLONE_DIR = 'site_repo' 12 | CLONE_DIR_PATH = WORKING_DIR_PATH / CLONE_DIR 13 | 14 | SITE_BUILD_DIR = '_site' 15 | SITE_BUILD_DIR_PATH = CLONE_DIR_PATH / SITE_BUILD_DIR 16 | 17 | STATUS_COMPLETE = 'success' 18 | STATUS_ERROR = 'error' 19 | STATUS_PROCESSING = 'processing' 20 | -------------------------------------------------------------------------------- /src/crypto/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloud-gov/pages-build-container/00b490e9f858f10b52eb0875b637aea61a913438/src/crypto/__init__.py -------------------------------------------------------------------------------- /src/crypto/decrypt.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from cryptography.hazmat.backends import default_backend 3 | from cryptography.hazmat.primitives.ciphers import ( 4 | Cipher, algorithms, modes 5 | ) 6 | 7 | 8 | def decrypt(ciphertext, key): 9 | m = hashlib.sha256() 10 | m.update(key.encode()) 11 | hashed_key = m.digest() 12 | 13 | auth_tag, iv, encrypted = [ 14 | bytes.fromhex(hex) for hex in ciphertext.split(':') 15 | ] 16 | 17 | decryptor = Cipher( 18 | algorithms.AES(hashed_key), 19 | modes.GCM(iv, auth_tag), 20 | backend=default_backend() 21 | ).decryptor() 22 | 23 | return (decryptor.update(encrypted) + decryptor.finalize()).decode() 24 | -------------------------------------------------------------------------------- /src/log_utils/__init__.py: -------------------------------------------------------------------------------- 1 | '''Logging stuff''' 2 | 3 | from .get_logger import get_logger, init_logging 4 | from .delta_to_mins_secs import delta_to_mins_secs 5 | from .monitoring import RepeatTimer, log_monitoring_metrics 6 | 7 | __all__ = [ 8 | 'delta_to_mins_secs', 'get_logger', 'init_logging', 'RepeatTimer', 'log_monitoring_metrics'] 9 | -------------------------------------------------------------------------------- /src/log_utils/common.py: -------------------------------------------------------------------------------- 1 | STATUS_COMPLETE = 'success' 2 | STATUS_ERROR = 'error' 3 | STATUS_PROCESSING = 'processing' 4 | -------------------------------------------------------------------------------- /src/log_utils/db_handler.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import logging 3 | import psycopg2 4 | 5 | 6 | class DBHandler(logging.Handler): 7 | def __init__(self, conn_url, build_id): 8 | self.conn_url = conn_url 9 | self.build_id = build_id 10 | self.source = 'ALL' 11 | 12 | self.conn = None 13 | 14 | try: 15 | self.conn = psycopg2.connect(self.conn_url) 16 | except Exception: 17 | raise Exception(f'Cannot connect to {self.conn_url}') 18 | 19 | logging.Handler.__init__(self) 20 | 21 | def emit(self, 
record): 22 | try: 23 | now = datetime.now() 24 | self.exec( 25 | ('INSERT INTO buildlog ' 26 | '(build, source, output, "createdAt", "updatedAt") ' 27 | 'VALUES (%s, %s, %s, %s, %s);'), 28 | (self.build_id, self.source, self.format(record), now, now) 29 | ) 30 | except Exception: 31 | self.handleError(record) 32 | 33 | def close(self): 34 | self.conn.close() 35 | 36 | def exec(self, stmt, args): 37 | cursor = self.conn.cursor() 38 | cursor.execute(stmt, args) 39 | self.conn.commit() 40 | cursor.close() 41 | -------------------------------------------------------------------------------- /src/log_utils/delta_to_mins_secs.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta # noqa pylint: disable=W0611 2 | 3 | 4 | def delta_to_mins_secs(delta): 5 | ''' 6 | Converts a timedelta to a string of minutes and seconds. 7 | 8 | >>> td = timedelta(seconds=55) 9 | >>> delta_to_mins_secs(td) 10 | '55s' 11 | 12 | >>> td = timedelta(seconds=124) 13 | >>> delta_to_mins_secs(td) 14 | '2m 4s' 15 | ''' 16 | secs = int(delta.total_seconds()) 17 | if secs > 60: 18 | mins = int(secs // 60) 19 | secs = int(secs % 60) 20 | return f'{mins}m {secs}s' 21 | # else 22 | return f'{secs}s' 23 | -------------------------------------------------------------------------------- /src/log_utils/get_logger.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Setup nice logs. 3 | Clients should use the `get_logger` method to get a logger instance. 4 | ''' 5 | 6 | import sys 7 | import logging 8 | import logging.handlers 9 | 10 | from .db_handler import DBHandler 11 | 12 | DEFAULT_LOG_LEVEL = logging.INFO 13 | 14 | LOG_ATTRS = {} 15 | 16 | 17 | class LogFilter(logging.Filter): 18 | ''' 19 | For every log message, replaces any of the values found in `priv_values` 20 | with the provided or default `mask` text. In addition, this prevents empty 21 | messages from being logged at all. 22 | ''' 23 | DEFAULT_MASK = '[PRIVATE VALUE HIDDEN]' 24 | INVALID_ACCESS_KEY = 'InvalidAccessKeyId' 25 | 26 | def __init__(self, priv_vals, mask=DEFAULT_MASK): 27 | self.priv_vals = priv_vals 28 | self.mask = mask 29 | logging.Filter.__init__(self) 30 | 31 | def filter(self, record): 32 | for priv_val in self.priv_vals: 33 | record.msg = record.msg.replace(priv_val, self.mask) 34 | 35 | if self.INVALID_ACCESS_KEY in record.msg: 36 | record.msg = ( 37 | 'Whoops, our S3 keys were rotated during your ' 38 | 'build and became out of date. This was not a ' 39 | 'problem with your site build, but if you restart ' 40 | 'the failed build it should work on the next try. ' 41 | 'Sorry for the inconvenience!' 42 | ) 43 | 44 | return len(record.msg) > 0 45 | 46 | 47 | class Formatter(logging.Formatter): 48 | ''' 49 | A more forgiving formatter that will fill in blank values if our custom 50 | attributes are missing 51 | ''' 52 | def __init__(self, keys, *args, **kwargs): 53 | self.keys = keys 54 | logging.Formatter.__init__(self, *args, **kwargs) 55 | 56 | def format(self, record): 57 | ''' 58 | Add missing values before formatting as normal 59 | ''' 60 | for key in self.keys: 61 | if (key not in record.__dict__): 62 | record.__dict__[key] = '' 63 | 64 | return super().format(record) 65 | 66 | 67 | def get_logger(name): 68 | ''' 69 | Gets a logger instance configured with our formatter and handler 70 | for the given name. 
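    The result is a LoggerAdapter that merges the shared LOG_ATTRS set by init_logging (branch, buildid, owner, repository) into every record.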
71 | ''' 72 | logger = logging.getLogger(name) 73 | 74 | return logging.LoggerAdapter(logger, LOG_ATTRS) 75 | 76 | 77 | def set_log_attrs(attrs): 78 | global LOG_ATTRS 79 | LOG_ATTRS = attrs 80 | 81 | 82 | def init_logging(private_values, attrs, db_url): 83 | global LOG_ATTRS 84 | LOG_ATTRS = attrs 85 | 86 | date_fmt = '%Y-%m-%d %H:%M:%S' 87 | style_fmt = '{' 88 | short_fmt = '{asctime} {levelname} [{name}] {message}' 89 | long_fmt = '{asctime} {levelname} [{name}] ' 90 | for key in attrs.keys(): 91 | long_fmt = long_fmt + '@' + key + ': {' + key + '} ' 92 | 93 | long_fmt = long_fmt + '@message: {message}' 94 | 95 | extra_attrs = attrs.keys() 96 | 97 | log_filter = LogFilter(private_values) 98 | 99 | log_level = DEFAULT_LOG_LEVEL 100 | 101 | stream_formatter = Formatter(extra_attrs, long_fmt, date_fmt, style_fmt) 102 | 103 | stream_handler = logging.StreamHandler(sys.stdout) 104 | stream_handler.setFormatter(stream_formatter) 105 | stream_handler.setLevel(log_level) 106 | stream_handler.addFilter(log_filter) 107 | 108 | handlers = [stream_handler] 109 | 110 | # configure db logging 111 | build_id = attrs['buildid'] 112 | db_formatter = logging.Formatter(short_fmt, date_fmt, style_fmt) 113 | 114 | db_handler = DBHandler(db_url, build_id) 115 | db_handler.setFormatter(db_formatter) 116 | db_handler.setLevel(log_level) 117 | db_handler.addFilter(log_filter) 118 | 119 | handlers.append(db_handler) 120 | 121 | logging.basicConfig(level=log_level, handlers=handlers) 122 | -------------------------------------------------------------------------------- /src/log_utils/monitoring.py: -------------------------------------------------------------------------------- 1 | from threading import Timer 2 | import psutil 3 | 4 | max_metrics = dict( 5 | cpu=0, 6 | mem=0, 7 | disk=0 8 | ) 9 | 10 | 11 | # https://stackoverflow.com/a/48741004 12 | class RepeatTimer(Timer): 13 | def run(self): 14 | while not self.finished.wait(self.interval): 15 | self.function(*self.args, **self.kwargs) 16 | 17 | 18 | def log_monitoring_metrics(logger, post_metrics): 19 | disk = psutil.disk_usage("/") 20 | 21 | # compute new maximum metrics and post to the application 22 | max_metrics["cpu"] = max(psutil.cpu_percent(), max_metrics["cpu"]) 23 | max_metrics["mem"] = max(psutil.virtual_memory().percent, max_metrics["mem"]) 24 | max_metrics["disk"] = max(disk.used, max_metrics["disk"]) 25 | 26 | post_metrics(dict(machine=max_metrics)) 27 | -------------------------------------------------------------------------------- /src/log_utils/remote_logs.py: -------------------------------------------------------------------------------- 1 | '''Functions for sending remote logs''' 2 | 3 | import base64 4 | import requests 5 | from typing import Dict 6 | 7 | from .common import (STATUS_COMPLETE, STATUS_ERROR, STATUS_PROCESSING) 8 | 9 | 10 | def b64string(text): 11 | ''' 12 | Base64 encodes a string as utf-8 13 | 14 | >>> b64string('boop') 15 | 'Ym9vcA==' 16 | ''' 17 | return base64.b64encode(text.encode('utf-8')).decode('utf-8') 18 | 19 | 20 | def post_status(status_callback_url, status, output='', commit_sha=None): 21 | ''' 22 | POSTs `status` and `output` to the `status_callback_url` 23 | ''' 24 | requests.post( 25 | status_callback_url, 26 | json={ 27 | 'status': status, 28 | 'message': b64string(output), 29 | 'commit_sha': commit_sha, 30 | }, 31 | timeout=10 32 | ) 33 | 34 | 35 | def post_build_complete(status_callback_url, commit_sha): 36 | ''' 37 | POST a STATUS_COMPLETE status to the status_callback_url 38 | ''' 39 | 
post_status(status_callback_url, status=STATUS_COMPLETE, commit_sha=commit_sha) 40 | 41 | 42 | def post_build_error(status_callback_url, error_output, commit_sha=None): 43 | ''' 44 | POST a STATUS_ERROR status with message to the status_callback_url 45 | ''' 46 | # Post to the Pages web application endpoint with status and output 47 | post_status( 48 | status_callback_url, status=STATUS_ERROR, output=error_output, commit_sha=commit_sha 49 | ) 50 | 51 | 52 | def post_build_processing(status_callback_url): 53 | ''' 54 | POST a STATUS_PROCESSING status to the status_callback_url 55 | ''' 56 | post_status(status_callback_url, status=STATUS_PROCESSING) 57 | 58 | 59 | def post_build_timeout(status_callback_url, commit_sha=None): 60 | ''' 61 | POST a STATUS_ERROR status with timeout message to the status_callback_url 62 | ''' 63 | output = 'The build did not complete. It may have timed out.' 64 | 65 | # Post to the Pages web application with status and output 66 | post_status(status_callback_url, status=STATUS_ERROR, output=output, commit_sha=commit_sha) 67 | 68 | 69 | def post_metrics(status_callback_url: str, metrics: Dict): 70 | ''' 71 | POST build metrics to the metrics API 72 | ''' 73 | url = status_callback_url.replace('status', 'metrics') 74 | requests.post( 75 | url, 76 | json=metrics, 77 | timeout=10 78 | ) 79 | -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import inspect 3 | import json 4 | import os 5 | import shlex 6 | 7 | from build import build 8 | from crypto.decrypt import decrypt 9 | 10 | 11 | KEYS_TO_DECRYPT = [ 12 | 'STATUS_CALLBACK', 13 | 'GITHUB_TOKEN', 14 | 'AWS_ACCESS_KEY_ID', 15 | 'AWS_SECRET_ACCESS_KEY', 16 | 'BUCKET', 17 | ] 18 | 19 | 20 | def load_vcap(): 21 | vcap_application = json.loads(os.getenv('VCAP_APPLICATION', '{}')) 22 | vcap_services = json.loads(os.getenv('VCAP_SERVICES', '{}')) 23 | 24 | space = vcap_application['space_name'] 25 | 26 | space_prefix = 'pages-staging' if space == 'pages-staging' else f'federalist-{space}' 27 | 28 | uev_ups = next( 29 | ups for ups in vcap_services['user-provided'] 30 | if ups['name'] == f'{space_prefix}-uev-key' 31 | ) 32 | 33 | uev_env_var = 'USER_ENVIRONMENT_VARIABLE_KEY' 34 | os.environ[uev_env_var] = uev_ups['credentials']['key'] 35 | 36 | 37 | def decrypt_key_value(k, v, encryption_key): 38 | if k in KEYS_TO_DECRYPT: 39 | return decrypt(v, encryption_key) 40 | return v 41 | 42 | 43 | def decrypt_params(params): 44 | vcap_application = json.loads(os.getenv('VCAP_APPLICATION', '{}')) 45 | vcap_services = json.loads(os.getenv('VCAP_SERVICES', '{}')) 46 | 47 | space = vcap_application['space_name'] 48 | 49 | encryption_ups = next( 50 | ups for ups in vcap_services['user-provided'] 51 | if ups['name'] == f'pages-{space}-encryption' 52 | ) 53 | 54 | encryption_key = encryption_ups['credentials']['key'] 55 | 56 | params = {k: decrypt_key_value(k, v, encryption_key) for (k, v) in params.items()} 57 | 58 | return params 59 | 60 | 61 | if __name__ == "__main__": 62 | parser = argparse.ArgumentParser(description='Run a pages build') 63 | group = parser.add_mutually_exclusive_group(required=True) 64 | group.add_argument('-p', '--params', dest='params', 65 | help='A JSON encoded string', 66 | metavar="'{\"foo\": \"bar\"}'") 67 | group.add_argument('-f', '--file', dest='file', 68 | help='A path to a JSON file', type=argparse.FileType('r'), 69 | metavar="./foo.json") 70 | args = 
parser.parse_args() 71 | 72 | if args.params: 73 | params = json.loads(args.params) 74 | params = decrypt_params(params) 75 | else: 76 | params = json.load(args.file) 77 | 78 | params = {k.lower(): v for (k, v) in params.items()} 79 | 80 | build_arguments = inspect.getfullargspec(build)[0] 81 | for k in params: 82 | if k not in build_arguments: 83 | # Warn about unused arguments 84 | print(f'WARNING - Ignoring unused parameter: {k}') 85 | 86 | # Remove unused build arguments 87 | kwargs = {k: v for (k, v) in params.items() if k in build_arguments} 88 | 89 | if 'user_environment_variables' in kwargs: 90 | uevs = kwargs['user_environment_variables'] 91 | if uevs and isinstance(uevs, str): 92 | kwargs['user_environment_variables'] = json.loads(uevs) 93 | 94 | kwargs['branch'] = shlex.quote(kwargs['branch']) 95 | kwargs['owner'] = shlex.quote(kwargs['owner']) 96 | kwargs['repository'] = shlex.quote(kwargs['repository']) 97 | 98 | if os.getenv('VCAP_APPLICATION', None): 99 | load_vcap() 100 | 101 | build(**kwargs) 102 | -------------------------------------------------------------------------------- /src/publishing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloud-gov/pages-build-container/00b490e9f858f10b52eb0875b637aea61a913438/src/publishing/__init__.py -------------------------------------------------------------------------------- /src/publishing/models.py: -------------------------------------------------------------------------------- 1 | '''Classes for files published to S3''' 2 | 3 | import binascii 4 | import gzip 5 | import hashlib 6 | import mimetypes 7 | 8 | from datetime import datetime 9 | from os import path 10 | 11 | mimetypes.init() # must initialize mimetypes 12 | 13 | 14 | def remove_prefix(text, prefix): 15 | ''' 16 | Returns a copy of text with the given prefix removed. 
17 | 18 | >>> remove_prefix('/ab/cd/ef', '/ab/cd') 19 | '/ef' 20 | 21 | >>> remove_prefix('abcd', '/ef') 22 | 'abcd' 23 | ''' 24 | if text.startswith(prefix): 25 | return text[len(prefix):] 26 | return text 27 | 28 | 29 | class SiteObject(): 30 | ''' 31 | An abstract class for an individual object that can be uploaded to S3 32 | ''' 33 | 34 | def __init__(self, filename, md5, site_prefix='', dir_prefix=''): 35 | self.filename = filename 36 | self.md5 = md5 37 | self.dir_prefix = dir_prefix 38 | self.site_prefix = site_prefix 39 | 40 | @property 41 | def s3_key(self): 42 | '''The object's key in the S3 bucket''' 43 | filename = self.filename 44 | if self.dir_prefix: 45 | filename = remove_prefix(filename, 46 | path.join(self.dir_prefix, '')) 47 | return f'{self.site_prefix}/{filename}' 48 | 49 | def upload_to_s3(self, bucket, s3_client): 50 | '''Upload this object to S3''' 51 | raise NotImplementedError # should be implemented in child classes 52 | 53 | def delete_from_s3(self, bucket, s3_client): 54 | '''Delete this object from S3''' 55 | s3_client.delete_object( 56 | Bucket=bucket, 57 | Key=self.s3_key, 58 | ) 59 | 60 | 61 | class SiteFile(SiteObject): 62 | '''A file produced during a site build''' 63 | 64 | GZIP_EXTENSIONS = ['html', 'css', 'js', 'json', 'svg'] 65 | 66 | def __init__(self, filename, dir_prefix, site_prefix, cache_control): 67 | super().__init__(filename=filename, 68 | md5=None, 69 | dir_prefix=dir_prefix, 70 | site_prefix=site_prefix) 71 | self._compress() 72 | self.md5 = self.generate_md5() 73 | self.cache_control = cache_control 74 | 75 | @property 76 | def is_compressible(self): 77 | '''Whether the file should be compressed''' 78 | _, file_extension = path.splitext(self.filename) 79 | # file_extension has a preceding '.' character, so use substring 80 | return file_extension[1:].lower() in self.GZIP_EXTENSIONS 81 | 82 | @property 83 | def content_encoding(self): 84 | '''"gzip" if the file is compressible, otherwise None''' 85 | if self.is_compressible: 86 | return 'gzip' 87 | return None 88 | 89 | @property 90 | def content_type(self): 91 | '''The best-guess mimetype of the file''' 92 | content_type, _ = mimetypes.guess_type(self.filename) 93 | return content_type 94 | 95 | @property 96 | def is_compressed(self): 97 | '''Checks to see if the file is already compressed''' 98 | with open(self.filename, 'rb') as test_f: 99 | # '1f8b' is the magic flag that gzipped files start with 100 | return binascii.hexlify(test_f.read(2)) == b'1f8b' 101 | 102 | def generate_md5(self): 103 | '''Generates an md5 hash of the file contents''' 104 | hash_md5 = hashlib.md5() # nosec 105 | 106 | with open(self.filename, 'rb') as file: 107 | for chunk in iter(lambda: file.read(4096), b""): 108 | hash_md5.update(chunk) 109 | return hash_md5.hexdigest() 110 | 111 | def _compress(self): 112 | '''GZips the file in-situ''' 113 | 114 | if not self.is_compressible: 115 | # shouldn't be compressed, so return 116 | return 117 | 118 | if self.is_compressed: 119 | # already compressed, so return 120 | return 121 | 122 | # otherwise, gzip the file in place 123 | with open(self.filename, 'rb') as f_in: 124 | contents = f_in.read() 125 | # Spoof the modification time so that MD5 hashes match next time 126 | spoofed_mtime = datetime(2014, 3, 19).timestamp() # March 19, 2014 127 | # Compress the contents and save over the original file 128 | with gzip.GzipFile(self.filename, mode='wb', 129 | mtime=spoofed_mtime) as gz_file: 130 | gz_file.write(contents) 131 | 132 | def upload_to_s3(self, bucket, s3_client): 
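        '''Uploads the file to S3 with its cache-control, server-side encryption, and (when set) content-encoding and content-type'''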
133 | extra_args = { 134 | "CacheControl": self.cache_control, 135 | "ServerSideEncryption": "AES256", 136 | } 137 | 138 | if self.content_encoding: 139 | extra_args["ContentEncoding"] = self.content_encoding 140 | if self.content_type: 141 | extra_args["ContentType"] = self.content_type 142 | 143 | s3_client.upload_file( 144 | Filename=self.filename, 145 | Bucket=bucket, 146 | Key=self.s3_key, 147 | # For allowed ExtraArgs, see 148 | # https://boto3.readthedocs.io/en/latest/reference/customizations/s3.html#boto3.s3.transfer.S3Transfer.ALLOWED_UPLOAD_ARGS 149 | ExtraArgs=extra_args, 150 | ) 151 | 152 | 153 | class SiteRedirect(SiteObject): 154 | ''' 155 | A redirect, typically from `/path/to/page => /path/to/page/` 156 | ''' 157 | 158 | def __init__(self, filename, dir_prefix, site_prefix, base_url, cache_control): 159 | super().__init__(filename=filename, 160 | dir_prefix=dir_prefix, 161 | md5=None, # update after super().__init()__ 162 | site_prefix=site_prefix) 163 | 164 | self.base_url = base_url 165 | self.cache_control = cache_control 166 | 167 | # The md5 hash is the hash of the destination string, not 168 | # of the file contents, for our redirect objects 169 | self.md5 = hashlib.md5(self.destination.encode()).hexdigest() # nosec 170 | 171 | @property 172 | def destination(self): 173 | '''The destination of the redirect object''' 174 | filename = self.filename 175 | 176 | if self.dir_prefix: 177 | if filename == self.dir_prefix: 178 | return f'{self.base_url}/' 179 | 180 | filename = remove_prefix(filename, 181 | path.join(self.dir_prefix, '')) 182 | 183 | return f'{self.base_url}/{filename}/' 184 | 185 | @property 186 | def s3_key(self): 187 | filename = self.filename 188 | 189 | if self.dir_prefix: 190 | if filename == self.dir_prefix: 191 | # then this is 'root' site redirect object 192 | # (ie, the main index.html file) 193 | return self.site_prefix 194 | 195 | filename = remove_prefix(filename, 196 | path.join(self.dir_prefix, '')) 197 | 198 | return f'{self.site_prefix}/{filename}' 199 | 200 | def upload_to_s3(self, bucket, s3_client): 201 | '''Uploads the redirect object to S3''' 202 | s3_client.put_object( 203 | Body=self.destination, 204 | Bucket=bucket, 205 | Key=self.s3_key, 206 | ServerSideEncryption='AES256', 207 | WebsiteRedirectLocation=self.destination, 208 | CacheControl=self.cache_control 209 | ) 210 | -------------------------------------------------------------------------------- /src/publishing/s3publisher.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Classes and methods for publishing a directory to S3 3 | ''' 4 | 5 | import requests 6 | 7 | from os import path, makedirs, walk, getenv 8 | from concurrent.futures import ThreadPoolExecutor 9 | 10 | from log_utils import get_logger 11 | from .models import (remove_prefix, SiteObject, SiteFile, SiteRedirect) 12 | 13 | MAX_S3_KEYS_PER_REQUEST = 1000 14 | FEDERALIST_JSON = 'federalist.json' 15 | MAX_WORKERS = getenv('MAX_WORKERS', 8) 16 | 17 | 18 | def list_remote_objects(bucket, site_prefix, s3_client): 19 | ''' 20 | 21 | Generates a list of remote S3 objects that have keys starting with 22 | site_prefix in the given bucket. 
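    Listing is paginated with list_objects_v2 continuation tokens until all matching keys have been collected.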
23 | 24 | ''' 25 | results_truncated = True 26 | continuation_token = None 27 | 28 | remote_objects = [] 29 | 30 | while results_truncated: 31 | prefix = site_prefix 32 | # Add a / to the end of the prefix to prevent 33 | # retrieving keys for sites with site_prefixes 34 | # that are substrings of others 35 | if prefix[-1] != '/': 36 | prefix += '/' 37 | 38 | request_kwargs = { 39 | 'Bucket': bucket, 40 | 'MaxKeys': MAX_S3_KEYS_PER_REQUEST, 41 | 'Prefix': prefix, 42 | } 43 | 44 | if continuation_token: 45 | request_kwargs['ContinuationToken'] = continuation_token 46 | 47 | response = s3_client.list_objects_v2(**request_kwargs) 48 | 49 | contents = response.get('Contents') 50 | if not contents: 51 | return remote_objects 52 | 53 | for response_obj in contents: 54 | # remove the site_prefix from the key 55 | filename = remove_prefix(response_obj['Key'], site_prefix) 56 | 57 | # remove initial slash if present 58 | filename = remove_prefix(filename, '/') 59 | 60 | # the etag comes surrounded by double quotes, so remove them 61 | md5 = response_obj['ETag'].replace('"', '') 62 | 63 | site_obj = SiteObject(filename=filename, md5=md5, 64 | site_prefix=site_prefix) 65 | remote_objects.append(site_obj) 66 | 67 | results_truncated = response['IsTruncated'] 68 | if results_truncated: 69 | continuation_token = response['NextContinuationToken'] 70 | 71 | return remote_objects 72 | 73 | 74 | def get_cache_control(federalist_config, filename): 75 | return federalist_config.get_headers_for_path(filename).get('cache-control') 76 | 77 | 78 | def strip_dirname(filepath, dirname): 79 | if dirname and filepath.startswith(dirname): 80 | return filepath[len(dirname):] 81 | return filepath 82 | 83 | 84 | def publish_to_s3(directory, base_url, site_prefix, bucket, federalist_config, 85 | s3_client, dry_run=False): 86 | '''Publishes the given directory to S3''' 87 | logger = get_logger('publish') 88 | 89 | # Add local 404 if does not already exist 90 | filename_404 = directory + '/404.html' 91 | if not path.isfile(filename_404): 92 | default_404_url = ('https://raw.githubusercontent.com' 93 | '/cloud-gov/pages-404-page/main/' 94 | '404-pages-client.html') 95 | default_404 = requests.get(default_404_url, timeout=10) 96 | makedirs(path.dirname(filename_404), exist_ok=True) 97 | with open(filename_404, "w+") as f: 98 | f.write(default_404.text) 99 | 100 | # Collect a list of all files in `directory`` 101 | local_objects_by_filename = {} 102 | 103 | for root, _dirs, filenames in walk(directory): 104 | for filename in filenames: 105 | full_path = path.join(root, filename) 106 | relative_path = strip_dirname(full_path, directory) 107 | 108 | if federalist_config.is_path_included(relative_path): 109 | cache_control = get_cache_control(federalist_config, relative_path) 110 | 111 | site_file = SiteFile(filename=full_path, 112 | dir_prefix=directory, 113 | site_prefix=site_prefix, 114 | cache_control=cache_control) 115 | 116 | local_objects_by_filename[site_file.filename] = site_file 117 | 118 | if filename == 'index.html': 119 | site_redirect = SiteRedirect(filename=root, 120 | dir_prefix=directory, 121 | site_prefix=site_prefix, 122 | base_url=base_url, 123 | cache_control=cache_control) 124 | 125 | local_objects_by_filename[site_redirect.filename] = site_redirect 126 | 127 | if len(local_objects_by_filename) == 0: 128 | raise RuntimeError('Local build files not found') 129 | 130 | # Get list of remote files 131 | remote_objects = list_remote_objects(bucket=bucket, 132 | site_prefix=site_prefix, 133 | s3_client=s3_client) 
134 | 135 | # Make dicts by filename of local and remote objects for easier searching 136 | remote_objects_by_filename = {} 137 | for obj in remote_objects: 138 | # These will not have the `directory` prefix that our local 139 | # files do, so add it so we can more easily compare them. 140 | filename = path.join(directory, obj.filename) 141 | remote_objects_by_filename[filename] = obj 142 | 143 | # Create lists of all the new and modified objects 144 | new_objects = [] 145 | replacement_objects = [] 146 | # track whether we can do diffing because of cache control 147 | default_cache_control = getenv('CACHE_CONTROL', 'max-age=60') 148 | for local_filename, local_obj in local_objects_by_filename.items(): 149 | matching_remote_obj = remote_objects_by_filename.get(local_filename) 150 | if not matching_remote_obj: 151 | new_objects.append(local_obj) 152 | elif (matching_remote_obj.md5 != local_obj.md5 or 153 | local_obj.cache_control != default_cache_control): 154 | replacement_objects.append(local_obj) 155 | 156 | # Create a list of the remote objects that should be deleted 157 | deletion_objects = [ 158 | obj for filename, obj in remote_objects_by_filename.items() 159 | if not local_objects_by_filename.get(filename) 160 | ] 161 | 162 | if (len(new_objects) == 0 and len(replacement_objects) <= 1 and 163 | len(local_objects_by_filename) <= 1): 164 | raise RuntimeError('Cannot unpublish all files') 165 | 166 | logger.info('Preparing to upload') 167 | logger.info(f'New: {len(new_objects)}') 168 | logger.info(f'Replaced: {len(replacement_objects)}') 169 | logger.info(f'Deleted: {len(deletion_objects)}') 170 | 171 | # Upload new and replacement files 172 | upload_objects = new_objects + replacement_objects 173 | 174 | # task to be run in parallel via threadpool 175 | def uploader_task(client, file): 176 | logger.info(f'Uploading {file.s3_key}') 177 | try: 178 | file.upload_to_s3(bucket, client) 179 | except UnicodeEncodeError as err: 180 | if err.reason == 'surrogates not allowed': 181 | logger.warning( 182 | f'... unable to upload {file.filename} due ' 183 | f'to invalid characters in file name.' 
184 | ) 185 | else: 186 | raise 187 | 188 | if dry_run: # pragma: no cover 189 | for file in upload_objects: 190 | logger.info(f'Dry-run uploading {file.s3_key}') 191 | else: 192 | with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: 193 | for file in upload_objects: 194 | executor.submit(uploader_task, s3_client, file) 195 | 196 | # Delete files not needed any more 197 | for file in deletion_objects: 198 | if dry_run: # pragma: no cover 199 | logger.info(f'Dry run deleting {file.s3_key}') 200 | else: 201 | logger.info(f'Deleting {file.s3_key}') 202 | 203 | file.delete_from_s3(bucket, s3_client) 204 | -------------------------------------------------------------------------------- /src/repo_config/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | from os import path 3 | from .repo_config import RepoConfig 4 | 5 | __all__ = [ 6 | 'RepoConfig', 'from_json_file', 'from_object' 7 | ] 8 | 9 | PAGES_JSON = 'pages.json' 10 | FEDERALIST_JSON = 'federalist.json' 11 | 12 | 13 | def from_json_file(clone_dir, defaults={}): 14 | obj = {} 15 | 16 | json_files = [PAGES_JSON, FEDERALIST_JSON] 17 | for json_file_name in json_files: 18 | json_file_path = path.join(clone_dir, json_file_name) 19 | if path.isfile(json_file_path): 20 | with open(json_file_path) as json_file: 21 | obj = json.load(json_file) 22 | break 23 | 24 | return from_object(obj, defaults) 25 | 26 | 27 | def from_object(obj, defaults={}): 28 | return RepoConfig(obj, defaults) 29 | -------------------------------------------------------------------------------- /src/repo_config/repo_config.py: -------------------------------------------------------------------------------- 1 | import fnmatch 2 | 3 | 4 | class RepoConfig: 5 | ''' 6 | Encapsulate the logic for handling the `federalist.json` configuration 7 | 8 | The file should look something like: 9 | { 10 | "fullClone": true, 11 | "headers": [ 12 | "/*": { 13 | "cache-control": "no-cache" 14 | } 15 | ], 16 | "excludePaths": [ 17 | "**/Dockerfile", 18 | "/another_excluded_file.yml" 19 | ], 20 | "includePaths": [ 21 | "/included_file", 22 | "/.well-known/security.txt 23 | ], 24 | "cache": true 25 | } 26 | 27 | Currently, only the following keys are utilized: 28 | - headers 29 | - excludePaths 30 | - includePaths 31 | - fullClone 32 | - cache 33 | ''' 34 | 35 | def __init__(self, config={}, defaults={}): 36 | self.config = config 37 | self.defaults = defaults 38 | 39 | def get_headers_for_path(self, path_to_match): 40 | ''' 41 | Determine the headers that apply to particular filepath 42 | ''' 43 | 44 | # A shallow copy should be sufficient 45 | resolved_headers = self.defaults.get('headers', {}).copy() 46 | 47 | first_matching_cfg = find_first_matching_cfg( 48 | self.config.get('headers', []), 49 | path_to_match) 50 | 51 | if first_matching_cfg: 52 | headers = first_value(first_matching_cfg) 53 | 54 | for key, value in headers.items(): 55 | resolved_headers[key.strip().lower()] = value.strip() 56 | 57 | return resolved_headers 58 | 59 | def is_path_excluded(self, path_to_match): 60 | return ((contains_dotpath(path_to_match) or self.is_exclude_path_match(path_to_match)) 61 | and not self.is_include_path_match(path_to_match)) 62 | 63 | def is_path_included(self, path_to_match): 64 | return not self.is_path_excluded(path_to_match) 65 | 66 | def is_exclude_path_match(self, path_to_match): 67 | return is_path_match(self.exclude_paths(), path_to_match) 68 | 69 | def is_include_path_match(self, path_to_match): 70 | return 
is_path_match(self.include_paths(), path_to_match) 71 | 72 | def full_clone(self): 73 | return self.config.get('fullClone', False) is True 74 | 75 | def exclude_paths(self): 76 | return self.config.get('excludePaths', []) + self.defaults.get('excludePaths', []) 77 | 78 | def include_paths(self): 79 | return self.config.get('includePaths', []) + self.defaults.get('includePaths', []) 80 | 81 | def should_cache(self): 82 | return self.config.get('cache', True) is True 83 | 84 | 85 | def contains_dotpath(filename): 86 | return any(segment for segment in filename.split('/') if segment.startswith('.')) 87 | 88 | 89 | def is_path_match(patterns, path_to_match): 90 | for pattern in patterns: 91 | if fnmatch.fnmatch(prepend_slash(path_to_match), pattern): 92 | return True 93 | 94 | return False 95 | 96 | 97 | def find_first_matching_cfg(configuration_section, path_to_match): 98 | ''' 99 | Find and return the FIRST configuration rule where the `path_to_match` matches 100 | the configured pattern. 101 | 102 | Order is important, so the configuration must be specified and handled as a 103 | list. 104 | 105 | If no path matches, an empty dict is returned. 106 | ''' 107 | 108 | return next( 109 | (configuration_rule 110 | for configuration_rule 111 | in configuration_section 112 | if match_path(first_key(configuration_rule), path_to_match)), 113 | {}) 114 | 115 | 116 | def match_path(pattern, path_to_match): 117 | ''' 118 | Determine if the `path_to_match` matches the path `pattern` 119 | 120 | >>> match_path('/*', '/index.html') 121 | True 122 | 123 | >>> match_path('/index.html', '/foo.js') 124 | False 125 | 126 | Patterns can contain the '*' and ':foo' wildcards. 127 | 128 | The '*' wildcard will match anything including '/' 129 | Ex. 130 | 131 | >>> match_path('/*', '/foo/bar/baz/index.html') 132 | True 133 | 134 | When combined with an extension, ie '*.html', the wildcard will match 135 | everything up to the LAST extension in the path to match, which must 136 | be matched exactly. 137 | Ex. 138 | 139 | >>> match_path('/*.html', '/foo/bar/baz/index.foo.html') 140 | True 141 | 142 | >>> match_path('/*.foo', '/foo/bar/baz/index.foo.html') 143 | False 144 | 145 | The ':foo' wildcard will match anything EXCEPT '/', 146 | ie it is a single segment wildcard. It can contain any letters after ':' 147 | Ex. 
148 | 149 | >>> match_path('/:foo/bar', '/abc/bar') 150 | True 151 | 152 | >>> match_path('/:baz', '/abc/foo') 153 | False 154 | ''' 155 | 156 | # normalize the paths by removing leading slash since that will 157 | # result in a leading empty string with 'split'ing 158 | pattern = strip_prefix('/', pattern) 159 | path_to_match = strip_prefix('/', path_to_match) 160 | 161 | pattern_parts = pattern.split('/') 162 | path_parts = path_to_match.split('/') 163 | 164 | for idx, pattern_part in enumerate(pattern_parts): 165 | if pattern_part == '*': 166 | return True 167 | 168 | if pattern_part.startswith(':'): 169 | continue 170 | 171 | if len(path_parts) <= idx: 172 | return False 173 | 174 | if pattern_part.startswith('*.'): 175 | pattern_part_ext = pattern_part.split('.')[-1] 176 | last_path_part = path_parts[-1] 177 | last_path_ext = last_path_part.split('.')[-1] 178 | return last_path_ext == pattern_part_ext 179 | 180 | path_part = path_parts[idx] 181 | 182 | if path_part != pattern_part: 183 | return False 184 | 185 | if len(path_parts) > len(pattern_parts): 186 | return False 187 | 188 | return True 189 | 190 | 191 | def first_key(dikt): 192 | return next(key for key in dikt) 193 | 194 | 195 | def first_value(dikt): 196 | return next(value for value in dikt.values()) 197 | 198 | 199 | def strip_prefix(prefix, path): 200 | # Copied from models.py::remove_prefix 201 | return path[len(prefix):] if path.startswith(prefix) else path 202 | 203 | 204 | def prepend_slash(path): 205 | return path if path.startswith('/') else ('/' + path) 206 | -------------------------------------------------------------------------------- /src/runner/__init__.py: -------------------------------------------------------------------------------- 1 | import grp 2 | import os 3 | import pwd 4 | import shlex 5 | import subprocess # nosec 6 | from io import StringIO 7 | 8 | NVM_PATH = '~/.nvm/nvm.sh' 9 | RVM_PATH = '/usr/local/rvm/scripts/rvm' 10 | 11 | 12 | def setuser(): 13 | os.setgid(grp.getgrnam('rvm').gr_gid) 14 | os.setuid(pwd.getpwnam('customer').pw_uid) 15 | 16 | 17 | def run(logger, command, cwd=None, env=None, shell=False, check=True, node=False, ruby=False, skip_log=False): # noqa: E501 18 | ''' 19 | Run an OS command with provided cwd or env, stream logs to logger, and return the exit code. 20 | 21 | Errors that occur BEFORE the command is actually executed are caught and handled here. 22 | 23 | Errors encountered by the executed command are caught unless `check=False`. In these cases a 24 | non-zero exit code will be returned to be handled by the caller. 25 | 26 | See https://docs.python.org/3/library/subprocess.html#popen-constructor for details. 
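    When `check` is true the aggregated stdout is returned and a non-zero exit raises CalledProcessError; with `check=False` the command's return code is returned, and 1 is returned for errors raised while invoking the command.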
27 | ''' 28 | 29 | if ruby: 30 | command = f'source {RVM_PATH} && {command}' 31 | shell = True 32 | 33 | if node: 34 | command = f'source {NVM_PATH} && {command}' 35 | shell = True 36 | 37 | if isinstance(command, str) and not shell: 38 | command = shlex.split(command) 39 | 40 | # When a shell is needed, use `bash` instead of `sh` 41 | executable = '/bin/bash' if shell else None 42 | 43 | # aggregate stdout in case we need to return 44 | output = StringIO() 45 | 46 | try: 47 | p = subprocess.Popen( # nosec 48 | command, 49 | cwd=cwd, 50 | env=env, 51 | shell=shell, 52 | executable=executable, 53 | stderr=subprocess.STDOUT, 54 | stdout=subprocess.PIPE, 55 | bufsize=1, 56 | encoding='utf-8', 57 | text=True, 58 | preexec_fn=setuser 59 | ) 60 | while p.poll() is None: 61 | line = p.stdout.readline().strip() 62 | if not skip_log: 63 | logger.info(line) 64 | output.write(line) 65 | 66 | line = p.stdout.readline().strip() 67 | if not skip_log: 68 | logger.info(line) 69 | output.write(line) 70 | 71 | if check: 72 | if p.returncode: 73 | raise subprocess.CalledProcessError(p.returncode, command) 74 | return output.getvalue() 75 | 76 | return p.returncode 77 | 78 | # This occurs when Popen itself is called with invalid arguments 79 | except ValueError as err: 80 | logger.error('Encountered a problem invoking Popen.') 81 | logger.error(str(err)) 82 | 83 | if check: 84 | raise err 85 | 86 | return 1 87 | 88 | # This occurs when the command given to Popen cannot be executed. 89 | # Ex. the file doesn't exist, there was a typo, etc... 90 | except OSError as err: 91 | logger.error('Encountered a problem executing `' + ' '.join(command) + '`.') 92 | logger.error(str(err)) 93 | 94 | if check: 95 | raise err 96 | 97 | return 1 98 | -------------------------------------------------------------------------------- /src/steps/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import ( 2 | build_hugo, build_jekyll, build_static, 3 | download_hugo, run_build_script, run_step, setup_bundler, 4 | setup_node, setup_ruby, 5 | ) 6 | from .exceptions import StepException 7 | from .fetch import fetch_repo, update_repo, fetch_commit_sha 8 | from .publish import publish 9 | 10 | 11 | __all__ = [ 12 | 'build_hugo', 13 | 'build_jekyll', 14 | 'build_static', 15 | 'download_hugo', 16 | 'fetch_repo', 17 | 'publish', 18 | 'run_build_script', 19 | 'run_step', 20 | 'setup_bundler', 21 | 'setup_node', 22 | 'setup_ruby', 23 | 'StepException', 24 | 'update_repo', 25 | 'fetch_commit_sha', 26 | ] 27 | -------------------------------------------------------------------------------- /src/steps/build.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | from os import path 5 | from pathlib import Path 6 | import re 7 | import requests 8 | import shlex 9 | import subprocess # nosec 10 | import time 11 | import yaml 12 | 13 | from common import (CLONE_DIR_PATH, SITE_BUILD_DIR, SITE_BUILD_DIR_PATH, WORKING_DIR_PATH) 14 | from log_utils import get_logger 15 | from runner import run, setuser 16 | from .cache import CacheFolder 17 | from .exceptions import StepException 18 | 19 | HUGO_BIN = 'hugo' 20 | HUGO_VERSION = '.hugo-version' 21 | NVMRC = '.nvmrc' 22 | PACKAGE_JSON = 'package.json' 23 | PACKAGE_LOCK = 'package-lock.json' 24 | NODE_MODULES = 'node_modules' 25 | RUBY_VERSION = '.ruby-version' 26 | GEMFILE = 'Gemfile' 27 | GEMFILELOCK = 'Gemfile.lock' 28 | JEKYLL_CONFIG_YML = '_config.yml' 29 | 
BUNDLER_VERSION = '.bundler-version' 30 | 31 | CERTS_PATH = Path('/etc/ssl/certs/ca-certificates.crt') 32 | RVM_PATH = Path('/usr/local/rvm/scripts/rvm') 33 | 34 | 35 | def build_env(branch, owner, repository, site_prefix, base_url, 36 | user_env_vars=[]): 37 | '''Creates a dict of environment variables to pass into a build context''' 38 | env = { 39 | 'BRANCH': branch, 40 | 'OWNER': owner, 41 | 'REPOSITORY': repository, 42 | 'SITE_PREFIX': site_prefix, 43 | 'BASEURL': base_url, 44 | # necessary to make sure build engines use utf-8 encoding 45 | 'LANG': 'en_US.UTF-8', 46 | 'GATSBY_TELEMETRY_DISABLED': '1', 47 | # Not that folks should really be using `pry` on Pages but 48 | # https://github.com/pry/pry/pull/2165 49 | 'HOME': '/home/customer', 50 | } 51 | 52 | for uev in user_env_vars: 53 | name = uev['name'] 54 | value = uev['value'] 55 | if name in env or name.upper() in env: 56 | print( 57 | f'user environment variable name `{name}` conflicts ' 58 | 'with system environment variable, it will be ignored.' 59 | ) 60 | else: 61 | env[name] = value 62 | 63 | return env 64 | 65 | 66 | def build_static(): 67 | '''Moves all files from CLONE_DIR into SITE_BUILD_DIR''' 68 | logger = get_logger('build-static') 69 | 70 | dir = path.join(CLONE_DIR_PATH, '.git') 71 | logger.info(f'Cleaning {dir}') 72 | shutil.rmtree(dir, ignore_errors=True) 73 | 74 | logger.info(f'Moving files to {SITE_BUILD_DIR}') 75 | 76 | # Make the site build directory first 77 | SITE_BUILD_DIR_PATH.mkdir(exist_ok=True) 78 | 79 | files = os.listdir(CLONE_DIR_PATH) 80 | 81 | for file in files: 82 | # don't move the SITE_BUILD_DIR dir into itself 83 | if file is not SITE_BUILD_DIR: 84 | shutil.move(str(CLONE_DIR_PATH / file), 85 | str(SITE_BUILD_DIR_PATH)) 86 | 87 | 88 | def has_build_script(script_name): 89 | ''' 90 | Checks for existence of the script (ie: "federalist", "pages") in the 91 | cloned repo's package.json. 92 | ''' 93 | PACKAGE_JSON_PATH = CLONE_DIR_PATH / PACKAGE_JSON 94 | if PACKAGE_JSON_PATH.is_file(): 95 | with PACKAGE_JSON_PATH.open() as json_file: 96 | package_json = json.load(json_file) 97 | return script_name in package_json.get('scripts', {}) 98 | 99 | return False 100 | 101 | 102 | def check_supported_ruby_version(version): 103 | ''' 104 | Checks if the version defined in .ruby-version is supported 105 | Raises a generic exception if not 106 | ''' 107 | is_supported = 0 108 | 109 | if version: 110 | logger = get_logger('setup-ruby') 111 | 112 | RUBY_VERSION_MIN = os.getenv('RUBY_VERSION_MIN') 113 | is_supported = run( 114 | logger, 115 | f'ruby -e "exit Gem::Version.new(\'{shlex.split(version)[0]}\') >= Gem::Version.new(\'{RUBY_VERSION_MIN}\') ? 1 : 0"', # noqa: E501 116 | cwd=CLONE_DIR_PATH, 117 | env={}, 118 | ruby=True, 119 | check=False, 120 | ) 121 | 122 | upgrade_msg = 'Please upgrade to an actively supported version, see https://www.ruby-lang.org/en/downloads/branches/ for details.' # noqa: E501 123 | 124 | if version == RUBY_VERSION_MIN: 125 | logger.warning( 126 | f'WARNING: Ruby {RUBY_VERSION_MIN} will soon reach end-of-life, at which point Pages will no longer support it.') # noqa: E501 127 | logger.warning(upgrade_msg) 128 | 129 | if not is_supported: 130 | error = 'ERROR: Unsupported ruby version specified in .ruby-version.' 131 | logger.error(error) 132 | logger.error(upgrade_msg) 133 | raise Exception(error) 134 | 135 | 136 | def setup_node(should_cache: bool, bucket, s3_client, post_metrics): 137 | ''' 138 | Sets up node and installs dependencies. 
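    When caching is enabled and a package-lock.json is present, node_modules is restored from (or saved back to) the S3 build cache, and `npm ci` is skipped if the cache already exists.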
139 | 140 | Uses the node version specified in the cloned repo's .nvmrc 141 | file if it is present. 142 | ''' 143 | logger = get_logger('setup-node') 144 | 145 | def runp(cmd, skip_log=False): 146 | return run(logger, cmd, cwd=CLONE_DIR_PATH, env={}, node=True, skip_log=skip_log) 147 | 148 | NVMRC_PATH = CLONE_DIR_PATH / NVMRC 149 | if NVMRC_PATH.is_file(): 150 | # nvm will output the node and npm versions used 151 | logger.info('Checking node version specified in .nvmrc') 152 | runp(""" 153 | RAW_VERSION=$(nvm version-remote $(cat .nvmrc)) 154 | MAJOR_VERSION=$(echo $RAW_VERSION | cut -d. -f 1 | cut -dv -f 2) 155 | if [[ "$MAJOR_VERSION" =~ ^(18|20|22)$ ]]; then 156 | echo "Switching to node version $RAW_VERSION specified in .nvmrc" 157 | 158 | if [[ "$MAJOR_VERSION" -eq 18 ]]; then 159 | echo "WARNING: Node $RAW_VERSION will reach end-of-life on 2025-04-30, at which point Pages will no longer support it." 160 | echo "Please upgrade to LTS major version 20 or 22, see https://nodejs.org/en/about/releases/ for details." 161 | fi 162 | 163 | nvm install $RAW_VERSION 164 | nvm alias default $RAW_VERSION 165 | else 166 | echo "Unsupported node major version '$MAJOR_VERSION' specified in .nvmrc." 167 | echo "Please upgrade to LTS major version 20 or 22, see https://nodejs.org/en/about/releases/ for details." 168 | exit 1 169 | fi 170 | """) # noqa: E501 171 | else: 172 | # output node and npm versions if the defaults are used 173 | logger.info('Using default node version') 174 | runp('nvm alias default $(nvm version)') 175 | runp('echo Node version: $(node --version)') 176 | runp('echo NPM version: $(npm --version)') 177 | 178 | # capture version and cache 179 | node_version = runp('node --version', skip_log=True) 180 | post_metrics({ 181 | "engines": { 182 | "node": dict(version=node_version, cache=should_cache) 183 | } 184 | }) 185 | 186 | cache_folder = None 187 | PACKAGE_LOCK_PATH = CLONE_DIR_PATH / PACKAGE_LOCK 188 | if PACKAGE_LOCK_PATH.is_file(): 189 | if should_cache: 190 | logger.info(f'{PACKAGE_LOCK} found. Attempting to download cache') 191 | NM_FOLDER = CLONE_DIR_PATH / NODE_MODULES 192 | cache_folder = CacheFolder(PACKAGE_LOCK_PATH, NM_FOLDER, bucket, s3_client, logger) 193 | cache_folder.download_unzip() 194 | 195 | if PACKAGE_LOCK_PATH.is_file(): 196 | if should_cache and cache_folder.exists(): 197 | logger.info('skipping npm ci and using cache') 198 | else: 199 | logger.info('Installing dependencies in package-lock.json') 200 | runp('npm set audit false') 201 | runp('npm ci') 202 | 203 | if PACKAGE_LOCK_PATH.is_file() and should_cache: 204 | if not cache_folder.exists(): 205 | cache_folder.zip_upload_folder_to_s3() 206 | 207 | 208 | def run_build_script(branch, owner, repository, site_prefix, 209 | base_url='', user_env_vars=[]): 210 | ''' 211 | Runs the npm build (ie: "federalist","pages", ...) 
script if it is defined 212 | ''' 213 | 214 | scripts = ["pages", "federalist"] 215 | for script_name in scripts: 216 | if has_build_script(script_name): 217 | logger = get_logger(f'run-{script_name}-script') 218 | logger.info(f'Running {script_name} build script in package.json') 219 | env = build_env(branch, owner, repository, site_prefix, base_url, user_env_vars) 220 | run(logger, f'npm run {script_name}', cwd=CLONE_DIR_PATH, env=env, node=True) 221 | return 222 | 223 | 224 | def run_step(step, msg, *args, **kwargs): 225 | try: 226 | step(*args, **kwargs) 227 | except Exception: 228 | raise StepException(msg) 229 | 230 | 231 | def download_hugo(post_metrics): 232 | logger = get_logger('download-hugo') 233 | 234 | HUGO_VERSION_PATH = CLONE_DIR_PATH / HUGO_VERSION 235 | if HUGO_VERSION_PATH.is_file(): 236 | logger.info('.hugo-version found') 237 | hugo_version = '' 238 | with HUGO_VERSION_PATH.open() as hugo_vers_file: 239 | try: 240 | hugo_version = hugo_vers_file.readline().strip() 241 | hugo_version = shlex.quote(hugo_version) 242 | regex = r'^(extended_)?[\d]+(\.[\d]+)*$' 243 | hugo_version = re.search(regex, hugo_version).group(0) 244 | except Exception: 245 | raise RuntimeError('Invalid .hugo-version') 246 | 247 | if hugo_version: 248 | logger.info(f'Using hugo version in .hugo-version: {hugo_version}') 249 | post_metrics({ 250 | "engines": { 251 | "hugo": dict(version=hugo_version) 252 | } 253 | }) 254 | else: 255 | raise RuntimeError(".hugo-version not found") 256 | ''' 257 | Downloads the specified version of Hugo 258 | ''' 259 | logger.info(f'Downloading hugo version {hugo_version}') 260 | failed_attempts = 0 261 | while (failed_attempts < 5): 262 | try: 263 | dl_url = ('https://github.com/gohugoio/hugo/releases/download/v' 264 | + hugo_version.split('_')[-1] + 265 | f'/hugo_{hugo_version}_Linux-64bit.tar.gz') 266 | response = requests.get(dl_url, verify=CERTS_PATH, timeout=10) 267 | 268 | hugo_tar_path = WORKING_DIR_PATH / 'hugo.tar.gz' 269 | with hugo_tar_path.open('wb') as hugo_tar: 270 | for chunk in response.iter_content(chunk_size=128): 271 | hugo_tar.write(chunk) 272 | 273 | HUGO_BIN_PATH = WORKING_DIR_PATH / HUGO_BIN 274 | run(logger, f'tar -xzf {hugo_tar_path} -C {WORKING_DIR_PATH}', env={}) 275 | run(logger, f'chmod +x {HUGO_BIN_PATH}', env={}) 276 | return 277 | except Exception: 278 | failed_attempts += 1 279 | logger.info( 280 | f'Failed attempt #{failed_attempts} to download hugo version: {hugo_version}' 281 | ) 282 | if failed_attempts == 5: 283 | raise RuntimeError(f'Unable to download hugo version: {hugo_version}') 284 | time.sleep(2) # try again in 2 seconds 285 | 286 | 287 | def build_hugo(branch, owner, repository, site_prefix, 288 | base_url='', user_env_vars=[]): 289 | ''' 290 | Builds the cloned site with Hugo 291 | ''' 292 | logger = get_logger('build-hugo') 293 | 294 | HUGO_BIN_PATH = WORKING_DIR_PATH / HUGO_BIN 295 | 296 | run(logger, f'echo hugo version: $({HUGO_BIN_PATH} version)', env={}) 297 | 298 | logger.info('Building site with hugo') 299 | 300 | hugo_args = f'--source {CLONE_DIR_PATH} --destination {SITE_BUILD_DIR_PATH}' 301 | if base_url: 302 | hugo_args += f' --baseURL {base_url}' 303 | 304 | env = build_env(branch, owner, repository, site_prefix, base_url, user_env_vars) 305 | run(logger, f'{HUGO_BIN_PATH} {hugo_args}', cwd=CLONE_DIR_PATH, env=env, node=True) 306 | 307 | 308 | def setup_ruby(should_cache, post_metrics): 309 | ''' 310 | Sets up RVM and installs ruby 311 | Uses the ruby version specified in .ruby-version if present 312 | ''' 
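    # Illustrative example: a repo whose .ruby-version contains "3.1.4" results in
    # `rvm install 3.1.4` being run in the RVM shell, after
    # check_supported_ruby_version() has verified it against RUBY_VERSION_MIN;
    # the resulting `ruby -v` output is then reported via post_metrics.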
313 | 314 | logger = get_logger('setup-ruby') 315 | 316 | def runp(cmd, skip_log=False): 317 | return run(logger, cmd, cwd=CLONE_DIR_PATH, env={}, ruby=True, skip_log=skip_log) 318 | 319 | RUBY_VERSION_PATH = CLONE_DIR_PATH / RUBY_VERSION 320 | if RUBY_VERSION_PATH.is_file(): 321 | logger.info('Using ruby version in .ruby-version') 322 | with RUBY_VERSION_PATH.open() as ruby_vers_file: 323 | ruby_version = ruby_vers_file.readline().strip() 324 | # escape-quote the value in case there's anything weird 325 | # in the .ruby-version file 326 | ruby_version = shlex.quote(ruby_version) 327 | check_supported_ruby_version(ruby_version) 328 | runp(f'rvm install {ruby_version}') 329 | 330 | ruby_version = runp('ruby -v', skip_log=True) 331 | post_metrics({ 332 | "engines": { 333 | "ruby": dict(version=ruby_version, cache=should_cache) 334 | } 335 | }) 336 | runp('echo Ruby version: $(ruby -v)') 337 | 338 | 339 | def setup_bundler(should_cache: bool, bucket, s3_client): 340 | logger = get_logger('setup-bundler') 341 | 342 | def runp(cmd): 343 | return run(logger, cmd, cwd=CLONE_DIR_PATH, env={}, ruby=True) 344 | 345 | GEMFILE_PATH = CLONE_DIR_PATH / GEMFILE 346 | GEMFILELOCK_PATH = CLONE_DIR_PATH / GEMFILELOCK 347 | 348 | if not GEMFILE_PATH.is_file(): 349 | logger.info('No Gemfile found, installing Jekyll.') 350 | return runp('gem install jekyll -v 4.2.2 --no-document') 351 | 352 | logger.info('Gemfile found, setting up bundler') 353 | 354 | version = '<2' 355 | 356 | BUNDLER_VERSION_PATH = CLONE_DIR_PATH / BUNDLER_VERSION 357 | 358 | if BUNDLER_VERSION_PATH.is_file(): 359 | with BUNDLER_VERSION_PATH.open() as bundler_vers_file: 360 | try: 361 | bundler_vers = bundler_vers_file.readline().strip() 362 | # escape-quote the value in case there's anything weird 363 | # in the .bundler-version file 364 | bundler_vers = shlex.quote(bundler_vers) 365 | regex = r'^[\d]+(\.[\d]+)*$' 366 | bundler_vers = re.search(regex, bundler_vers).group(0) 367 | if bundler_vers: 368 | logger.info('Using bundler version in .bundler-version') 369 | version = bundler_vers 370 | except Exception: 371 | raise RuntimeError('Invalid .bundler-version') 372 | 373 | runp(f'gem install bundler --version "{version}"') 374 | 375 | cache_folder = None 376 | if GEMFILELOCK_PATH.is_file() and should_cache: 377 | logger.info(f'{GEMFILELOCK} found. 
Attempting to download cache') 378 | GEMFOLDER = subprocess.run( # nosec 379 | f'source {RVM_PATH} && rvm gemdir', 380 | cwd=CLONE_DIR_PATH, 381 | shell=True, 382 | executable='/bin/bash', 383 | capture_output=True, 384 | preexec_fn=setuser 385 | ) 386 | GEMFOLDER = GEMFOLDER.stdout.decode('utf-8').strip() 387 | cache_folder = CacheFolder(GEMFILELOCK_PATH, GEMFOLDER, bucket, s3_client, logger) 388 | cache_folder.download_unzip() 389 | 390 | logger.info('Installing dependencies in Gemfile') 391 | runp('bundle install') 392 | 393 | if GEMFILELOCK_PATH.is_file() and should_cache: 394 | # we also need to check for cache_folder here because we shouldn't cache if they didn't 395 | # initially have a lockfile (bundle install creates one) 396 | if cache_folder and not cache_folder.exists(): 397 | cache_folder.zip_upload_folder_to_s3() 398 | 399 | 400 | def update_jekyll_config(federalist_config={}, custom_config_path=''): 401 | logger = get_logger('build-jekyll') 402 | 403 | JEKYLL_CONF_YML_PATH = CLONE_DIR_PATH / JEKYLL_CONFIG_YML 404 | 405 | config_yml = {} 406 | with JEKYLL_CONF_YML_PATH.open('r') as jekyll_conf_file: 407 | config_yml = yaml.safe_load(jekyll_conf_file) 408 | 409 | custom_config = {} 410 | if custom_config_path: 411 | try: 412 | custom_config = json.loads(custom_config_path) 413 | except json.JSONDecodeError: 414 | error = 'Could not load/parse custom yaml config.' 415 | logger.error(error) 416 | raise Exception(error) 417 | 418 | config_yml = {**config_yml, **custom_config, **federalist_config} 419 | 420 | with JEKYLL_CONF_YML_PATH.open('w') as jekyll_conf_file: 421 | yaml.dump(config_yml, jekyll_conf_file, default_flow_style=False) 422 | 423 | 424 | def build_jekyll(branch, owner, repository, site_prefix, 425 | base_url='', config='', user_env_vars=[]): 426 | ''' 427 | Builds the cloned site with Jekyll 428 | ''' 429 | logger = get_logger('build-jekyll') 430 | 431 | update_jekyll_config( 432 | dict(baseurl=base_url, branch=branch), 433 | config 434 | ) 435 | 436 | jekyll_cmd = 'jekyll' 437 | 438 | GEMFILE_PATH = CLONE_DIR_PATH / GEMFILE 439 | if GEMFILE_PATH.is_file(): 440 | jekyll_cmd = f'bundle exec {jekyll_cmd}' 441 | 442 | run( 443 | logger, 444 | f'echo Building using Jekyll version: $({jekyll_cmd} -v)', 445 | cwd=CLONE_DIR_PATH, 446 | env={}, 447 | ruby=True 448 | ) 449 | 450 | env = build_env(branch, owner, repository, site_prefix, base_url, user_env_vars) 451 | env['JEKYLL_ENV'] = 'production' 452 | 453 | run( 454 | logger, 455 | f'{jekyll_cmd} build --destination {SITE_BUILD_DIR_PATH}', 456 | cwd=CLONE_DIR_PATH, 457 | env=env, 458 | node=True, 459 | ruby=True 460 | ) 461 | -------------------------------------------------------------------------------- /src/steps/cache.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | import os 3 | import hashlib 4 | import shutil 5 | import botocore 6 | 7 | # Cache expiration time 8 | NEXT_MONTH = datetime.now() + timedelta(days=30) 9 | ARCHIVE_METHOD = 'tar' 10 | 11 | 12 | def get_checksum(filename): 13 | m = hashlib.md5() # nosec 14 | with open(filename, 'rb') as f: 15 | while chunk := f.read(4096): 16 | m.update(chunk) 17 | return m.hexdigest() 18 | 19 | 20 | class CacheFolder(): 21 | ''' 22 | An abstract class for a cache folder in S3 23 | ''' 24 | 25 | def __init__(self, checksum_file, local_folder, bucket, s3_client, logger): 26 | self.checksum_file = checksum_file 27 | self.key = get_checksum(checksum_file) 28 | self.local_folder = local_folder 
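        # The cache key above is the md5 checksum of the dependency lockfile
        # (package-lock.json or Gemfile.lock in practice), so any change to the
        # lockfile maps to a new `_cache/<key>` archive in S3.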
29 | self.bucket = bucket 30 | self.s3_client = s3_client 31 | self.logger = logger 32 | 33 | def exists(self): 34 | '''Check if a given cache key exists''' 35 | try: 36 | self.s3_client.head_object( 37 | Bucket=self.bucket, 38 | Key=f'_cache/{self.key}' 39 | ) 40 | return True 41 | except botocore.exceptions.ClientError as error: 42 | if error.response['Error']['Message'] == 'Not Found': 43 | return False 44 | else: 45 | self.logger.error(error.response['Error']) 46 | raise error 47 | 48 | def zip_upload_folder_to_s3(self): 49 | self.logger.info(f'Caching dependencies from {self.local_folder}.') 50 | tmp_file = f'{self.key}.{ARCHIVE_METHOD}' 51 | shutil.make_archive(self.key, ARCHIVE_METHOD, self.local_folder) 52 | self.logger.info(f'Created archive {tmp_file}') 53 | self.s3_client.upload_file( 54 | Filename=tmp_file, 55 | Bucket=self.bucket, 56 | Key=f'_cache/{self.key}', 57 | ExtraArgs=dict(Expires=NEXT_MONTH) 58 | ) 59 | os.unlink(tmp_file) 60 | 61 | def download_unzip(self): 62 | if self.exists(): 63 | self.logger.info(f'Dependency cache found, downloading to {self.local_folder}.') 64 | tmp_file = f'{self.key}.{ARCHIVE_METHOD}' 65 | self.s3_client.download_file( 66 | Filename=tmp_file, 67 | Bucket=self.bucket, 68 | Key=f'_cache/{self.key}' 69 | ) 70 | shutil.unpack_archive(tmp_file, self.local_folder, ARCHIVE_METHOD) 71 | os.unlink(tmp_file) 72 | else: 73 | self.logger.info('No cache file found.') 74 | -------------------------------------------------------------------------------- /src/steps/exceptions.py: -------------------------------------------------------------------------------- 1 | class StepException(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /src/steps/fetch.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Fetch tasks and helpers 3 | ''' 4 | import shlex 5 | import subprocess # nosec 6 | 7 | from log_utils import get_logger 8 | from runner import run 9 | from common import (REPO_BASE_URL, CLONE_DIR_PATH) 10 | from steps import StepException 11 | 12 | 13 | def fetch_url(owner, repository, access_token=''): # nosec 14 | ''' 15 | Creates a URL to a remote git repository. 16 | If `access_token` is specified, it will be included in the authentication 17 | section of the returned URL. 18 | 19 | >>> fetch_url('owner', 'repo') 20 | 'https://github.com/owner/repo.git' 21 | 22 | >>> fetch_url('owner2', 'repo2', 'secret-token') 23 | 'https://secret-token@github.com/owner2/repo2.git' 24 | ''' 25 | repo_url = f'{REPO_BASE_URL}/{owner}/{repository}.git' 26 | if access_token: 27 | repo_url = f'{access_token}@{repo_url}' 28 | 29 | return f'https://{repo_url}' 30 | 31 | 32 | def fetch_repo(owner, repository, branch, github_token=''): # nosec 33 | ''' 34 | Clones the GitHub repository specified by owner and repository 35 | into CLONE_DIR_PATH. 
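    For example (assuming REPO_BASE_URL is github.com, as in the fetch_url
    doctests above), fetch_repo('owner', 'repo', 'main') runs roughly:

        git clone -b main --single-branch --depth 1 https://github.com/owner/repo.git <CLONE_DIR_PATH>

    where <CLONE_DIR_PATH> comes from common.CLONE_DIR_PATH and a github_token,
    when supplied, is embedded in the clone URL by fetch_url.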
36 | ''' 37 | logger = get_logger('clone') 38 | 39 | owner = shlex.quote(owner) 40 | repository = shlex.quote(repository) 41 | branch = shlex.quote(branch) 42 | 43 | clone_env = { 44 | 'HOME': '/home' 45 | } 46 | 47 | command = ( 48 | f'git clone -b {branch} --single-branch --depth 1 ' 49 | f'{fetch_url(owner, repository, github_token)} ' 50 | f'{CLONE_DIR_PATH}' 51 | ) 52 | 53 | return run(logger, command, env=clone_env, check=False) 54 | 55 | 56 | def update_repo(clone_dir): 57 | ''' 58 | Updates the repo with the full git history 59 | ''' 60 | logger = get_logger('update') 61 | 62 | logger.info('Fetching full git history') 63 | 64 | command = 'git pull --unshallow' 65 | 66 | return run(logger, command, cwd=clone_dir) 67 | 68 | 69 | def fetch_commit_sha(clone_dir): 70 | ''' 71 | fetch the last commitSHA 72 | ''' 73 | try: 74 | logger = get_logger('clone') 75 | logger.info('Fetching commit details ...') 76 | # prior to running commands on the repo, make sure it isn't "dubious" 77 | # "detected dubious ownership in repository" 78 | git_command = shlex.split(f'git config --global --add safe.directory {clone_dir}') 79 | subprocess.run( # nosec 80 | git_command, 81 | shell=False, 82 | check=True, 83 | stdout=subprocess.PIPE, 84 | universal_newlines=True, 85 | cwd=clone_dir 86 | ) 87 | command = shlex.split('git log -1') # get last commit only 88 | process = subprocess.run( # nosec 89 | command, 90 | shell=False, 91 | check=True, 92 | stdout=subprocess.PIPE, 93 | universal_newlines=True, 94 | cwd=clone_dir 95 | ) 96 | commit_log = process.stdout 97 | commit_sha = commit_log.split()[1] 98 | logger.info(f'commit {commit_sha}') 99 | return commit_sha 100 | except Exception: 101 | raise StepException('There was a problem fetching the commit hash for this build') 102 | -------------------------------------------------------------------------------- /src/steps/publish.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Publish tasks and helpers 3 | ''' 4 | from datetime import datetime 5 | 6 | from publishing import s3publisher 7 | 8 | from log_utils import delta_to_mins_secs, get_logger 9 | from common import SITE_BUILD_DIR_PATH 10 | 11 | 12 | def publish(base_url, site_prefix, bucket, federalist_config, 13 | s3_client, dry_run=False): 14 | ''' 15 | Publish the built site to S3. 
16 | ''' 17 | logger = get_logger('publish') 18 | 19 | logger.info('Publishing to S3') 20 | 21 | start_time = datetime.now() 22 | 23 | s3publisher.publish_to_s3( 24 | directory=str(SITE_BUILD_DIR_PATH), 25 | base_url=base_url, 26 | site_prefix=site_prefix, 27 | bucket=bucket, 28 | federalist_config=federalist_config, 29 | s3_client=s3_client, 30 | dry_run=dry_run 31 | ) 32 | 33 | delta_string = delta_to_mins_secs(datetime.now() - start_time) 34 | logger.info(f'Total time to publish: {delta_string}') 35 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloud-gov/pages-build-container/00b490e9f858f10b52eb0875b637aea61a913438/test/__init__.py -------------------------------------------------------------------------------- /test/publishing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloud-gov/pages-build-container/00b490e9f858f10b52eb0875b637aea61a913438/test/publishing/__init__.py -------------------------------------------------------------------------------- /test/publishing/test_models.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | 3 | from unittest.mock import Mock 4 | 5 | import pytest 6 | 7 | from publishing.models import SiteObject, SiteFile, SiteRedirect 8 | from ..support import generate_file_hash 9 | 10 | 11 | class TestSiteObject(): 12 | def test_constructor(self): 13 | model = SiteObject( 14 | filename='boop', 15 | md5='md5', 16 | dir_prefix='dir_prefix', 17 | site_prefix='site_prefix' 18 | ) 19 | assert model is not None 20 | 21 | # default params are used 22 | model = SiteObject(filename='boop2', md5='abc') 23 | assert model is not None 24 | assert model.dir_prefix == '' 25 | assert model.site_prefix == '' 26 | 27 | def test_s3_key(self): 28 | model = SiteObject('abc', 'md5', site_prefix='site') 29 | assert model.s3_key == 'site/abc' 30 | 31 | model = SiteObject('/dir/abc', 'md5', 32 | dir_prefix='/dir', site_prefix='site') 33 | assert model.s3_key == 'site/abc' 34 | 35 | model = SiteObject('/not_dir/abc', 'md5', 36 | dir_prefix='/dir', site_prefix='site') 37 | assert model.s3_key == 'site//not_dir/abc' 38 | 39 | def test_delete_from_s3(self): 40 | s3_client = Mock() 41 | 42 | model = SiteObject('/dir/abc', 'md5', 43 | dir_prefix='/dir', site_prefix='site') 44 | model.delete_from_s3('test-bucket', s3_client) 45 | s3_client.delete_object.assert_called_once_with( 46 | Bucket='test-bucket', 47 | Key='site/abc') 48 | 49 | def test_upload_to_s3(self): 50 | model = SiteObject('abc', 'md5') 51 | # Base SiteObject should not have this method implemented 52 | # because it is specific to file and redirect objects 53 | with pytest.raises(NotImplementedError): 54 | model.upload_to_s3('bucket', None) 55 | 56 | 57 | class TestSiteFile(): 58 | @pytest.mark.parametrize('filename, is_compressible', [ 59 | ('test_file.html', True), 60 | ('test_file.css', True), 61 | ('test_file.js', True), 62 | ('test_file.json', True), 63 | ('test_file.svg', True), 64 | ('test_file.txt', False), 65 | ('test_file.exe', False), 66 | ]) 67 | def test_is_compressible(self, tmpdir, filename, is_compressible): 68 | test_dir = tmpdir.mkdir('a_dir') 69 | test_file = test_dir.join(filename) 70 | test_file.write('something something') 71 | model = SiteFile( 72 | filename=str(test_file), 73 | 
dir_prefix=str(test_dir), 74 | site_prefix='/site', 75 | cache_control='max-age=60') 76 | assert model.is_compressible == is_compressible 77 | 78 | def test_non_compressible_file(self, tmpdir): 79 | test_dir = tmpdir.mkdir('boop') 80 | test_file = test_dir.join('test_file.txt') 81 | test_file.write('content') 82 | model = SiteFile( 83 | filename=str(test_file), 84 | dir_prefix=str(test_dir), 85 | site_prefix='/site', 86 | cache_control='max-age=60') 87 | 88 | assert model is not None 89 | 90 | # hardcoded md5 hash of 'content' 91 | assert model.md5 == '9a0364b9e99bb480dd25e1f0284c8555' 92 | assert model.s3_key == '/site/test_file.txt' 93 | assert model.dir_prefix == str(test_dir) 94 | assert model.content_encoding is None 95 | assert model.content_type == 'text/plain' 96 | 97 | # Make sure uploads is called correctly 98 | s3_client = Mock() 99 | model.upload_to_s3('test-bucket', s3_client) 100 | s3_client.upload_file.assert_called_once_with( 101 | Filename=str(test_file), 102 | Bucket='test-bucket', 103 | Key='/site/test_file.txt', 104 | ExtraArgs={ 105 | 'CacheControl': 'max-age=60', 106 | 'ServerSideEncryption': 'AES256', 107 | 'ContentType': 'text/plain', 108 | }, 109 | ) 110 | 111 | def test_compressible_file(self, tmpdir): 112 | test_dir = tmpdir.mkdir('boop') 113 | 114 | # .html files are compressible 115 | test_file = test_dir.join('test_file.html') 116 | test_file.write('content') 117 | model = SiteFile( 118 | filename=str(test_file), 119 | dir_prefix=str(test_dir), 120 | site_prefix='/site', 121 | cache_control='max-age=60') 122 | 123 | assert model is not None 124 | assert model.is_compressible is True 125 | assert model.is_compressed is True 126 | 127 | assert model.md5 == generate_file_hash(test_file) 128 | assert model.s3_key == '/site/test_file.html' 129 | assert model.dir_prefix == str(test_dir) 130 | assert model.content_encoding == 'gzip' 131 | assert model.content_type == 'text/html' 132 | 133 | # Make sure upload is called correctly 134 | s3_client = Mock() 135 | model.upload_to_s3('test-bucket', s3_client) 136 | s3_client.upload_file.assert_called_once_with( 137 | Filename=str(test_file), 138 | Bucket='test-bucket', 139 | Key='/site/test_file.html', 140 | ExtraArgs={ 141 | 'CacheControl': 'max-age=60', 142 | 'ServerSideEncryption': 'AES256', 143 | 'ContentType': 'text/html', 144 | 'ContentEncoding': 'gzip', 145 | }, 146 | ) 147 | 148 | 149 | class TestSiteRedirect(): 150 | def test_constructor_and_props(self, tmpdir): 151 | base_test_dir = tmpdir.mkdir('boop') 152 | test_dir = base_test_dir.mkdir('sub_dir') 153 | 154 | model = SiteRedirect( 155 | filename=str(test_dir), 156 | dir_prefix=str(base_test_dir), 157 | site_prefix='prefix', 158 | base_url='/preview', 159 | cache_control='max-age=60' 160 | ) 161 | 162 | assert model is not None 163 | 164 | expected_dest = '/preview/sub_dir/' 165 | assert model.md5 == hashlib.md5(expected_dest.encode()).hexdigest() 166 | assert model.destination == expected_dest 167 | assert model.s3_key == 'prefix/sub_dir' 168 | 169 | # try with empty dir_prefix 170 | model.dir_prefix = '' 171 | assert model.destination == f'/preview/{test_dir}/' 172 | assert model.s3_key == f'prefix/{test_dir}' 173 | 174 | # and when we're dealing with the "root" redirect object 175 | # ie, filename and dir_prefix are the same 176 | model.filename = str(base_test_dir) 177 | model.dir_prefix = str(base_test_dir) 178 | assert model.destination == '/preview/' 179 | assert model.s3_key == 'prefix' 180 | 181 | def test_upload_to_s3(self, tmpdir): 182 | 
base_test_dir = tmpdir.mkdir('boop') 183 | test_dir = base_test_dir.mkdir('wherever') 184 | 185 | model = SiteRedirect( 186 | filename=str(test_dir), 187 | dir_prefix=str(base_test_dir), 188 | site_prefix='site-prefix', 189 | base_url='/site/test', 190 | cache_control='max-age=60' 191 | ) 192 | 193 | s3_client = Mock() 194 | model.upload_to_s3('test-bucket', s3_client) 195 | 196 | expected_dest = '/site/test/wherever/' 197 | 198 | s3_client.put_object.assert_called_once_with( 199 | Body=expected_dest, 200 | Bucket='test-bucket', 201 | Key='site-prefix/wherever', 202 | ServerSideEncryption='AES256', 203 | WebsiteRedirectLocation=expected_dest, 204 | CacheControl="max-age=60", 205 | ) 206 | -------------------------------------------------------------------------------- /test/publishing/test_s3publisher.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import pytest 3 | import requests_mock 4 | 5 | from moto import mock_aws 6 | 7 | from publishing.s3publisher import list_remote_objects, publish_to_s3 8 | from publishing.models import SiteObject 9 | 10 | import repo_config 11 | 12 | TEST_BUCKET = 'test-bucket' 13 | TEST_REGION = 'test-region' 14 | TEST_ACCESS_KEY = 'fake-access-key' 15 | TEST_SECRET_KEY = 'fake-secret-key' 16 | 17 | 18 | @pytest.fixture 19 | def s3_client(monkeypatch): 20 | monkeypatch.setenv('AWS_ACCESS_KEY_ID', TEST_ACCESS_KEY) 21 | monkeypatch.setenv('AWS_SECRET_ACCESS_KEY', TEST_SECRET_KEY) 22 | 23 | with mock_aws(): 24 | conn = boto3.resource('s3', region_name=TEST_REGION) 25 | 26 | conn.create_bucket( 27 | Bucket=TEST_BUCKET, 28 | CreateBucketConfiguration={"LocationConstraint": "test-bucket"} 29 | ) 30 | 31 | s3_client = boto3.client( 32 | service_name='s3', 33 | region_name=TEST_REGION, 34 | aws_access_key_id=TEST_ACCESS_KEY, 35 | aws_secret_access_key=TEST_SECRET_KEY, 36 | ) 37 | 38 | yield s3_client 39 | 40 | 41 | def test_list_remote_objects(monkeypatch, s3_client): 42 | # Check that nothing is returned if nothing is in the bucket 43 | results = list_remote_objects(TEST_BUCKET, '/test-site', s3_client) 44 | assert results == [] 45 | 46 | # Add a few objects with different prefixes 47 | s3_client.put_object(Key='test-site/a', Body='a', Bucket=TEST_BUCKET) 48 | s3_client.put_object(Key='wrong-prefix/b', Body='b', Bucket=TEST_BUCKET) 49 | 50 | # Check that only one object matching the prefix is returned 51 | results = list_remote_objects(TEST_BUCKET, 'test-site', s3_client) 52 | assert len(results) == 1 53 | assert type(results[0]) == SiteObject 54 | assert results[0].s3_key == 'test-site/a' 55 | 56 | # Add a few more objects 57 | for i in range(0, 10): 58 | s3_client.put_object(Key=f'test-site/sub/{i}.html', 59 | Body=f'{i}', Bucket=TEST_BUCKET) 60 | 61 | # Monkeypatch max keys so we can ensure ContinuationTokens are used 62 | monkeypatch.setattr('publishing.s3publisher.MAX_S3_KEYS_PER_REQUEST', 5) 63 | 64 | # Check that we get all expected objects back 65 | results = list_remote_objects(TEST_BUCKET, 'test-site', s3_client) 66 | assert len(results) == 11 # 10 keys from the loop, 1 from previous put 67 | 68 | 69 | def _make_fake_files(dir, filenames): 70 | for f_name in filenames: 71 | file = dir.join(f_name) 72 | file.write(f'fake content for {f_name}') 73 | 74 | 75 | def test_publish_to_s3(tmpdir, s3_client): 76 | # Use tmpdir to create a fake directory 77 | # full of directories and files to be published/deleted/updated 78 | test_dir = tmpdir.mkdir('test_dir') 79 | 80 | # make a subdirectory 81 | 
test_dir.mkdir('sub_dir') 82 | 83 | site_prefix = 'test_dir' 84 | 85 | filenames = ['index.html', 86 | 'boop.txt', 87 | 'sub_dir/index.html'] 88 | 89 | _make_fake_files(test_dir, filenames) 90 | 91 | federalist_config = repo_config.from_object( 92 | { 93 | 'headers': [ 94 | {'/index.html': {'cache-control': 'no-cache'}}, 95 | {'/*.txt': {'cache-control': 'max-age=1000'}} 96 | ], 97 | 'excludePaths': [ 98 | '/excluded-file' 99 | ] 100 | }, 101 | { 102 | 'headers': { 103 | 'cache-control': 'max-age=60' 104 | }, 105 | 'excludePaths': [ 106 | '*/Dockerfile', 107 | '*/docker-compose.yml' 108 | ], 109 | 'includePaths': [ 110 | '/.well-known/security.txt' 111 | ] 112 | } 113 | ) 114 | 115 | publish_kwargs = { 116 | 'directory': str(test_dir), 117 | 'base_url': '/base_url', 118 | 'site_prefix': site_prefix, 119 | 'bucket': TEST_BUCKET, 120 | 'federalist_config': federalist_config, 121 | 's3_client': s3_client, 122 | } 123 | 124 | # Create mock for default 404 page request 125 | with requests_mock.mock() as m: 126 | m.get(('https://raw.githubusercontent.com' 127 | '/cloud-gov/pages-404-page/main/' 128 | '404-pages-client.html'), 129 | text='default 404 page') 130 | 131 | publish_to_s3(**publish_kwargs) 132 | 133 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 134 | 135 | keys = [r['Key'] for r in results['Contents']] 136 | 137 | assert results['KeyCount'] == 6 # 4 files, 3 redirects & 404.html 138 | 139 | assert f'{site_prefix}/index.html' in keys 140 | assert f'{site_prefix}/boop.txt' in keys 141 | assert f'{site_prefix}/sub_dir' in keys 142 | assert f'{site_prefix}/sub_dir/index.html' in keys 143 | assert f'{site_prefix}/404.html' in keys 144 | assert f'{site_prefix}' in keys # main redirect object 145 | 146 | # Check the cache control headers 147 | cache_control_checks = [ 148 | ('index.html', 'no-cache'), 149 | ('boop.txt', 'max-age=1000'), 150 | ('404.html', 'max-age=60') 151 | ] 152 | for filename, expected in cache_control_checks: 153 | result = s3_client.get_object( 154 | Bucket=TEST_BUCKET, 155 | Key=f'{site_prefix}/{filename}')['CacheControl'] 156 | assert result == expected 157 | 158 | # Add another file to the directory 159 | more_filenames = ['new_index.html'] 160 | _make_fake_files(test_dir, more_filenames) 161 | publish_to_s3(**publish_kwargs) 162 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 163 | 164 | assert results['KeyCount'] == 7 165 | 166 | # Delete some files and check that the published files count 167 | # is correct 168 | test_dir.join('new_index.html').remove() 169 | test_dir.join('boop.txt').remove() 170 | publish_to_s3(**publish_kwargs) 171 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 172 | assert results['KeyCount'] == 5 173 | 174 | # Write an existing file with different content so that it 175 | # needs to get updated 176 | index_key = f'{site_prefix}/index.html' 177 | orig_etag = s3_client.get_object( 178 | Bucket=TEST_BUCKET, 179 | Key=index_key)['ETag'] 180 | test_dir.join('index.html').write('totally new content!!!') 181 | publish_to_s3(**publish_kwargs) 182 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 183 | 184 | # number of keys should be the same 185 | assert results['KeyCount'] == 5 186 | 187 | # make sure content in changed file is updated 188 | new_etag = s3_client.get_object( 189 | Bucket=TEST_BUCKET, 190 | Key=index_key)['ETag'] 191 | assert new_etag != orig_etag 192 | 193 | # test hidden files and directories 194 | test_dir.mkdir('.well-known') 195 | test_dir.mkdir('.not-well-known') 196 | more_filenames = 
['.well-known/security.txt', 197 | '.well-known/not-security.txt', 198 | '.well-known/.security', 199 | '.not-well-known/security.txt', 200 | '.security'] 201 | _make_fake_files(test_dir, more_filenames) 202 | publish_to_s3(**publish_kwargs) 203 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 204 | assert results['KeyCount'] == 6 205 | 206 | # make sure default excluded files are excluded by default 207 | more_filenames = ['Dockerfile', 208 | 'docker-compose.yml'] 209 | _make_fake_files(test_dir, more_filenames) 210 | publish_to_s3(**publish_kwargs) 211 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 212 | assert results['KeyCount'] == 6 213 | 214 | # make sure files can be excluded in configuration 215 | more_filenames = ['excluded-file'] 216 | _make_fake_files(test_dir, more_filenames) 217 | publish_to_s3(**publish_kwargs) 218 | results = s3_client.list_objects_v2(Bucket=TEST_BUCKET) 219 | assert results['KeyCount'] == 6 220 | -------------------------------------------------------------------------------- /test/repo_config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloud-gov/pages-build-container/00b490e9f858f10b52eb0875b637aea61a913438/test/repo_config/__init__.py -------------------------------------------------------------------------------- /test/repo_config/test_repo_config.py: -------------------------------------------------------------------------------- 1 | from repo_config.repo_config import (RepoConfig, contains_dotpath, match_path, 2 | find_first_matching_cfg) 3 | 4 | 5 | def test_match_path(): 6 | 7 | # (, , ) 8 | configs = [ 9 | # static paths 10 | ('/', '/', True), 11 | ('/', '/hello', False), 12 | ('/hello', '/hello', True), 13 | ('/hello', '/hello/world', False), 14 | ('/hello/world', '/hello', False), 15 | 16 | # wildcard paths 17 | ('/*', '/', True), 18 | ('/*', '/hello', True), 19 | ('/*', '/hello.js', True), 20 | ('/*', '/hello/world', True), 21 | ('/hello/*', '/hello/world', True), 22 | ('/hello/*', '/world', False), 23 | ('/hello/*', '/hello/sdhgfsjdh/dkjhsfhsdfkj', True), 24 | 25 | # wildcard extension paths 26 | ('/*.html', '/', False), 27 | ('/*.html', '/foo', False), 28 | ('/*.html', '/foo.js', False), 29 | ('/*.html', '/foo.html', True), 30 | ('/bar/*.html', '/foo.html', False), 31 | ('/bar/*.html', '/bar/foo.html', True), 32 | ('/bar/*.html', '/bar/foo.js', False), 33 | ('/bar/*.html', '/bar/baz/foo.html', True), 34 | ('/bar/*.html', '/bar/baz/foo.js', False), 35 | ('/bar/*.map', '/bar/foo.js.map', True), 36 | 37 | # segment wildcard paths 38 | ('/:hello', '/', True), 39 | ('/:hello', '/booyah', True), 40 | ('/:hello', '/booyah/world', False), 41 | ('/:hello/world', '/booyah/world', True), 42 | ('/:hello/world', '/booyah/hello', False), 43 | ('/:hello/world', '/booyah', False), 44 | 45 | # crazy town 46 | ('/hello/*/foo', '/hello/sdhgfsjdh/dkjhsfhsdfkj', True), 47 | ('/:hello/world/*', '/booyah/world', True), 48 | ('/:hello/world/*', '/booyah/world/foo', True), 49 | ('/hi/:hello/world/*', '/hi/booyah/nope', False), 50 | ('/hi/:hello/world/*', '/hi/booyah/world', True), 51 | ('/hi/:hello/world/*', '/hi/booyah/world/crazy', True), 52 | 53 | # even when missing leading '/' 54 | (':hello/world/*', '/booyah/world/foo', True), 55 | (':hello/world/*', 'booyah/world/foo', True), 56 | ('/:hello/world/*', 'booyah/world', True), 57 | ] 58 | 59 | for cfg_path, path_to_match, expected_result in configs: 60 | assert match_path(cfg_path, path_to_match) == 
expected_result 61 | 62 | 63 | def test_find_first_matching_cfg(): 64 | headers = [ 65 | {'/index.html': {'cache-control': 'no-cache'}}, 66 | {'/:foo/*.html': {'cache-control': 'max-age=2000'}}, 67 | {'/*.html': {'cache-control': 'max-age=4000'}}, 68 | {'/*': {'cache-control': 'max-age=6000'}} 69 | ] 70 | 71 | configs = [ 72 | (headers, '/index.html', headers[0]), 73 | (headers, '/foo/bar.html', headers[1]), 74 | (headers, '/foo.html', headers[2]), 75 | (headers, '/', headers[3]), 76 | (headers, '/bar.js', headers[3]), 77 | ({}, '/bar.js', {}) 78 | ] 79 | 80 | for cfg_headers, path_to_match, expected_result in configs: 81 | assert find_first_matching_cfg( 82 | cfg_headers, path_to_match) == expected_result 83 | 84 | 85 | def test_get_headers_for_path(): 86 | config = { 87 | 'headers': [ 88 | {'/index.html': {'cache-control': 'no-cache'}}, 89 | {'/*.html': {'cache-control': 'max-age=4000'}}, 90 | {'/*': {'cache-control': 'max-age=6000'}} 91 | ] 92 | } 93 | 94 | defaults = { 95 | 'headers': { 96 | 'cache-control': 'max-age=60', 97 | 'foo-header': 'special-stuff:with-a-colon!' 98 | } 99 | } 100 | 101 | repo_config = RepoConfig(config=config, defaults=defaults) 102 | 103 | # When multiple paths match, return the first 104 | path_to_match = '/index.html' 105 | value = repo_config.get_headers_for_path(path_to_match) 106 | assert value == { 107 | 'cache-control': 'no-cache', 108 | 'foo-header': 'special-stuff:with-a-colon!' 109 | } 110 | 111 | # Match the partial wildcard 112 | path_to_match = '/foo.html' 113 | value = repo_config.get_headers_for_path(path_to_match) 114 | assert value == { 115 | 'cache-control': 'max-age=4000', 116 | 'foo-header': 'special-stuff:with-a-colon!' 117 | } 118 | 119 | # Match the total wildcard 120 | path_to_match = '/foo.js' 121 | value = repo_config.get_headers_for_path(path_to_match) 122 | assert value == { 123 | 'cache-control': 'max-age=6000', 124 | 'foo-header': 'special-stuff:with-a-colon!' 125 | } 126 | 127 | # Match default 128 | config = { 129 | 'headers': [ 130 | {'/index.html': {'cache-control': 'max-age=3000'}} 131 | ] 132 | } 133 | repo_config = RepoConfig(config=config, defaults=defaults) 134 | 135 | path_to_match = '/foo.js' 136 | value = repo_config.get_headers_for_path(path_to_match) 137 | assert value == defaults['headers'] 138 | 139 | # Match no headers! 
140 | config = {} 141 | repo_config = RepoConfig(config=config, defaults=defaults) 142 | 143 | path_to_match = '/foo.js' 144 | value = repo_config.get_headers_for_path(path_to_match) 145 | assert value == defaults['headers'] 146 | 147 | 148 | def test_exclude_paths_always_returns_a_list(): 149 | repo_config = RepoConfig(config={}, defaults={}) 150 | value = repo_config.exclude_paths() 151 | assert value == [] 152 | 153 | 154 | def test_exclude_paths_returns_union_of_config_and_defaults(): 155 | repo_config = RepoConfig(config=test_config(), defaults=test_defaults()) 156 | value = repo_config.exclude_paths() 157 | assert value == [ 158 | '/excluded-file', 159 | '/excluded-folder', 160 | '/excluded-folder/*', 161 | '*/Dockerfile', 162 | '/docker-compose.yml' 163 | ] 164 | 165 | 166 | def test_include_paths_always_returns_a_list(): 167 | repo_config = RepoConfig(config={}, defaults={}) 168 | value = repo_config.include_paths() 169 | assert value == [] 170 | 171 | 172 | def test_include_paths_returns_union_of_config_and_defaults(): 173 | repo_config = RepoConfig(config=test_config(), defaults=test_defaults()) 174 | value = repo_config.include_paths() 175 | assert value == [ 176 | '/foo/Dockerfile', 177 | '*/.foo', 178 | '/.well-known/security.txt' 179 | ] 180 | 181 | 182 | def test_is_exclude_path_match(): 183 | repo_config = RepoConfig(config=test_config(), defaults=test_defaults()) 184 | 185 | # Excludes default file anywhere 186 | value = repo_config.is_exclude_path_match('/Dockerfile') 187 | assert value is True 188 | 189 | value = repo_config.is_exclude_path_match('/foo/Dockerfile') 190 | assert value is True 191 | 192 | value = repo_config.is_exclude_path_match('/foo/bar/baz/Dockerfile') 193 | assert value is True 194 | 195 | # Excludes default file only at root 196 | value = repo_config.is_exclude_path_match('/docker-compose.yml') 197 | assert value is True 198 | 199 | value = repo_config.is_exclude_path_match('/foo/docker-compose.yml') 200 | assert value is False 201 | 202 | # Excludes a file explicitly excluded 203 | value = repo_config.is_exclude_path_match('/excluded-file') 204 | assert value is True 205 | 206 | # Doesn't exclude a file not explicitly excluded 207 | value = repo_config.is_exclude_path_match('/index.html') 208 | assert value is False 209 | 210 | 211 | def test_is_include_path_match(): 212 | repo_config = RepoConfig(config=test_config(), defaults=test_defaults()) 213 | 214 | # Includes default file only in root 215 | value = repo_config.is_include_path_match('/.well-known/security.txt') 216 | assert value is True 217 | 218 | # Includes Dockerfile when that default is overridden by configuration 219 | value = repo_config.is_include_path_match('/foo/Dockerfile') 220 | assert value is True 221 | 222 | # Includes dot file 223 | value = repo_config.is_include_path_match('/foo/bar/.foo') 224 | assert value is True 225 | 226 | 227 | def test_is_path_excluded(): 228 | repo_config = RepoConfig(config=test_config(), defaults=test_defaults()) 229 | 230 | # Excludes dotfiles 231 | value = repo_config.is_path_excluded('/.bar') 232 | assert value is True 233 | 234 | value = repo_config.is_path_excluded('/foo/.bar') 235 | assert value is True 236 | 237 | # Includes dotfiles when specified 238 | value = repo_config.is_path_excluded('/.well-known/security.txt') 239 | assert value is False 240 | 241 | value = repo_config.is_path_excluded('/bar/.foo') 242 | assert value is False 243 | 244 | # Excludes defaults 245 | value = repo_config.is_path_excluded('/Dockerfile') 246 | assert value 
is True 247 | 248 | value = repo_config.is_path_excluded('/bar/Dockerfile') 249 | assert value is True 250 | 251 | value = repo_config.is_path_excluded('/docker-compose.yml') 252 | assert value is True 253 | 254 | value = repo_config.is_path_excluded('/foo/docker-compose.yml') 255 | assert value is False 256 | 257 | # Excludes configured files 258 | value = repo_config.is_path_excluded('/excluded-file') 259 | assert value is True 260 | 261 | value = repo_config.is_path_excluded('/foo/excluded-file') 262 | assert value is False 263 | 264 | # Excludes configured folders 265 | value = repo_config.is_path_excluded('/excluded-folder') 266 | assert value is True 267 | 268 | value = repo_config.is_path_excluded('/excluded-folder/') 269 | assert value is True 270 | 271 | value = repo_config.is_path_excluded('/excluded-folder/foo.txt') 272 | assert value is True 273 | 274 | value = repo_config.is_path_excluded('/foo/excluded-folder/foo.txt') 275 | assert value is False 276 | 277 | # Includes configured that overrides default 278 | value = repo_config.is_path_excluded('/foo/Dockerfile') 279 | assert value is False 280 | 281 | # Prepends slashes 282 | value = repo_config.is_path_excluded('excluded-file') 283 | assert value is True 284 | 285 | value = repo_config.is_path_excluded('foo/excluded-file') 286 | assert value is False 287 | 288 | 289 | def test_is_path_included_is_not_is_path_excluded(): 290 | repo_config = RepoConfig(config=test_config(), defaults=test_defaults()) 291 | path = '/bar/.foo' 292 | included_value = repo_config.is_path_included(path) 293 | excluded_value = repo_config.is_path_excluded(path) 294 | assert included_value is not excluded_value 295 | 296 | 297 | def test_contains_dotpath(): 298 | value = contains_dotpath('/.foo') 299 | assert value is True 300 | 301 | value = contains_dotpath('/.foo/bar') 302 | assert value is True 303 | 304 | value = contains_dotpath('/foo/.bar') 305 | assert value is True 306 | 307 | value = contains_dotpath('/foo/.bar/baz') 308 | assert value is True 309 | 310 | value = contains_dotpath('/foo/bar') 311 | assert value is False 312 | 313 | 314 | def test_config(): 315 | return { 316 | 'excludePaths': [ 317 | '/excluded-file', 318 | '/excluded-folder', 319 | '/excluded-folder/*' 320 | ], 321 | 'includePaths': [ 322 | '/foo/Dockerfile', 323 | '*/.foo' 324 | ] 325 | } 326 | 327 | 328 | def test_defaults(): 329 | return { 330 | 'excludePaths': [ 331 | '*/Dockerfile', 332 | '/docker-compose.yml' 333 | ], 334 | 'includePaths': [ 335 | '/.well-known/security.txt' 336 | ] 337 | } 338 | -------------------------------------------------------------------------------- /test/support.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import hashlib 3 | 4 | from pathlib import Path 5 | 6 | 7 | def patch_dir(monkeypatch, module, dir_constant): 8 | with tempfile.TemporaryDirectory() as tmpdir: 9 | tmpdir_path = Path(tmpdir) 10 | monkeypatch.setattr(module, dir_constant, tmpdir_path) 11 | yield tmpdir_path 12 | 13 | 14 | def create_file(file_path, contents='', mode='w'): 15 | with file_path.open(mode) as f: 16 | f.write(contents) 17 | 18 | 19 | def generate_file_hash(filename): 20 | hash_md5 = hashlib.md5() # nosec 21 | 22 | with open(filename, 'rb') as file: 23 | for chunk in iter(lambda: file.read(4096), b""): 24 | hash_md5.update(chunk) 25 | 26 | return hash_md5.hexdigest() 27 | -------------------------------------------------------------------------------- /test/test_build.py: 
-------------------------------------------------------------------------------- 1 | from pytest import raises 2 | import json 3 | import os 4 | from io import StringIO 5 | from unittest.mock import call, patch, Mock 6 | from subprocess import CalledProcessError # nosec 7 | 8 | import pytest 9 | import requests_mock 10 | import requests 11 | import yaml 12 | 13 | import steps 14 | from steps import ( 15 | build_hugo, build_jekyll, build_static, download_hugo, 16 | run_build_script, run_step, setup_bundler, setup_node, setup_ruby, StepException 17 | ) 18 | from steps.build import ( 19 | build_env, check_supported_ruby_version, BUNDLER_VERSION, GEMFILE, 20 | GEMFILELOCK, HUGO_BIN, HUGO_VERSION, JEKYLL_CONFIG_YML, 21 | NVMRC, PACKAGE_JSON, PACKAGE_LOCK, RUBY_VERSION 22 | ) 23 | 24 | from .support import create_file, patch_dir 25 | 26 | 27 | @pytest.fixture 28 | def patch_clone_dir(monkeypatch): 29 | yield from patch_dir(monkeypatch, steps.build, 'CLONE_DIR_PATH') 30 | 31 | 32 | @pytest.fixture 33 | def patch_working_dir(monkeypatch): 34 | yield from patch_dir(monkeypatch, steps.build, 'WORKING_DIR_PATH') 35 | 36 | 37 | @pytest.fixture 38 | def patch_site_build_dir(monkeypatch): 39 | yield from patch_dir(monkeypatch, steps.build, 'SITE_BUILD_DIR_PATH') 40 | 41 | 42 | @pytest.fixture 43 | def patch_ruby_min_version(monkeypatch): 44 | monkeypatch.setenv('RUBY_VERSION_MIN', '3.0.0') 45 | 46 | 47 | @patch('steps.build.run') 48 | @patch('steps.build.get_logger') 49 | class TestSetupNode(): 50 | def test_it_uses_nvmrc_file_if_it_exists(self, mock_get_logger, mock_run, patch_clone_dir): 51 | create_file(patch_clone_dir / NVMRC, contents='6') 52 | 53 | mock_post_metrics = Mock() 54 | setup_node(False, None, None, mock_post_metrics) 55 | 56 | mock_get_logger.assert_called_once_with('setup-node') 57 | 58 | mock_logger = mock_get_logger.return_value 59 | 60 | mock_logger.info.assert_called_with( 61 | 'Checking node version specified in .nvmrc' 62 | ) 63 | 64 | mock_post_metrics.assert_called_once() 65 | 66 | def test_installs_deps(self, mock_get_logger, mock_run, patch_clone_dir): 67 | create_file(patch_clone_dir / PACKAGE_LOCK) 68 | 69 | mock_post_metrics = Mock() 70 | setup_node(False, None, None, mock_post_metrics) 71 | 72 | mock_get_logger.assert_called_once_with('setup-node') 73 | 74 | mock_logger = mock_get_logger.return_value 75 | 76 | mock_logger.info.assert_has_calls([ 77 | call('Using default node version'), 78 | call('Installing dependencies in package-lock.json') 79 | ]) 80 | 81 | def callp(cmd, skip_log=False): 82 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, node=True, skip_log=skip_log) 83 | 84 | mock_run.assert_has_calls([ 85 | callp('echo Node version: $(node --version)'), 86 | callp('echo NPM version: $(npm --version)'), 87 | callp('node --version', skip_log=True), 88 | callp('npm set audit false'), 89 | callp('npm ci'), 90 | ]) 91 | 92 | mock_post_metrics.assert_called_once() 93 | 94 | def test_returns_code_when_err(self, mock_get_logger, mock_run): 95 | mock_run.side_effect = CalledProcessError(1, 'command') 96 | 97 | mock_post_metrics = Mock() 98 | with pytest.raises(CalledProcessError): 99 | setup_node(False, None, None, mock_post_metrics) 100 | 101 | 102 | @patch('steps.build.run') 103 | @patch('steps.build.get_logger') 104 | class TestRunBuildScript(): 105 | def test_it_runs_federalist_script_when_it_exists(self, mock_get_logger, mock_run, 106 | patch_clone_dir): 107 | package_json_contents = json.dumps({ 108 | 'scripts': { 109 | 'federalist': 'echo federalist', 110 
| }, 111 | }) 112 | create_file(patch_clone_dir / PACKAGE_JSON, package_json_contents) 113 | 114 | kwargs = dict( 115 | branch='branch', 116 | owner='owner', 117 | repository='repo', 118 | site_prefix='site/prefix', 119 | base_url='/site/prefix' 120 | ) 121 | 122 | run_build_script(**kwargs) 123 | 124 | mock_get_logger.assert_called_once_with('run-federalist-script') 125 | 126 | mock_logger = mock_get_logger.return_value 127 | 128 | mock_logger.info.assert_called_with( 129 | 'Running federalist build script in package.json' 130 | ) 131 | 132 | mock_run.assert_called_once_with( 133 | mock_logger, 134 | 'npm run federalist', 135 | cwd=patch_clone_dir, 136 | env=build_env(*kwargs.values()), 137 | node=True 138 | ) 139 | 140 | def test_it_runs_pages_script_when_it_exists(self, mock_get_logger, mock_run, 141 | patch_clone_dir): 142 | package_json_contents = json.dumps({ 143 | 'scripts': { 144 | 'pages': 'echo pages', 145 | }, 146 | }) 147 | create_file(patch_clone_dir / PACKAGE_JSON, package_json_contents) 148 | 149 | kwargs = dict( 150 | branch='branch', 151 | owner='owner', 152 | repository='repo', 153 | site_prefix='site/prefix', 154 | base_url='/site/prefix' 155 | ) 156 | 157 | run_build_script(**kwargs) 158 | 159 | mock_get_logger.assert_called_once_with('run-pages-script') 160 | 161 | mock_logger = mock_get_logger.return_value 162 | 163 | mock_logger.info.assert_called_with( 164 | 'Running pages build script in package.json' 165 | ) 166 | 167 | mock_run.assert_called_once_with( 168 | mock_logger, 169 | 'npm run pages', 170 | cwd=patch_clone_dir, 171 | env=build_env(*kwargs.values()), 172 | node=True 173 | ) 174 | 175 | def test_it_only_runs_pages_script_when_both_exist(self, mock_get_logger, mock_run, 176 | patch_clone_dir): 177 | package_json_contents = json.dumps({ 178 | 'scripts': { 179 | 'pages': 'echo pages', 180 | 'federalist': 'echo federalist', 181 | }, 182 | }) 183 | create_file(patch_clone_dir / PACKAGE_JSON, package_json_contents) 184 | 185 | kwargs = dict( 186 | branch='branch', 187 | owner='owner', 188 | repository='repo', 189 | site_prefix='site/prefix', 190 | base_url='/site/prefix' 191 | ) 192 | 193 | run_build_script(**kwargs) 194 | 195 | mock_get_logger.assert_called_once_with('run-pages-script') 196 | 197 | mock_logger = mock_get_logger.return_value 198 | 199 | mock_logger.info.assert_called_with( 200 | 'Running pages build script in package.json' 201 | ) 202 | 203 | mock_run.assert_called_once_with( 204 | mock_logger, 205 | 'npm run pages', 206 | cwd=patch_clone_dir, 207 | env=build_env(*kwargs.values()), 208 | node=True 209 | ) 210 | 211 | def test_it_does_not_run_otherwise(self, mock_get_logger, mock_run): 212 | run_build_script('b', 'o', 'r', 'sp') 213 | 214 | mock_get_logger.assert_not_called() 215 | mock_run.assert_not_called() 216 | 217 | 218 | class TestRunStep(): 219 | def test_it_should_raise_an_exception_from_step(self): 220 | msg = 'testing-msg' 221 | arg1 = 'arg1' 222 | kwarg1 = 'kwarg1' 223 | mock_step = Mock(side_effect=KeyError) 224 | 225 | with raises(StepException): 226 | run_step(mock_step, msg, arg1, kwarg1=kwarg1) 227 | 228 | mock_step.assert_called_once_with(arg1, kwarg1=kwarg1) 229 | 230 | def test_it_should_run_step_successfully(self): 231 | msg = 'testing-msg' 232 | arg1 = 'arg1' 233 | kwarg1 = 'kwarg1' 234 | mock_step = Mock() 235 | mock_step.return_value() 236 | 237 | run_step(mock_step, msg, arg1, kwarg1=kwarg1) 238 | 239 | mock_step.assert_called_once_with(arg1, kwarg1=kwarg1) 240 | 241 | 242 | @patch('steps.build.run') 243 | 
@patch('steps.build.get_logger') 244 | @patch('steps.build.check_supported_ruby_version') 245 | class TestSetupRuby(): 246 | def test_no_ruby_version_file(self, mock_check_supported_ruby_version, 247 | mock_get_logger, mock_run, patch_clone_dir): 248 | 249 | mock_post_metrics = Mock() 250 | setup_ruby(False, mock_post_metrics) 251 | 252 | mock_get_logger.assert_called_once_with('setup-ruby') 253 | mock_logger = mock_get_logger.return_value 254 | 255 | mock_post_metrics.assert_called_once() 256 | 257 | def callp(cmd, skip_log=False): 258 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, ruby=True, skip_log=skip_log) 259 | 260 | mock_run.assert_has_calls([ 261 | callp('ruby -v', skip_log=True), 262 | callp('echo Ruby version: $(ruby -v)') 263 | ]) 264 | 265 | def test_it_uses_ruby_version_if_it_exists(self, 266 | mock_check_supported_ruby_version, 267 | mock_get_logger, mock_run, 268 | patch_clone_dir): 269 | 270 | version = '3.1' 271 | 272 | create_file(patch_clone_dir / RUBY_VERSION, version) 273 | 274 | mock_post_metrics = Mock() 275 | setup_ruby(False, mock_post_metrics) 276 | 277 | mock_get_logger.assert_called_once_with('setup-ruby') 278 | 279 | mock_logger = mock_get_logger.return_value 280 | 281 | def callp(cmd, skip_log=False): 282 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, ruby=True, skip_log=skip_log) 283 | 284 | mock_run.assert_has_calls([ 285 | callp(f'rvm install {version}'), 286 | callp('ruby -v', skip_log=True), 287 | callp('echo Ruby version: $(ruby -v)') 288 | ]) 289 | 290 | def test_it_strips_and_quotes_ruby_version(self, 291 | mock_check_supported_ruby_version, 292 | mock_get_logger, mock_run, 293 | patch_clone_dir): 294 | 295 | version = ' $3.1 ' 296 | create_file(patch_clone_dir / RUBY_VERSION, version) 297 | 298 | mock_post_metrics = Mock() 299 | setup_ruby(False, mock_post_metrics) 300 | 301 | mock_get_logger.assert_called_once_with('setup-ruby') 302 | 303 | mock_logger = mock_get_logger.return_value 304 | 305 | def callp(cmd, skip_log=False): 306 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, ruby=True, skip_log=skip_log) 307 | 308 | mock_logger.info.assert_has_calls([ 309 | call('Using ruby version in .ruby-version'), 310 | ]) 311 | 312 | mock_run.assert_has_calls([ 313 | callp("rvm install '$3.1'"), 314 | callp('ruby -v', skip_log=True), 315 | callp('echo Ruby version: $(ruby -v)'), 316 | ]) 317 | 318 | def test_it_errors_when_rvm_install_fails(self, 319 | mock_check_supported_ruby_version, 320 | mock_get_logger, mock_run, 321 | patch_clone_dir): 322 | 323 | version = '3.1' 324 | create_file(patch_clone_dir / RUBY_VERSION, version) 325 | 326 | error = 'error installing ruby' 327 | mock_run.side_effect = Exception(error) 328 | 329 | mock_post_metrics = Mock() 330 | with pytest.raises(Exception) as einfo: 331 | setup_ruby(False, mock_post_metrics) 332 | 333 | mock_get_logger.assert_called_once_with('setup-ruby') 334 | 335 | assert str(einfo.value).strip() == error 336 | 337 | def test_it_outputs_warning_if_eol_approaching(self, 338 | mock_check_supported_ruby_version, 339 | mock_get_logger, mock_run, 340 | patch_ruby_min_version): 341 | 342 | min_ruby_version = os.getenv('RUBY_VERSION_MIN') 343 | check_supported_ruby_version(min_ruby_version) 344 | 345 | mock_logger = mock_get_logger.return_value 346 | 347 | mock_logger.warning.assert_has_calls([ 348 | call( 349 | f'WARNING: Ruby {min_ruby_version} will soon reach end-of-life, at which point Pages will no longer support it.'), # noqa: E501 350 | call('Please upgrade to an 
actively supported version, see https://www.ruby-lang.org/en/downloads/branches/ for details.') # noqa: E501 351 | ]) 352 | 353 | def test_it_outputs_warning_if_not_supported(self, 354 | mock_check_supported_ruby_version, 355 | mock_get_logger, mock_run, patch_ruby_min_version): 356 | version = '2.3' 357 | mock_run.return_value = 0 358 | 359 | with pytest.raises(Exception) as einfo: 360 | check_supported_ruby_version(version) 361 | 362 | error = 'ERROR: Unsupported ruby version specified in .ruby-version.' 363 | assert str(einfo.value).strip() == error 364 | 365 | mock_logger = mock_get_logger.return_value 366 | 367 | mock_logger.error.assert_has_calls([ 368 | call('ERROR: Unsupported ruby version specified in .ruby-version.'), 369 | call('Please upgrade to an actively supported version, see https://www.ruby-lang.org/en/downloads/branches/ for details.') # noqa: E501 370 | ]) 371 | 372 | 373 | @patch('steps.build.run') 374 | @patch('steps.build.get_logger') 375 | class TestSetupBundler(): 376 | def test_when_no_gemfile_just_load_jekyll(self, mock_get_logger, mock_run, patch_clone_dir): 377 | setup_bundler(False, None, None) 378 | 379 | mock_get_logger.assert_called_once_with('setup-bundler') 380 | 381 | mock_logger = mock_get_logger.return_value 382 | 383 | mock_logger.info.assert_has_calls([ 384 | call('No Gemfile found, installing Jekyll.') 385 | ]) 386 | 387 | mock_run.assert_called_once_with( 388 | mock_logger, 'gem install jekyll -v 4.2.2 --no-document', 389 | cwd=patch_clone_dir, env={}, ruby=True 390 | ) 391 | 392 | def test_it_uses_default_version_if_only_gemfile_exits(self, mock_get_logger, 393 | mock_run, patch_clone_dir): 394 | default_version = '<2' 395 | create_file(patch_clone_dir / GEMFILE, 'foo') 396 | 397 | mock_run.return_value = 0 398 | 399 | setup_bundler(False, None, None) 400 | 401 | mock_get_logger.assert_called_once_with('setup-bundler') 402 | 403 | mock_logger = mock_get_logger.return_value 404 | 405 | mock_logger.info.assert_has_calls([ 406 | call('Gemfile found, setting up bundler'), 407 | call('Installing dependencies in Gemfile'), 408 | ]) 409 | 410 | def callp(cmd): 411 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, ruby=True) 412 | 413 | mock_run.assert_has_calls([ 414 | callp(f'gem install bundler --version "{default_version}"'), 415 | callp('bundle install'), 416 | ]) 417 | 418 | def test_it_uses_bundler_version_if_gemfile_and_bundler_file_exists(self, mock_get_logger, 419 | mock_run, patch_clone_dir): 420 | version = '2.0.1' 421 | 422 | create_file(patch_clone_dir / GEMFILE, 'foo') 423 | create_file(patch_clone_dir / BUNDLER_VERSION, version) 424 | 425 | mock_run.return_value = 0 426 | 427 | setup_bundler(False, None, None) 428 | 429 | mock_get_logger.assert_called_once_with('setup-bundler') 430 | 431 | mock_logger = mock_get_logger.return_value 432 | 433 | mock_logger.info.assert_has_calls([ 434 | call('Gemfile found, setting up bundler'), 435 | call('Using bundler version in .bundler-version'), 436 | call('Installing dependencies in Gemfile'), 437 | ]) 438 | 439 | def callp(cmd): 440 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, ruby=True) 441 | 442 | mock_run.assert_has_calls([ 443 | callp(f'gem install bundler --version "{version}"'), 444 | callp('bundle install'), 445 | ]) 446 | 447 | 448 | @patch('steps.build.run') 449 | @patch('steps.build.get_logger') 450 | class TestBuildJekyll(): 451 | def test_with_no_gemfile(self, mock_get_logger, mock_run, patch_clone_dir, 452 | patch_site_build_dir): 453 | command = 'jekyll' 454 | 
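        # With no Gemfile in the cloned repo, build_jekyll is expected to invoke the
        # plain `jekyll` binary rather than `bundle exec jekyll`.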
455 | create_file(patch_clone_dir / JEKYLL_CONFIG_YML, 'hi: test') 456 | 457 | kwargs = dict( 458 | branch='branch', owner='owner', 459 | repository='repo', site_prefix='site/prefix', 460 | base_url='/site/prefix', config=json.dumps(dict(boop='beep')) 461 | ) 462 | 463 | build_jekyll(**kwargs) 464 | 465 | mock_get_logger.assert_has_calls([call('build-jekyll'), call('build-jekyll')]) 466 | 467 | mock_logger = mock_get_logger.return_value 468 | 469 | env = build_env( 470 | kwargs['branch'], kwargs['owner'], kwargs['repository'], 471 | kwargs['site_prefix'], kwargs['base_url'] 472 | ) 473 | env['JEKYLL_ENV'] = 'production' 474 | 475 | mock_run.assert_has_calls([ 476 | call( 477 | mock_logger, 478 | f'echo Building using Jekyll version: $({command} -v)', 479 | cwd=patch_clone_dir, 480 | env={}, 481 | ruby=True, 482 | ), 483 | call( 484 | mock_logger, 485 | f'{command} build --destination {patch_site_build_dir}', 486 | cwd=patch_clone_dir, 487 | env=env, 488 | node=True, 489 | ruby=True, 490 | ) 491 | ]) 492 | 493 | def test_with_gemfile(self, mock_get_logger, mock_run, patch_clone_dir, patch_site_build_dir): 494 | command = 'bundle exec jekyll' 495 | 496 | create_file(patch_clone_dir / GEMFILE, 'foo') 497 | create_file(patch_clone_dir / JEKYLL_CONFIG_YML, 'hi: test') 498 | 499 | kwargs = dict( 500 | branch='branch', owner='owner', 501 | repository='repo', site_prefix='site/prefix', 502 | base_url='/site/prefix', config=json.dumps(dict(boop='beep')) 503 | ) 504 | 505 | build_jekyll(**kwargs) 506 | 507 | mock_get_logger.assert_has_calls([call('build-jekyll'), call('build-jekyll')]) 508 | 509 | mock_logger = mock_get_logger.return_value 510 | 511 | env = build_env( 512 | kwargs['branch'], kwargs['owner'], kwargs['repository'], 513 | kwargs['site_prefix'], kwargs['base_url'] 514 | ) 515 | env['JEKYLL_ENV'] = 'production' 516 | 517 | mock_run.assert_has_calls([ 518 | call( 519 | mock_logger, 520 | f'echo Building using Jekyll version: $({command} -v)', 521 | cwd=patch_clone_dir, 522 | env={}, 523 | ruby=True, 524 | ), 525 | call( 526 | mock_logger, 527 | f'{command} build --destination {patch_site_build_dir}', 528 | cwd=patch_clone_dir, 529 | env=env, 530 | node=True, 531 | ruby=True, 532 | ) 533 | ]) 534 | 535 | def test_config_file_is_updated(self, mock_get_logger, mock_run, patch_clone_dir, 536 | patch_site_build_dir): 537 | conf_path = patch_clone_dir / JEKYLL_CONFIG_YML 538 | create_file(conf_path, 'hi: test') 539 | 540 | kwargs = dict( 541 | branch='branch', owner='owner', 542 | repository='repo', site_prefix='site/prefix', 543 | config=json.dumps(dict(boop='beep')), base_url='/site/prefix' 544 | ) 545 | 546 | build_jekyll(**kwargs) 547 | 548 | with conf_path.open() as f: 549 | config = yaml.safe_load(f) 550 | assert config['hi'] == 'test' 551 | assert config['baseurl'] == kwargs['base_url'] 552 | assert config['branch'] == kwargs['branch'] 553 | 554 | 555 | @patch('steps.build.run') 556 | @patch('steps.build.get_logger') 557 | class TestDownloadHugo(): 558 | def test_it_is_callable(self, mock_get_logger, mock_run, patch_working_dir, patch_clone_dir): 559 | version = '0.44' 560 | tar_cmd = f'tar -xzf {patch_working_dir}/hugo.tar.gz -C {patch_working_dir}' 561 | chmod_cmd = f'chmod +x {patch_working_dir}/hugo' 562 | dl_url = ( 563 | 'https://github.com/gohugoio/hugo/releases/download/v' 564 | f'{version}/hugo_{version}_Linux-64bit.tar.gz' 565 | ) 566 | print(dl_url) 567 | 568 | create_file(patch_clone_dir / HUGO_VERSION, version) 569 | 570 | mock_post_metrics = Mock() 571 | with 
requests_mock.Mocker() as m: 572 | m.get(dl_url, text='fake-data') 573 | download_hugo(mock_post_metrics) 574 | 575 | mock_get_logger.assert_called_once_with('download-hugo') 576 | 577 | mock_logger = mock_get_logger.return_value 578 | 579 | mock_logger.info.assert_has_calls([ 580 | call('.hugo-version found'), 581 | call(f'Using hugo version in .hugo-version: {version}'), 582 | call(f'Downloading hugo version {version}') 583 | ]) 584 | 585 | mock_run.assert_has_calls([ 586 | call(mock_logger, tar_cmd, env={}), 587 | call(mock_logger, chmod_cmd, env={}) 588 | ]) 589 | 590 | def test_it_is_callable_retry(self, mock_get_logger, mock_run, patch_working_dir, 591 | patch_clone_dir): 592 | version = '0.44' 593 | tar_cmd = f'tar -xzf {patch_working_dir}/hugo.tar.gz -C {patch_working_dir}' 594 | chmod_cmd = f'chmod +x {patch_working_dir}/hugo' 595 | dl_url = ( 596 | 'https://github.com/gohugoio/hugo/releases/download/v' 597 | f'{version}/hugo_{version}_Linux-64bit.tar.gz' 598 | ) 599 | 600 | create_file(patch_clone_dir / HUGO_VERSION, version) 601 | 602 | mock_post_metrics = Mock() 603 | with requests_mock.Mocker() as m: 604 | m.get(dl_url, [ 605 | dict(exc=requests.exceptions.ConnectTimeout), 606 | dict(exc=requests.exceptions.ConnectTimeout), 607 | dict(exc=requests.exceptions.ConnectTimeout), 608 | dict(exc=requests.exceptions.ConnectTimeout), 609 | dict(text='fake-data') 610 | ]) 611 | 612 | download_hugo(mock_post_metrics) 613 | 614 | mock_get_logger.assert_called_once_with('download-hugo') 615 | 616 | mock_logger = mock_get_logger.return_value 617 | 618 | mock_logger.info.assert_has_calls([ 619 | call('.hugo-version found'), 620 | call(f'Using hugo version in .hugo-version: {version}'), 621 | call(f'Downloading hugo version {version}'), 622 | call(f'Failed attempt #1 to download hugo version: {version}'), 623 | call(f'Failed attempt #2 to download hugo version: {version}'), 624 | call(f'Failed attempt #3 to download hugo version: {version}'), 625 | call(f'Failed attempt #4 to download hugo version: {version}'), 626 | ]) 627 | 628 | mock_run.assert_has_calls([ 629 | call(mock_logger, tar_cmd, env={}), 630 | call(mock_logger, chmod_cmd, env={}) 631 | ]) 632 | 633 | def test_it_is_exception(self, mock_get_logger, mock_run, patch_working_dir, patch_clone_dir): 634 | version = '0.44' 635 | dl_url = ( 636 | 'https://github.com/gohugoio/hugo/releases/download/v' 637 | f'{version}/hugo_{version}_Linux-64bit.tar.gz' 638 | ) 639 | 640 | create_file(patch_clone_dir / HUGO_VERSION, version) 641 | 642 | mock_post_metrics = Mock() 643 | with pytest.raises(Exception): 644 | with requests_mock.Mocker() as m: 645 | m.get(dl_url, [ 646 | dict(exc=requests.exceptions.ConnectTimeout), 647 | dict(exc=requests.exceptions.ConnectTimeout), 648 | dict(exc=requests.exceptions.ConnectTimeout), 649 | dict(exc=requests.exceptions.ConnectTimeout), 650 | dict(exc=requests.exceptions.ConnectTimeout), 651 | ]) 652 | 653 | download_hugo(mock_post_metrics) 654 | 655 | mock_get_logger.assert_called_once_with('download-hugo') 656 | 657 | mock_logger = mock_get_logger.return_value 658 | 659 | mock_logger.info.assert_has_calls([ 660 | call('.hugo-version found'), 661 | call(f'Using hugo version in .hugo-version: {version}'), 662 | call(f'Downloading hugo version {version}'), 663 | call(f'Failed attempt #1 to download hugo version: {version}'), 664 | call(f'Failed attempt #2 to download hugo version: {version}'), 665 | call(f'Failed attempt #3 to download hugo version: {version}'), 666 | call(f'Failed attempt #4 to download hugo 
version: {version}'), 667 | call(f'Failed attempt #5 to download hugo version: {version}'), 668 | ]) 669 | 670 | mock_run.assert_not_called() 671 | 672 | 673 | @patch('steps.build.run') 674 | @patch('steps.build.get_logger') 675 | class TestBuildHugo(): 676 | def test_it_calls_hugo_as_expected(self, mock_get_logger, mock_run, 677 | patch_working_dir, patch_clone_dir, 678 | patch_site_build_dir): 679 | 680 | hugo_path = patch_working_dir / HUGO_BIN 681 | hugo_call = ( 682 | f'{hugo_path} --source {patch_clone_dir} ' 683 | f'--destination {patch_site_build_dir} ' 684 | '--baseURL /site/prefix' 685 | ) 686 | 687 | kwargs = dict( 688 | branch='branch', 689 | owner='owner', 690 | repository='repo', 691 | site_prefix='site/prefix', 692 | base_url='/site/prefix' 693 | ) 694 | 695 | build_hugo(**kwargs) 696 | 697 | mock_get_logger.assert_called_once_with('build-hugo') 698 | 699 | mock_logger = mock_get_logger.return_value 700 | 701 | mock_logger.info.assert_called_with( 702 | 'Building site with hugo' 703 | ) 704 | 705 | mock_run.assert_has_calls([ 706 | call( 707 | mock_logger, 708 | f'echo hugo version: $({hugo_path} version)', 709 | env={}, 710 | ), 711 | call( 712 | mock_logger, 713 | hugo_call, 714 | cwd=patch_clone_dir, 715 | env=build_env(*kwargs.values()), 716 | node=True, 717 | ) 718 | ]) 719 | 720 | 721 | class TestBuildstatic(): 722 | def test_it_moves_files_correctly(self, patch_site_build_dir, patch_clone_dir): 723 | for i in range(0, 10): 724 | create_file(patch_clone_dir / f'file_{i}.txt', str(i)) 725 | 726 | assert len(os.listdir(patch_clone_dir)) == 10 727 | assert len(os.listdir(patch_site_build_dir)) == 0 728 | 729 | build_static() 730 | 731 | assert len(os.listdir(patch_clone_dir)) == 0 732 | assert len(os.listdir(patch_site_build_dir)) == 10 733 | 734 | 735 | class TestBuildEnv(): 736 | def test_it_includes_default_values(self): 737 | branch = 'branch' 738 | owner = 'owner' 739 | repository = 'repo' 740 | site_prefix = 'prefix' 741 | base_url = 'url' 742 | 743 | result = build_env(branch, owner, repository, site_prefix, base_url) 744 | 745 | assert result == { 746 | 'BRANCH': branch, 747 | 'OWNER': owner, 748 | 'REPOSITORY': repository, 749 | 'SITE_PREFIX': site_prefix, 750 | 'BASEURL': base_url, 751 | 'LANG': 'en_US.UTF-8', 752 | 'GATSBY_TELEMETRY_DISABLED': '1', 753 | 'HOME': '/home/customer', 754 | } 755 | 756 | def test_it_includes_user_env_vars(self): 757 | branch = 'branch' 758 | owner = 'owner' 759 | repository = 'repo' 760 | site_prefix = 'prefix' 761 | base_url = 'url' 762 | user_env_vars = [ 763 | {'name': 'FOO', 'value': 'bar'} 764 | ] 765 | 766 | result = build_env(branch, owner, repository, site_prefix, 767 | base_url, user_env_vars) 768 | 769 | assert result['FOO'] == 'bar' 770 | 771 | @patch('sys.stdout', new_callable=StringIO) 772 | def test_it_ignores_and_warns_duplicate_user_env_vars(self, mock_stdout): 773 | # and it is case insensitive 774 | branch = 'branch' 775 | owner = 'owner' 776 | repository = 'repo' 777 | site_prefix = 'prefix' 778 | base_url = 'url' 779 | user_env_vars = [ 780 | {'name': 'BASEURL', 'value': 'bar'}, 781 | {'name': 'repository', 'value': 'baz'} 782 | ] 783 | 784 | result = build_env(branch, owner, repository, site_prefix, 785 | base_url, user_env_vars) 786 | 787 | assert result['BASEURL'] == base_url 788 | assert result['REPOSITORY'] == repository 789 | assert ('user environment variable name `BASEURL` conflicts' 790 | ' with system environment variable, it will be ignored.' 
791 | ) in mock_stdout.getvalue() 792 | assert ('user environment variable name `repository`' 793 | ' conflicts with system environment variable, it will be' 794 | ' ignored.') in mock_stdout.getvalue() 795 | 796 | 797 | @patch('steps.build.run') 798 | @patch('steps.build.get_logger') 799 | @patch('steps.build.CacheFolder') 800 | @patch('steps.build.subprocess.run') 801 | class TestBuildCache(): 802 | def test_it_uses_ruby_cache_when_gemfile_lock(self, mock_sp_run, mock_cache_folder, 803 | mock_get_logger, mock_run, patch_clone_dir): 804 | default_version = '<2' 805 | create_file(patch_clone_dir / GEMFILE, 'foo') 806 | create_file(patch_clone_dir / GEMFILELOCK, contents='hashable') 807 | 808 | mock_run.return_value = 0 809 | 810 | setup_bundler(True, None, None) 811 | 812 | mock_get_logger.assert_called_once_with('setup-bundler') 813 | 814 | mock_logger = mock_get_logger.return_value 815 | 816 | mock_logger.info.assert_has_calls([ 817 | call('Gemfile found, setting up bundler'), 818 | call(f'{GEMFILELOCK} found. Attempting to download cache'), 819 | call('Installing dependencies in Gemfile'), 820 | ]) 821 | 822 | mock_cache_folder.assert_called_once() 823 | 824 | def callp(cmd, skip_log=False): 825 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, ruby=True) 826 | 827 | mock_run.assert_has_calls([ 828 | callp(f'gem install bundler --version "{default_version}"'), 829 | callp('bundle install'), 830 | ]) 831 | 832 | def test_it_uses_node_cache_when_package_lock(self, mock_sp_run, mock_cache_folder, 833 | mock_get_logger, mock_run, patch_clone_dir): 834 | create_file(patch_clone_dir / PACKAGE_JSON) 835 | create_file(patch_clone_dir / PACKAGE_LOCK, contents='hashable') 836 | 837 | mock_post_metrics = Mock() 838 | setup_node(True, None, None, mock_post_metrics) 839 | 840 | mock_get_logger.assert_called_once_with('setup-node') 841 | 842 | mock_logger = mock_get_logger.return_value 843 | 844 | mock_logger.info.assert_has_calls([ 845 | call('Using default node version'), 846 | call(f'{PACKAGE_LOCK} found. 
Attempting to download cache'), 847 | call('skipping npm ci and using cache') 848 | ]) 849 | 850 | mock_cache_folder.assert_called_once() 851 | 852 | def callp(cmd, skip_log=False): 853 | return call(mock_logger, cmd, cwd=patch_clone_dir, env={}, node=True, skip_log=skip_log) 854 | 855 | mock_run.assert_has_calls([ 856 | callp('echo Node version: $(node --version)'), 857 | callp('echo NPM version: $(npm --version)'), 858 | ]) 859 | -------------------------------------------------------------------------------- /test/test_cache.py: -------------------------------------------------------------------------------- 1 | import os 2 | import boto3 3 | import pytest 4 | import tempfile 5 | import filecmp 6 | import shutil 7 | 8 | from moto import mock_aws 9 | 10 | from steps.cache import CacheFolder, get_checksum 11 | from log_utils import get_logger 12 | 13 | 14 | @pytest.fixture 15 | def aws_credentials(): 16 | """Mocked AWS Credentials for moto.""" 17 | os.environ["AWS_ACCESS_KEY_ID"] = "testing" 18 | os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" 19 | os.environ["AWS_DEFAULT_REGION"] = "testing" 20 | 21 | 22 | @pytest.fixture 23 | def s3_client(aws_credentials): 24 | with mock_aws(): 25 | conn = boto3.client("s3") 26 | yield conn 27 | 28 | 29 | @pytest.fixture 30 | def bucket(s3_client): 31 | s3_client.create_bucket( 32 | Bucket='testing', 33 | CreateBucketConfiguration={"LocationConstraint": "testing"} 34 | ) 35 | yield 36 | 37 | 38 | @pytest.fixture 39 | def gemfile(): 40 | tmp_file = tempfile.NamedTemporaryFile(delete=False) 41 | tmp_file.write(b'source "https://rubygems.org"') 42 | tmp_file.write(b'gem "jekyll", "~> 4.0"') 43 | yield tmp_file.name 44 | 45 | 46 | @pytest.fixture(autouse=True) 47 | def cache_folder(s3_client, bucket, gemfile, tmpdir): 48 | logger = get_logger('testing') 49 | yield CacheFolder(gemfile, tmpdir, 'testing', s3_client, logger) 50 | 51 | 52 | class TestCache(): 53 | def test_cache_operations(self, cache_folder: CacheFolder): 54 | # first the cache isn't there 55 | assert not cache_folder.exists() 56 | 57 | # add some files and cache them 58 | FILES_TO_CACHE = 5 59 | for _ in range(FILES_TO_CACHE): 60 | tempfile.NamedTemporaryFile(dir=cache_folder.local_folder, delete=False) 61 | cache_folder.zip_upload_folder_to_s3() 62 | 63 | # now the cache exists 64 | assert cache_folder.exists() 65 | 66 | # move the old files to a new directory for comparison 67 | with tempfile.TemporaryDirectory() as download_tmp_dir: 68 | for f in os.listdir(cache_folder.local_folder): 69 | shutil.move( 70 | os.path.join(cache_folder.local_folder, f), 71 | os.path.join(download_tmp_dir, f) 72 | ) 73 | 74 | # download the cache and compare 75 | cache_folder.download_unzip() 76 | dir_comp = filecmp.dircmp(cache_folder.local_folder, download_tmp_dir) 77 | assert len(dir_comp.common) == FILES_TO_CACHE 78 | assert len(dir_comp.diff_files) == 0 79 | 80 | def test_checksum(self, gemfile): 81 | c = get_checksum(gemfile) 82 | assert c == 'd41d8cd98f00b204e9800998ecf8427e' 83 | -------------------------------------------------------------------------------- /test/test_crypto.py: -------------------------------------------------------------------------------- 1 | from crypto.decrypt import decrypt 2 | 3 | 4 | def test_decrypt(): 5 | ciphertext = ('6a7495108a7f8c9ab4d0990854240242:' 6 | 'e05f0d25446be83fa92aa9586610496b:' 7 | '560d3e8ff02f852104417a') 8 | key = 'shhhhhhh' 9 | 10 | expected = 'hello world' 11 | 12 | result = decrypt(ciphertext, key) 13 | 14 | assert result == expected 15 | 
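Note on the TestCache suite in test/test_cache.py above: it drives steps.cache.CacheFolder only through its public surface (get_checksum over a lockfile, exists, zip_upload_folder_to_s3, and download_unzip against a moto-mocked bucket). For orientation, a minimal sketch consistent with that interface follows; it is not the repository's actual steps/cache.py, and the cache/<md5>.zip key layout and plain-zip archive format are assumptions made purely for illustration.

import hashlib
import shutil
import tempfile
from pathlib import Path

from botocore.exceptions import ClientError


def get_checksum(path):
    # Cache key: MD5 of the lockfile (e.g. Gemfile.lock or package-lock.json) contents
    return hashlib.md5(Path(path).read_bytes()).hexdigest()  # nosec - not used for security


class CacheFolder:
    def __init__(self, key_file, local_folder, bucket, s3_client, logger):
        self.key = f'cache/{get_checksum(key_file)}.zip'  # assumed key layout
        self.local_folder = Path(local_folder)
        self.bucket = bucket
        self.s3_client = s3_client
        self.logger = logger

    def exists(self):
        # A cache hit means an object with this checksum-derived key is already in the bucket
        try:
            self.s3_client.head_object(Bucket=self.bucket, Key=self.key)
            return True
        except ClientError:
            return False

    def zip_upload_folder_to_s3(self):
        # Archive the local folder and upload it under the checksum-derived key
        with tempfile.TemporaryDirectory() as tmp:
            archive = shutil.make_archive(f'{tmp}/cache', 'zip', self.local_folder)
            self.s3_client.upload_file(archive, self.bucket, self.key)
        self.logger.info(f'Cache uploaded to s3://{self.bucket}/{self.key}')

    def download_unzip(self):
        # Fetch the archive and unpack it back into the local folder
        with tempfile.TemporaryDirectory() as tmp:
            archive = f'{tmp}/cache.zip'
            self.s3_client.download_file(self.bucket, self.key, archive)
            shutil.unpack_archive(archive, self.local_folder, 'zip')
        self.logger.info(f'Cache downloaded from s3://{self.bucket}/{self.key}')

One detail worth noting when reading test_checksum: the expected digest d41d8cd98f00b204e9800998ecf8427e is the MD5 of empty content. The gemfile fixture writes to a NamedTemporaryFile but never flushes or closes it before yielding the name, so the file on disk is still empty when it is hashed.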
-------------------------------------------------------------------------------- /test/test_fetch.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import unittest 3 | from unittest.mock import patch 4 | # import subprocess # nosec 5 | import pytest 6 | 7 | from steps import fetch_repo, update_repo 8 | from common import CLONE_DIR_PATH 9 | 10 | clone_env = { 11 | 'HOME': '/home' 12 | } 13 | 14 | 15 | @patch('steps.fetch.run') 16 | @patch('steps.fetch.get_logger') 17 | class TestCloneRepo(): 18 | def test_runs_expected_cmds(self, mock_get_logger, mock_run): 19 | owner = 'owner-1' 20 | repository = 'repo-1' 21 | branch = 'main' 22 | 23 | command = (f'git clone -b {branch} --single-branch --depth 1 ' 24 | f'https://github.com/{owner}/{repository}.git ' 25 | f'{CLONE_DIR_PATH}') 26 | 27 | fetch_repo(owner, repository, branch) 28 | 29 | mock_get_logger.assert_called_once_with('clone') 30 | 31 | mock_run.assert_called_once_with(mock_get_logger.return_value, command, env=clone_env, check=False) # noqa: 501 32 | 33 | def test_runs_expected_cmds_with_gh_token(self, mock_get_logger, mock_run): 34 | owner = 'owner-2' 35 | repository = 'repo-2' 36 | branch = 'staging' 37 | github_token = 'ABC123' 38 | 39 | command = (f'git clone -b {branch} --single-branch --depth 1 ' 40 | f'https://{github_token}@github.com/{owner}/{repository}.git ' 41 | f'{CLONE_DIR_PATH}') 42 | 43 | fetch_repo(owner, repository, branch, github_token) 44 | 45 | mock_get_logger.assert_called_once_with('clone') 46 | 47 | mock_run.assert_called_once_with(mock_get_logger.return_value, command, env=clone_env, check=False) # noqa: 501 48 | 49 | 50 | class TestCloneRepoNoMock(unittest.TestCase): 51 | @pytest.fixture(autouse=True) 52 | def inject_fixtures(self, caplog): 53 | self._caplog = caplog 54 | 55 | def test_no_github_permission_warning(self): 56 | owner = 'cloud-gov' 57 | repository = 'cg-site' 58 | branch = 'master' 59 | 60 | # TODO: this is a totally useless test because the CI runner doesn't have git 61 | with self._caplog.at_level(logging.INFO): 62 | fetch_repo(owner, repository, branch) 63 | 64 | assert self._caplog.text 65 | assert 'Permission denied' not in self._caplog.text 66 | 67 | 68 | @patch('steps.fetch.run') 69 | @patch('steps.fetch.get_logger') 70 | class TestUpdateRepo(): 71 | def test_runs_expected_cmds(self, mock_get_logger, mock_run): 72 | clone_dir = 'clone_dir' 73 | 74 | command = 'git pull --unshallow' 75 | 76 | update_repo(clone_dir) 77 | 78 | mock_get_logger.assert_called_once_with('update') 79 | 80 | mock_run.assert_called_once_with(mock_get_logger.return_value, command, cwd=clone_dir) 81 | 82 | 83 | # @patch('steps.fetch.subprocess.run') 84 | # @patch('steps.fetch.get_logger') 85 | # class TestFetchCommitSHA(): 86 | # def test_runs_expected_cmds(self, mock_get_logger, mock_run): 87 | # mock_run.return_value = subprocess.CompletedProcess([], 0, 'commit testSha blah blah') 88 | # clone_dir = 'clone_dir' 89 | 90 | # command = ['git', 'log', '-1'] 91 | # commit_sha = fetch_commit_sha(clone_dir) 92 | 93 | # mock_get_logger.assert_called_once_with('clone') 94 | # mock_run.assert_called_once_with( 95 | # command, 96 | # shell=False, # nosec 97 | # check=True, 98 | # stdout=subprocess.PIPE, 99 | # universal_newlines=True, 100 | # cwd=clone_dir 101 | # ) 102 | # assert commit_sha == 'testSha' 103 | -------------------------------------------------------------------------------- /test/test_log_utils.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | from unittest.mock import patch 3 | from time import sleep 4 | 5 | from log_utils.get_logger import ( 6 | LogFilter, Formatter, get_logger, init_logging, 7 | set_log_attrs, DEFAULT_LOG_LEVEL) 8 | from log_utils.db_handler import DBHandler 9 | from log_utils.monitoring import RepeatTimer 10 | 11 | 12 | class TestLogFilter(): 13 | def test_it_filters_message_with_default_mask(self): 14 | priv_values = ['foobar'] 15 | msg = 'hellofoobar' 16 | 17 | filter = LogFilter(priv_values) 18 | record = logging.makeLogRecord({'msg': msg}) 19 | result = filter.filter(record) 20 | 21 | assert(result is True) 22 | assert(record.getMessage() == f'hello{LogFilter.DEFAULT_MASK}') 23 | 24 | def test_it_filters_message_with_custom_mask(self): 25 | priv_values = ['foobar'] 26 | mask = 'TheNumber42' 27 | msg = 'hellofoobar' 28 | 29 | filter = LogFilter(priv_values, mask) 30 | record = logging.makeLogRecord({'msg': msg}) 31 | result = filter.filter(record) 32 | 33 | assert(result is True) 34 | assert(record.getMessage() == f'hello{mask}') 35 | 36 | def test_it_does_not_log_empty_messages(self): 37 | priv_values = [] 38 | msg = '' 39 | 40 | filter = LogFilter(priv_values) 41 | record = logging.makeLogRecord({'msg': msg}) 42 | result = filter.filter(record) 43 | 44 | assert(result is False) 45 | 46 | def test_it_replaces_message_invalid_access_key(self): 47 | priv_values = [] 48 | msg = f'hello{LogFilter.INVALID_ACCESS_KEY}' 49 | 50 | filter = LogFilter(priv_values) 51 | record = logging.makeLogRecord({'msg': msg}) 52 | result = filter.filter(record) 53 | 54 | assert(result is True) 55 | assert(record.getMessage() == ( 56 | 'Whoops, our S3 keys were rotated during your ' 57 | 'build and became out of date. This was not a ' 58 | 'problem with your site build, but if you restart ' 59 | 'the failed build it should work on the next try. ' 60 | 'Sorry for the inconvenience!' 
61 | )) 62 | 63 | 64 | class TestFormatter(): 65 | @patch('logging.Formatter.format') 66 | def test_it_populates_empty_strings_if_key_is_missing(self, mock_format): 67 | keys = ['foobar'] 68 | 69 | formatter = Formatter(keys) 70 | record = logging.makeLogRecord({}) 71 | 72 | formatter.format(record) 73 | 74 | assert(record.foobar == '') 75 | mock_format.assert_called_once_with(record) 76 | 77 | @patch('logging.Formatter.format') 78 | def test_it_ignores_key_if_present(self, mock_format): 79 | keys = ['foobar'] 80 | 81 | formatter = Formatter(keys) 82 | record = logging.makeLogRecord({'foobar': 'Hello!'}) 83 | 84 | formatter.format(record) 85 | 86 | assert(record.foobar == 'Hello!') 87 | mock_format.assert_called_once_with(record) 88 | 89 | 90 | class TestGetLogger(): 91 | def test_it_returns_a_logger_with_an_adapter_with_extras(self): 92 | name = 'foobar' 93 | attrs = {'foo': 'bar'} 94 | set_log_attrs(attrs) 95 | 96 | adapter = get_logger(name) 97 | 98 | assert(type(adapter) == logging.LoggerAdapter) 99 | assert(adapter.logger.name == name) 100 | assert(adapter.extra == attrs) 101 | 102 | 103 | @patch('psycopg2.connect') 104 | @patch('logging.basicConfig') 105 | class TestInitLogging(): 106 | def test_it_adds_a_stream_and_db_handlers(self, mock_basic_config, _): 107 | init_logging([], {'buildid': 1234}, 'foo') 108 | 109 | _, kwargs = mock_basic_config.call_args 110 | 111 | assert(kwargs['level'] == DEFAULT_LOG_LEVEL) 112 | assert(len(kwargs['handlers']) == 2) 113 | assert(type(kwargs['handlers'][0]) == logging.StreamHandler) 114 | assert(type(kwargs['handlers'][1]) == DBHandler) 115 | 116 | 117 | @patch('log_utils.monitoring.log_monitoring_metrics') 118 | class TestMonitorLogging(): 119 | def test_it_calls_logger_on_schedule(self, mock_metrics_logger): 120 | logger = get_logger('test') 121 | thread = RepeatTimer(1, mock_metrics_logger, [logger]) 122 | thread.start() 123 | sleep(5) 124 | mock_metrics_logger.assert_called_with(logger) 125 | thread.cancel() 126 | -------------------------------------------------------------------------------- /test/test_publish.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | from steps import publish 4 | from common import SITE_BUILD_DIR_PATH 5 | 6 | TEST_BUCKET = 'test-bucket' 7 | 8 | 9 | class TestPublish(): 10 | def test_it_calls_publish_to_s3(self, monkeypatch): 11 | mock_publish_to_s3 = Mock() 12 | monkeypatch.setattr('publishing.s3publisher.publish_to_s3', 13 | mock_publish_to_s3) 14 | 15 | kwargs = dict( 16 | base_url='/site/prefix', 17 | site_prefix='site/prefix', 18 | bucket=TEST_BUCKET, 19 | federalist_config={}, 20 | s3_client=None 21 | ) 22 | 23 | publish(**kwargs) 24 | 25 | mock_publish_to_s3.assert_called_once() 26 | 27 | # check that the `directory` kwarg is a string, not a Path 28 | _, actual_kwargs = mock_publish_to_s3.call_args_list[0] 29 | assert type(actual_kwargs['directory']) == str 30 | assert actual_kwargs['directory'] == str(SITE_BUILD_DIR_PATH) 31 | -------------------------------------------------------------------------------- /test/test_remote_logs.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | from log_utils.remote_logs import ( 4 | b64string, post_build_complete, 5 | post_build_error, post_build_timeout, 6 | post_build_processing) 7 | 8 | from log_utils.common import (STATUS_COMPLETE, STATUS_ERROR, STATUS_PROCESSING) 9 | 10 | MOCK_STATUS_URL = 
'https://status.example.com' 11 | 12 | 13 | class TestPostBuildComplete(): 14 | @patch('requests.post') 15 | @patch('requests.delete') 16 | def test_it_works(self, mock_del, mock_post): 17 | commit_sha = 'testSha1' 18 | post_build_complete(MOCK_STATUS_URL, commit_sha) 19 | mock_post.assert_called_once_with( 20 | MOCK_STATUS_URL, 21 | json={'status': STATUS_COMPLETE, 'message': '', 'commit_sha': commit_sha}, 22 | timeout=10 23 | ) 24 | 25 | 26 | class TestPostBuildProcessing(): 27 | @patch('requests.post') 28 | def test_it_works(self, mock_post): 29 | post_build_processing(MOCK_STATUS_URL) 30 | mock_post.assert_called_once_with( 31 | MOCK_STATUS_URL, 32 | json={'status': STATUS_PROCESSING, 'message': '', 'commit_sha': None}, 33 | timeout=10 34 | ) 35 | 36 | 37 | class TestPostBuildError(): 38 | @patch('requests.post') 39 | @patch('requests.delete') 40 | def test_it_works(self, mock_del, mock_post): 41 | commit_sha = 'testSha2' 42 | post_build_error(MOCK_STATUS_URL, 'error msg', commit_sha) 43 | 44 | assert mock_post.call_count == 1 45 | 46 | mock_post.assert_any_call( 47 | MOCK_STATUS_URL, 48 | json={ 49 | 'status': STATUS_ERROR, 'message': b64string('error msg'), 'commit_sha': commit_sha 50 | }, 51 | timeout=10 52 | ) 53 | 54 | 55 | class TestPostBuildTimeout(): 56 | @patch('requests.post') 57 | @patch('requests.delete') 58 | def test_it_works(self, mock_del, mock_post): 59 | commit_sha = 'testSha3' 60 | post_build_timeout(MOCK_STATUS_URL, commit_sha) 61 | 62 | expected_output = b64string( 63 | 'The build did not complete. It may have timed out.') 64 | 65 | assert mock_post.call_count == 1 66 | mock_post.assert_any_call( 67 | MOCK_STATUS_URL, 68 | json={'status': STATUS_ERROR, 'message': expected_output, 'commit_sha': commit_sha}, 69 | timeout=10 70 | ) 71 | -------------------------------------------------------------------------------- /test/test_repo_config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import repo_config 4 | import pytest 5 | from .support import create_file, patch_dir 6 | import steps 7 | 8 | 9 | @pytest.fixture 10 | def patch_clone_dir(monkeypatch): 11 | yield from patch_dir(monkeypatch, steps.build, 'CLONE_DIR_PATH') 12 | 13 | 14 | class TestRepoConfig(): 15 | def test_it_loads_federalist_json_when_it_exists(self, patch_clone_dir): 16 | filename = 'federalist.json' 17 | json_contents = json.dumps({ 18 | 'name': filename, 19 | }) 20 | create_file(patch_clone_dir / filename, contents=json_contents) 21 | result = repo_config.from_json_file(patch_clone_dir) 22 | assert result.config['name'] == filename 23 | assert len(os.listdir(patch_clone_dir)) == 1 24 | 25 | def test_it_loads_pages_json_when_it_exists(self, patch_clone_dir): 26 | filename = 'pages.json' 27 | json_contents = json.dumps({ 28 | 'name': filename, 29 | }) 30 | create_file(patch_clone_dir / filename, contents=json_contents) 31 | result = repo_config.from_json_file(patch_clone_dir) 32 | assert result.config['name'] == filename 33 | assert len(os.listdir(patch_clone_dir)) == 1 34 | 35 | def test_it_loads_pages_json_when_federalist_json_also_exists(self, patch_clone_dir): 36 | filename = 'federalist.json' 37 | json_contents = json.dumps({ 38 | 'name': filename, 39 | }) 40 | create_file(patch_clone_dir / filename, contents=json_contents) 41 | filename = 'pages.json' 42 | json_contents = json.dumps({ 43 | 'name': filename, 44 | }) 45 | create_file(patch_clone_dir / filename, contents=json_contents) 46 | result = 
repo_config.from_json_file(patch_clone_dir) 47 | assert result.config['name'] == 'pages.json' 48 | assert len(os.listdir(patch_clone_dir)) == 2 49 | -------------------------------------------------------------------------------- /test/test_runner.py: -------------------------------------------------------------------------------- 1 | from pytest import raises 2 | import shlex 3 | import subprocess # nosec 4 | from unittest.mock import Mock, patch 5 | 6 | from runner import run, setuser, NVM_PATH, RVM_PATH 7 | 8 | 9 | @patch('subprocess.Popen', autospec=True) 10 | def test_run(mock_popen): 11 | mock_logger = Mock() 12 | command = 'foobar' 13 | 14 | mock_popen.return_value = Mock(returncode=0, stdout=Mock(readline=Mock(return_value='foobar'))) 15 | 16 | run(mock_logger, command) 17 | 18 | mock_popen.assert_called_once_with( 19 | shlex.split(command), 20 | cwd=None, 21 | env=None, 22 | shell=False, 23 | executable=None, 24 | stderr=subprocess.STDOUT, 25 | stdout=subprocess.PIPE, 26 | bufsize=1, 27 | encoding='utf-8', 28 | text=True, 29 | preexec_fn=setuser, 30 | ) 31 | 32 | mock_logger.info.assert_called_once_with('foobar') 33 | 34 | 35 | @patch('subprocess.Popen', autospec=True) 36 | def test_run_popen_failure(mock_popen): 37 | mock_logger = Mock() 38 | command = 'foobar' 39 | 40 | mock_popen.side_effect = ValueError('ugh') 41 | 42 | with raises(ValueError): 43 | run(mock_logger, command) 44 | 45 | mock_popen.assert_called_once_with( 46 | shlex.split(command), 47 | cwd=None, 48 | env=None, 49 | shell=False, 50 | executable=None, 51 | stderr=subprocess.STDOUT, 52 | stdout=subprocess.PIPE, 53 | bufsize=1, 54 | encoding='utf-8', 55 | text=True, 56 | preexec_fn=setuser, 57 | ) 58 | 59 | mock_logger.error.assert_any_call('Encountered a problem invoking Popen.') 60 | mock_logger.error.assert_any_call('ugh') 61 | 62 | 63 | @patch('subprocess.Popen', autospec=True) 64 | def test_run_popen_failure_check_false(mock_popen): 65 | mock_logger = Mock() 66 | command = 'foobar' 67 | return_code = 1 68 | 69 | mock_popen.side_effect = ValueError('ugh') 70 | 71 | result = run(mock_logger, command, check=False) 72 | 73 | assert result == return_code 74 | 75 | mock_popen.assert_called_once_with( 76 | shlex.split(command), 77 | cwd=None, 78 | env=None, 79 | shell=False, 80 | executable=None, 81 | stderr=subprocess.STDOUT, 82 | stdout=subprocess.PIPE, 83 | bufsize=1, 84 | encoding='utf-8', 85 | text=True, 86 | preexec_fn=setuser, 87 | ) 88 | 89 | mock_logger.error.assert_any_call('Encountered a problem invoking Popen.') 90 | mock_logger.error.assert_any_call('ugh') 91 | 92 | 93 | @patch('subprocess.Popen', autospec=True) 94 | def test_run_popen_output(mock_popen): 95 | mock_logger = Mock() 96 | command = 'foobar' 97 | 98 | string_output = 'string_output' 99 | mock_popen.return_value = Mock(returncode=0, stdout=Mock(readline=Mock(return_value=string_output))) # noqa: E501 100 | 101 | result = run(mock_logger, command) 102 | assert result == string_output 103 | 104 | mock_popen.assert_called_once_with( 105 | shlex.split(command), 106 | cwd=None, 107 | env=None, 108 | shell=False, 109 | executable=None, 110 | stderr=subprocess.STDOUT, 111 | stdout=subprocess.PIPE, 112 | bufsize=1, 113 | encoding='utf-8', 114 | text=True, 115 | preexec_fn=setuser, 116 | ) 117 | 118 | 119 | @patch('subprocess.Popen', autospec=True) 120 | def test_run_os_failure_check_false(mock_popen): 121 | mock_logger = Mock() 122 | command = 'foobar' 123 | 124 | mock_popen.side_effect = OSError('ugh') 125 | 126 | result = run(mock_logger, command, 
check=False) 127 | 128 | mock_popen.assert_called_once_with( 129 | shlex.split(command), 130 | cwd=None, 131 | env=None, 132 | shell=False, 133 | executable=None, 134 | stderr=subprocess.STDOUT, 135 | stdout=subprocess.PIPE, 136 | bufsize=1, 137 | encoding='utf-8', 138 | text=True, 139 | preexec_fn=setuser 140 | ) 141 | 142 | mock_logger.error.assert_any_call( 143 | 'Encountered a problem executing `' + ' '.join(shlex.split(command)) + '`.' 144 | ) 145 | mock_logger.error.assert_any_call('ugh') 146 | 147 | assert result == 1 148 | 149 | 150 | @patch('subprocess.Popen', autospec=True) 151 | def test_run_os_failure_check_true(mock_popen): 152 | mock_logger = Mock() 153 | command = 'foobar' 154 | 155 | mock_popen.side_effect = OSError('ugh') 156 | 157 | with raises(OSError, match='ugh'): 158 | run(mock_logger, command) 159 | 160 | mock_popen.assert_called_once_with( 161 | shlex.split(command), 162 | cwd=None, 163 | env=None, 164 | shell=False, 165 | executable=None, 166 | stderr=subprocess.STDOUT, 167 | stdout=subprocess.PIPE, 168 | bufsize=1, 169 | encoding='utf-8', 170 | text=True, 171 | preexec_fn=setuser 172 | ) 173 | 174 | mock_logger.error.assert_any_call( 175 | 'Encountered a problem executing `' + ' '.join(shlex.split(command)) + '`.' 176 | ) 177 | mock_logger.error.assert_any_call('ugh') 178 | 179 | 180 | @patch('subprocess.Popen', autospec=True) 181 | def test_run_command_failure_check_false(mock_popen): 182 | mock_logger = Mock() 183 | command = 'foobar' 184 | return_code = 2 185 | 186 | mock_popen.return_value = Mock(returncode=return_code, stdout=Mock(readline=Mock(return_value='text'))) # noqa: E501 187 | 188 | result = run(mock_logger, command, check=False) 189 | 190 | mock_popen.assert_called_once_with( 191 | shlex.split(command), 192 | cwd=None, 193 | env=None, 194 | shell=False, 195 | executable=None, 196 | stderr=subprocess.STDOUT, 197 | stdout=subprocess.PIPE, 198 | bufsize=1, 199 | encoding='utf-8', 200 | text=True, 201 | preexec_fn=setuser 202 | ) 203 | 204 | assert result == return_code 205 | 206 | 207 | @patch('subprocess.Popen', autospec=True) 208 | def test_run_command_failure_check_true(mock_popen): 209 | mock_logger = Mock() 210 | command = 'foobar' 211 | return_code = 2 212 | 213 | mock_popen.return_value = Mock(returncode=return_code, stdout=Mock(readline=Mock(return_value='text'))) # noqa: E501 214 | 215 | with raises(subprocess.CalledProcessError): 216 | run(mock_logger, command) 217 | 218 | mock_popen.assert_called_once_with( 219 | shlex.split(command), 220 | cwd=None, 221 | env=None, 222 | shell=False, 223 | executable=None, 224 | stderr=subprocess.STDOUT, 225 | stdout=subprocess.PIPE, 226 | bufsize=1, 227 | encoding='utf-8', 228 | text=True, 229 | preexec_fn=setuser 230 | ) 231 | 232 | 233 | @patch('subprocess.Popen', autospec=True) 234 | def test_run_with_node(mock_popen): 235 | mock_logger = Mock() 236 | command = 'foobar' 237 | cwd = '/foo' 238 | env = {} 239 | 240 | mock_popen.return_value = Mock(returncode=0, stdout=Mock(readline=Mock(return_value='foobar'))) 241 | 242 | run(mock_logger, command, cwd=cwd, env=env, node=True) 243 | 244 | mock_popen.assert_called_once_with( 245 | f'source {NVM_PATH} && {command}', 246 | cwd=cwd, 247 | env=env, 248 | shell=True, # nosec 249 | executable='/bin/bash', 250 | stderr=subprocess.STDOUT, 251 | stdout=subprocess.PIPE, 252 | bufsize=1, 253 | encoding='utf-8', 254 | text=True, 255 | preexec_fn=setuser 256 | ) 257 | 258 | 259 | @patch('subprocess.Popen', autospec=True) 260 | def test_run_with_ruby(mock_popen): 261 | 
mock_logger = Mock() 262 | command = 'foobar' 263 | cwd = '/foo' 264 | env = {} 265 | 266 | mock_popen.return_value = Mock(returncode=0, stdout=Mock(readline=Mock(return_value='foobar'))) 267 | 268 | run(mock_logger, command, cwd=cwd, env=env, ruby=True) 269 | 270 | mock_popen.assert_called_once_with( 271 | f'source {RVM_PATH} && {command}', 272 | cwd=cwd, 273 | env=env, 274 | shell=True, # nosec 275 | executable='/bin/bash', 276 | stderr=subprocess.STDOUT, 277 | stdout=subprocess.PIPE, 278 | bufsize=1, 279 | encoding='utf-8', 280 | text=True, 281 | preexec_fn=setuser 282 | ) 283 | 284 | 285 | def test_access_environ(): 286 | mock_logger = Mock() 287 | command = 'cat /proc/1/environ' 288 | env = {} 289 | 290 | run(mock_logger, command, env=env, check=False) 291 | 292 | mock_logger.info.assert_any_call('cat: /proc/1/environ: Permission denied') 293 | 294 | 295 | @patch('subprocess.Popen', autospec=True) 296 | def test_run_skip_log(mock_popen): 297 | mock_logger = Mock() 298 | command = 'foobar' 299 | 300 | mock_popen.return_value = Mock(returncode=0, stdout=Mock(readline=Mock(return_value='foobar'))) 301 | 302 | run(mock_logger, command, skip_log=True) 303 | 304 | mock_popen.assert_called_once_with( 305 | shlex.split(command), 306 | cwd=None, 307 | env=None, 308 | shell=False, 309 | executable=None, 310 | stderr=subprocess.STDOUT, 311 | stdout=subprocess.PIPE, 312 | bufsize=1, 313 | encoding='utf-8', 314 | text=True, 315 | preexec_fn=setuser, 316 | ) 317 | 318 | mock_logger.info.assert_not_called() 319 | --------------------------------------------------------------------------------
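The test_runner.py assertions above largely pin down how the build container shells out: subprocess.Popen with shlex-split arguments, stdout and stderr merged, line-buffered UTF-8 text, a setuser preexec_fn, bash-sourced nvm/rvm wrappers when node or ruby is requested, logged-and-swallowed errors when check=False, and CalledProcessError on a non-zero exit when check=True. For reference, a simplified sketch of a run helper consistent with those argument shapes follows; it is not the repository's src/runner/__init__.py, and the NVM_PATH/RVM_PATH values, the setuser body, and the output-streaming loop are assumptions made for illustration.

import shlex
import subprocess  # nosec

NVM_PATH = '/usr/local/nvm/nvm.sh'       # assumed location
RVM_PATH = '/usr/local/rvm/scripts/rvm'  # assumed location


def setuser():
    # Placeholder: the real helper drops privileges to the unprivileged build user
    pass


def run(logger, command, cwd=None, env=None, check=True,
        node=False, ruby=False, skip_log=False):
    shell = node or ruby
    if shell:
        # Source the requested runtime manager(s) so node/ruby are on PATH
        sources = []
        if node:
            sources.append(f'source {NVM_PATH}')
        if ruby:
            sources.append(f'source {RVM_PATH}')
        args = ' && '.join(sources + [command])
        executable = '/bin/bash'
    else:
        args = shlex.split(command)
        executable = None

    try:
        proc = subprocess.Popen(  # nosec - commands come from trusted build steps
            args,
            cwd=cwd,
            env=env,
            shell=shell,
            executable=executable,
            stderr=subprocess.STDOUT,
            stdout=subprocess.PIPE,
            bufsize=1,
            encoding='utf-8',
            text=True,
            preexec_fn=setuser,
        )
    except ValueError as err:
        logger.error('Encountered a problem invoking Popen.')
        logger.error(str(err))
        if check:
            raise
        return 1
    except OSError as err:
        logger.error('Encountered a problem executing `' + ' '.join(shlex.split(command)) + '`.')
        logger.error(str(err))
        if check:
            raise
        return 1

    # Stream child output line by line; skip_log suppresses per-line logging
    last_line = ''
    for line in iter(proc.stdout.readline, ''):
        last_line = line.strip()
        if not skip_log:
            logger.info(last_line)
    proc.wait()

    if proc.returncode != 0:
        if check:
            raise subprocess.CalledProcessError(proc.returncode, command)
        return proc.returncode
    return last_line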