├── .ansible-lint
├── .bandit.yml
├── .coveragerc
├── .flake8
├── .github
│   ├── CODEOWNERS
│   ├── dependabot.yml
│   ├── labels.yml
│   ├── lineage.yml
│   └── workflows
│       ├── build.yml
│       ├── codeql-analysis.yml
│       ├── dependency-review.yml
│       └── sync-labels.yml
├── .gitignore
├── .isort.cfg
├── .mdl_config.yaml
├── .pre-commit-config.yaml
├── .prettierignore
├── .yamllint
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── bump-version
├── gce-scripts
│   ├── README.md
│   ├── check_instances.sh
│   ├── combine_shards.py
│   ├── grab_and_combine_data.sh
│   ├── packages_to_install.sh
│   ├── run_all_scripts.sh
│   ├── run_instances.sh
│   ├── running_script.sh
│   ├── scp_and_setup.sh
│   └── split_up_dataset.sh
├── pytest.ini
├── requirements-dev.txt
├── requirements-test.txt
├── requirements.txt
├── setup-env
├── setup.py
├── src
│   └── pshtt
│       ├── __init__.py
│       ├── __main__.py
│       ├── _version.py
│       ├── cli.py
│       ├── models.py
│       ├── pshtt.py
│       └── utils.py
├── tag.sh
└── tests
    ├── conftest.py
    ├── test_badssl.py
    ├── test_cli.py
    ├── test_definitions.py
    ├── test_pshtt.py
    └── test_utils.py
/.ansible-lint: -------------------------------------------------------------------------------- 1 | --- 2 | # See https://ansible-lint.readthedocs.io/configuring/ for a list of 3 | # the configuration elements that can exist in this file. 4 | enable_list: 5 | # Useful checks that one must opt-into. See here for more details: 6 | # https://ansible-lint.readthedocs.io/rules/ 7 | - fqcn-builtins 8 | - no-log-password 9 | - no-same-owner 10 | exclude_paths: 11 | # This exclusion is implicit, unless exclude_paths is defined 12 | - .cache 13 | # Seems wise to ignore this too 14 | - .github 15 | kinds: 16 | # This will force our systemd specific molecule configurations to be treated 17 | # as plain yaml files by ansible-lint. This mirrors the default kind 18 | # configuration in ansible-lint for molecule configurations: 19 | # yaml: "**/molecule/*/{base,molecule}.{yaml,yml}" 20 | - yaml: "**/molecule/*/molecule-{no,with}-systemd.yml" 21 | use_default_rules: true -------------------------------------------------------------------------------- /.bandit.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Configuration file for the Bandit python security scanner 3 | # https://bandit.readthedocs.io/en/latest/config.html 4 | # This config is applied to bandit when scanning the "tests" tree 5 | 6 | # Tests are first included by `tests`, and then excluded by `skips`. 7 | # If `tests` is empty, all tests are considered included.
8 | 9 | tests: 10 | # - B101 11 | # - B102 12 | 13 | skips: 14 | - B101 # skip "assert used" check since assertions are required in pytests 15 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | # This is the configuration for code coverage checks 2 | # https://coverage.readthedocs.io/en/latest/config.html 3 | 4 | [run] 5 | source = src/pshtt 6 | omit = 7 | branch = true 8 | 9 | [report] 10 | exclude_lines = 11 | if __name__ == "__main__": 12 | show_missing = true 13 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 80 3 | # Select (turn on) 4 | # * Complexity violations reported by mccabe (C) - 5 | # http://flake8.pycqa.org/en/latest/user/error-codes.html#error-violation-codes 6 | # * Documentation conventions compliance reported by pydocstyle (D) - 7 | # http://www.pydocstyle.org/en/stable/error_codes.html 8 | # * Default errors and warnings reported by pycodestyle (E and W) - 9 | # https://pycodestyle.readthedocs.io/en/latest/intro.html#error-codes 10 | # * Default errors reported by pyflakes (F) - 11 | # http://flake8.pycqa.org/en/latest/glossary.html#term-pyflakes 12 | # * Default warnings reported by flake8-bugbear (B) - 13 | # https://github.com/PyCQA/flake8-bugbear#list-of-warnings 14 | # * The B950 flake8-bugbear opinionated warning - 15 | # https://github.com/PyCQA/flake8-bugbear#opinionated-warnings 16 | select = C,D,E,F,W,B,B950 17 | # Ignore flake8's default warning about "whitespace before ':'" as it is not 18 | # PEP 8 compliant and conflicts with black's styling. 19 | # 20 | # Ignore flake8's default warning about maximum line length, which has 21 | # a hard stop at the configured value. Instead we use 22 | # flake8-bugbear's B950, which allows up to 10% overage. 23 | # 24 | # Also ignore flake8's warning about line breaks before binary 25 | # operators. It no longer agrees with PEP8. See, for example, here: 26 | # https://github.com/ambv/black/issues/21. Guido agrees here: 27 | # https://github.com/python/peps/commit/c59c4376ad233a62ca4b3a6060c81368bd21e85b. 28 | ignore = E203,E501,W503 29 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Each line is a file pattern followed by one or more owners. 2 | 3 | # These owners will be the default owners for everything in the 4 | # repo. Unless a later match takes precedence, these owners will be 5 | # requested for review when someone opens a pull request. 6 | * @dav3r @felddy @IanLee1521 @jsf9k @mcdonnnj 7 | 8 | # These folks own any files in the .github directory at the root of 9 | # the repository and any of its subdirectories. 10 | /.github/ @dav3r @felddy @jsf9k @mcdonnnj 11 | 12 | # These folks own all linting configuration files. 
13 | /.ansible-lint @dav3r @felddy @jsf9k @mcdonnnj 14 | /.bandit.yml @dav3r @felddy @jsf9k @mcdonnnj 15 | /.flake8 @dav3r @felddy @jsf9k @mcdonnnj 16 | /.isort.cfg @dav3r @felddy @jsf9k @mcdonnnj 17 | /.mdl_config.yaml @dav3r @felddy @jsf9k @mcdonnnj 18 | /.pre-commit-config.yaml @dav3r @felddy @jsf9k @mcdonnnj 19 | /.prettierignore @dav3r @felddy @jsf9k @mcdonnnj 20 | /.yamllint @dav3r @felddy @jsf9k @mcdonnnj 21 | /requirements.txt @dav3r @felddy @jsf9k @mcdonnnj 22 | /requirements-dev.txt @dav3r @felddy @jsf9k @mcdonnnj 23 | /requirements-test.txt @dav3r @felddy @jsf9k @mcdonnnj 24 | /setup-env @dav3r @felddy @jsf9k @mcdonnnj 25 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | # Any ignore directives should be uncommented in downstream projects to disable 4 | # Dependabot updates for the given dependency. Downstream projects will get 5 | # these updates when the pull request(s) in the appropriate skeleton are merged 6 | # and Lineage processes these changes. 7 | 8 | updates: 9 | - directory: / 10 | ignore: 11 | # Managed by cisagov/skeleton-generic 12 | - dependency-name: actions/cache 13 | - dependency-name: actions/checkout 14 | - dependency-name: actions/dependency-review-action 15 | - dependency-name: actions/setup-go 16 | - dependency-name: actions/setup-python 17 | - dependency-name: cisagov/action-job-preamble 18 | - dependency-name: cisagov/setup-env-github-action 19 | - dependency-name: crazy-max/ghaction-github-labeler 20 | - dependency-name: github/codeql-action 21 | - dependency-name: hashicorp/setup-packer 22 | - dependency-name: hashicorp/setup-terraform 23 | - dependency-name: mxschmitt/action-tmate 24 | # Managed by cisagov/skeleton-python-library 25 | - dependency-name: actions/download-artifact 26 | - dependency-name: actions/upload-artifact 27 | package-ecosystem: github-actions 28 | schedule: 29 | interval: weekly 30 | 31 | - directory: / 32 | package-ecosystem: pip 33 | schedule: 34 | interval: weekly 35 | 36 | - directory: / 37 | package-ecosystem: terraform 38 | schedule: 39 | interval: weekly 40 | version: 2 41 | -------------------------------------------------------------------------------- /.github/labels.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Rather than breaking up descriptions into multiline strings we disable that 3 | # specific rule in yamllint for this file. 
4 | # yamllint disable rule:line-length 5 | - color: eb6420 6 | description: This issue or pull request is awaiting the outcome of another issue or pull request 7 | name: blocked 8 | - color: "000000" 9 | description: This issue or pull request involves changes to existing functionality 10 | name: breaking change 11 | - color: d73a4a 12 | description: This issue or pull request addresses broken functionality 13 | name: bug 14 | - color: 07648d 15 | description: This issue will be advertised on code.gov's Open Tasks page (https://code.gov/open-tasks) 16 | name: code.gov 17 | - color: 0366d6 18 | description: Pull requests that update a dependency file 19 | name: dependencies 20 | - color: 5319e7 21 | description: This issue or pull request improves or adds to documentation 22 | name: documentation 23 | - color: cfd3d7 24 | description: This issue or pull request already exists or is covered in another issue or pull request 25 | name: duplicate 26 | - color: b005bc 27 | description: A high-level objective issue encompassing multiple issues instead of a specific unit of work 28 | name: epic 29 | - color: "000000" 30 | description: Pull requests that update GitHub Actions code 31 | name: github-actions 32 | - color: 0e8a16 33 | description: This issue or pull request is well-defined and good for newcomers 34 | name: good first issue 35 | - color: ff7518 36 | description: Pull request that should count toward Hacktoberfest participation 37 | name: hacktoberfest-accepted 38 | - color: a2eeef 39 | description: This issue or pull request will add or improve functionality, maintainability, or ease of use 40 | name: improvement 41 | - color: fef2c0 42 | description: This issue or pull request is not applicable, incorrect, or obsolete 43 | name: invalid 44 | - color: ce099a 45 | description: This pull request is ready to merge during the next Lineage Kraken release 46 | name: kraken 🐙 47 | - color: a4fc5d 48 | description: This issue or pull request requires further information 49 | name: need info 50 | - color: fcdb45 51 | description: This pull request is awaiting an action or decision to move forward 52 | name: on hold 53 | - color: 3772a4 54 | description: Pull requests that update Python code 55 | name: python 56 | - color: ef476c 57 | description: This issue is a request for information or needs discussion 58 | name: question 59 | - color: d73a4a 60 | description: This issue or pull request addresses a security issue 61 | name: security 62 | - color: 00008b 63 | description: This issue or pull request adds or otherwise modifies test code 64 | name: test 65 | - color: 1d76db 66 | description: This issue or pull request pulls in upstream updates 67 | name: upstream update 68 | - color: d4c5f9 69 | description: This issue or pull request increments the version number 70 | name: version bump 71 | - color: ffffff 72 | description: This issue will not be incorporated 73 | name: wontfix 74 | -------------------------------------------------------------------------------- /.github/lineage.yml: -------------------------------------------------------------------------------- 1 | --- 2 | lineage: 3 | skeleton: 4 | remote-url: https://github.com/cisagov/skeleton-python-library.git 5 | version: "1" 6 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: build 3 | 4 | on: # yamllint disable-line rule:truthy 5 | merge_group: 6 | types: 7 | - 
checks_requested 8 | pull_request: 9 | push: 10 | repository_dispatch: 11 | types: 12 | - apb 13 | 14 | # Set a default shell for any run steps. The `-Eueo pipefail` sets errtrace, 15 | # nounset, errexit, and pipefail. The `-x` will print all commands as they are 16 | # run. Please see the GitHub Actions documentation for more information: 17 | # https://docs.github.com/en/actions/using-jobs/setting-default-values-for-jobs 18 | defaults: 19 | run: 20 | shell: bash -Eueo pipefail -x {0} 21 | 22 | env: 23 | PIP_CACHE_DIR: ~/.cache/pip 24 | PRE_COMMIT_CACHE_DIR: ~/.cache/pre-commit 25 | RUN_TMATE: ${{ secrets.RUN_TMATE }} 26 | TERRAFORM_DOCS_REPO_BRANCH_NAME: improvement/support_atx_closed_markdown_headers 27 | TERRAFORM_DOCS_REPO_DEPTH: 1 28 | TERRAFORM_DOCS_REPO_URL: https://github.com/mcdonnnj/terraform-docs.git 29 | 30 | jobs: 31 | diagnostics: 32 | name: Run diagnostics 33 | # This job does not need any permissions 34 | permissions: {} 35 | runs-on: ubuntu-latest 36 | steps: 37 | # Note that a duplicate of this step must be added at the top of 38 | # each job. 39 | - name: Apply standard cisagov job preamble 40 | uses: cisagov/action-job-preamble@v1 41 | with: 42 | check_github_status: "true" 43 | # This functionality is poorly implemented and has been 44 | # causing problems due to the MITM implementation hogging or 45 | # leaking memory. As a result we disable it by default. If 46 | # you want to temporarily enable it, simply set 47 | # monitor_permissions equal to "true". 48 | # 49 | # TODO: Re-enable this functionality when practical. See 50 | # cisagov/skeleton-generic#207 for more details. 51 | monitor_permissions: "false" 52 | output_workflow_context: "true" 53 | # Use a variable to specify the permissions monitoring 54 | # configuration. By default this will yield the 55 | # configuration stored in the cisagov organization-level 56 | # variable, but if you want to use a different configuration 57 | # then simply: 58 | # 1. Create a repository-level variable with the name 59 | # ACTIONS_PERMISSIONS_CONFIG. 60 | # 2. Set this new variable's value to the configuration you 61 | # want to use for this repository. 62 | # 63 | # Note in particular that changing the permissions 64 | # monitoring configuration *does not* require you to modify 65 | # this workflow. 66 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 67 | lint: 68 | needs: 69 | - diagnostics 70 | permissions: 71 | # actions/checkout needs this to fetch code 72 | contents: read 73 | runs-on: ubuntu-latest 74 | steps: 75 | - name: Apply standard cisagov job preamble 76 | uses: cisagov/action-job-preamble@v1 77 | with: 78 | # This functionality is poorly implemented and has been 79 | # causing problems due to the MITM implementation hogging or 80 | # leaking memory. As a result we disable it by default. If 81 | # you want to temporarily enable it, simply set 82 | # monitor_permissions equal to "true". 83 | # 84 | # TODO: Re-enable this functionality when practical. See 85 | # cisagov/skeleton-generic#207 for more details. 86 | monitor_permissions: "false" 87 | # Use a variable to specify the permissions monitoring 88 | # configuration. By default this will yield the 89 | # configuration stored in the cisagov organization-level 90 | # variable, but if you want to use a different configuration 91 | # then simply: 92 | # 1. Create a repository-level variable with the name 93 | # ACTIONS_PERMISSIONS_CONFIG. 94 | # 2. 
Set this new variable's value to the configuration you 95 | # want to use for this repository. 96 | # 97 | # Note in particular that changing the permissions 98 | # monitoring configuration *does not* require you to modify 99 | # this workflow. 100 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 101 | - id: setup-env 102 | uses: cisagov/setup-env-github-action@develop 103 | - uses: actions/checkout@v4 104 | - id: setup-python 105 | uses: actions/setup-python@v5 106 | with: 107 | # python-version: ${{ steps.setup-env.outputs.python-version }} 108 | # This project cannot currently support Python 3.11 or 3.12. 109 | python-version: "3.10" 110 | # We need the Go version and Go cache location for the actions/cache step, 111 | # so the Go installation must happen before that. 112 | - id: setup-go 113 | uses: actions/setup-go@v5 114 | with: 115 | # There is no expectation for actual Go code so we disable caching as 116 | # it relies on the existence of a go.sum file. 117 | cache: false 118 | go-version: ${{ steps.setup-env.outputs.go-version }} 119 | - id: go-cache 120 | name: Lookup Go cache directory 121 | run: | 122 | echo "dir=$(go env GOCACHE)" >> $GITHUB_OUTPUT 123 | - uses: actions/cache@v4 124 | env: 125 | BASE_CACHE_KEY: ${{ github.job }}-${{ runner.os }}-\ 126 | py${{ steps.setup-python.outputs.python-version }}-\ 127 | go${{ steps.setup-go.outputs.go-version }}-\ 128 | packer${{ steps.setup-env.outputs.packer-version }}-\ 129 | tf${{ steps.setup-env.outputs.terraform-version }}- 130 | with: 131 | # We do not use '**/setup.py' in the cache key so only the 'setup.py' 132 | # file in the root of the repository is used. This is in case a Python 133 | # package were to have a 'setup.py' as part of its internal codebase. 134 | key: ${{ env.BASE_CACHE_KEY }}\ 135 | ${{ hashFiles('**/requirements-test.txt') }}-\ 136 | ${{ hashFiles('**/requirements.txt') }}-\ 137 | ${{ hashFiles('**/.pre-commit-config.yaml') }}-\ 138 | ${{ hashFiles('setup.py') }} 139 | # Note that the .terraform directory IS NOT included in the 140 | # cache because if we were caching, then we would need to use 141 | # the `-upgrade=true` option. This option blindly pulls down the 142 | # latest modules and providers instead of checking to see if an 143 | # update is required. That behavior defeats the benefits of caching, 144 | # so there is no point in doing it for the .terraform directory.
145 | path: | 146 | ${{ env.PIP_CACHE_DIR }} 147 | ${{ env.PRE_COMMIT_CACHE_DIR }} 148 | ${{ steps.go-cache.outputs.dir }} 149 | restore-keys: | 150 | ${{ env.BASE_CACHE_KEY }} 151 | - uses: hashicorp/setup-packer@v3 152 | with: 153 | version: ${{ steps.setup-env.outputs.packer-version }} 154 | - uses: hashicorp/setup-terraform@v3 155 | with: 156 | terraform_version: ${{ steps.setup-env.outputs.terraform-version }} 157 | - name: Install go-critic 158 | env: 159 | PACKAGE_URL: github.com/go-critic/go-critic/cmd/gocritic 160 | PACKAGE_VERSION: ${{ steps.setup-env.outputs.go-critic-version }} 161 | run: go install ${PACKAGE_URL}@${PACKAGE_VERSION} 162 | - name: Install goimports 163 | env: 164 | PACKAGE_URL: golang.org/x/tools/cmd/goimports 165 | PACKAGE_VERSION: ${{ steps.setup-env.outputs.goimports-version }} 166 | run: go install ${PACKAGE_URL}@${PACKAGE_VERSION} 167 | - name: Install gosec 168 | env: 169 | PACKAGE_URL: github.com/securego/gosec/v2/cmd/gosec 170 | PACKAGE_VERSION: ${{ steps.setup-env.outputs.gosec-version }} 171 | run: go install ${PACKAGE_URL}@${PACKAGE_VERSION} 172 | - name: Install staticcheck 173 | env: 174 | PACKAGE_URL: honnef.co/go/tools/cmd/staticcheck 175 | PACKAGE_VERSION: ${{ steps.setup-env.outputs.staticcheck-version }} 176 | run: go install ${PACKAGE_URL}@${PACKAGE_VERSION} 177 | # TODO: https://github.com/cisagov/skeleton-generic/issues/165 178 | # We are temporarily using @mcdonnnj's forked branch of terraform-docs 179 | # until his PR: https://github.com/terraform-docs/terraform-docs/pull/745 180 | # is approved. This temporary fix will allow for ATX header support when 181 | # terraform-docs is run during linting. 182 | - name: Clone ATX headers branch from terraform-docs fork 183 | run: | 184 | git clone \ 185 | --branch $TERRAFORM_DOCS_REPO_BRANCH_NAME \ 186 | --depth $TERRAFORM_DOCS_REPO_DEPTH \ 187 | --single-branch \ 188 | $TERRAFORM_DOCS_REPO_URL /tmp/terraform-docs 189 | - name: Build and install terraform-docs binary 190 | run: | 191 | go build \ 192 | -C /tmp/terraform-docs \ 193 | -o $(go env GOPATH)/bin/terraform-docs 194 | - name: Install dependencies 195 | run: | 196 | python -m pip install --upgrade pip setuptools wheel 197 | pip install --upgrade --requirement requirements-test.txt 198 | - name: Set up pre-commit hook environments 199 | run: pre-commit install-hooks 200 | - name: Run pre-commit on all files 201 | run: pre-commit run --all-files 202 | - name: Setup tmate debug session 203 | uses: mxschmitt/action-tmate@v3 204 | if: env.RUN_TMATE 205 | test: 206 | name: test source - py${{ matrix.python-version }} 207 | needs: 208 | - diagnostics 209 | permissions: 210 | # actions/checkout needs this to fetch code 211 | contents: read 212 | runs-on: ${{ matrix.os }} 213 | strategy: 214 | fail-fast: false 215 | matrix: 216 | include: 217 | - os: ubuntu-22.04 218 | python-version: "3.7" 219 | os: 220 | - ubuntu-latest 221 | python-version: 222 | - "3.8" 223 | - "3.9" 224 | - "3.10" 225 | # - "3.11" 226 | # - "3.12" 227 | # - "3.13" 228 | steps: 229 | - name: Apply standard cisagov job preamble 230 | uses: cisagov/action-job-preamble@v1 231 | with: 232 | # This functionality is poorly implemented and has been 233 | # causing problems due to the MITM implementation hogging or 234 | # leaking memory. As a result we disable it by default. If 235 | # you want to temporarily enable it, simply set 236 | # monitor_permissions equal to "true". 237 | # 238 | # TODO: Re-enable this functionality when practical. 
See 239 | # cisagov/skeleton-python-library#149 for more details. 240 | monitor_permissions: "false" 241 | # Use a variable to specify the permissions monitoring 242 | # configuration. By default this will yield the 243 | # configuration stored in the cisagov organization-level 244 | # variable, but if you want to use a different configuration 245 | # then simply: 246 | # 1. Create a repository-level variable with the name 247 | # ACTIONS_PERMISSIONS_CONFIG. 248 | # 2. Set this new variable's value to the configuration you 249 | # want to use for this repository. 250 | # 251 | # Note in particular that changing the permissions 252 | # monitoring configuration *does not* require you to modify 253 | # this workflow. 254 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 255 | - uses: actions/checkout@v4 256 | - id: setup-python 257 | uses: actions/setup-python@v5 258 | with: 259 | python-version: ${{ matrix.python-version }} 260 | - uses: actions/cache@v4 261 | env: 262 | BASE_CACHE_KEY: ${{ github.job }}-${{ runner.os }}-\ 263 | py${{ steps.setup-python.outputs.python-version }}- 264 | with: 265 | path: ${{ env.PIP_CACHE_DIR }} 266 | # We do not use '**/setup.py' in the cache key so only the 'setup.py' 267 | # file in the root of the repository is used. This is in case a Python 268 | # package were to have a 'setup.py' as part of its internal codebase. 269 | key: ${{ env.BASE_CACHE_KEY }}\ 270 | ${{ hashFiles('**/requirements-test.txt') }}-\ 271 | ${{ hashFiles('**/requirements.txt') }}-\ 272 | ${{ hashFiles('setup.py') }} 273 | restore-keys: | 274 | ${{ env.BASE_CACHE_KEY }} 275 | - name: Install dependencies 276 | run: | 277 | python -m pip install --upgrade pip 278 | pip install --upgrade --requirement requirements-test.txt 279 | - name: Run tests 280 | env: 281 | RELEASE_TAG: ${{ github.event.release.tag_name }} 282 | run: pytest 283 | - name: Upload coverage report 284 | uses: coverallsapp/github-action@v2 285 | with: 286 | flag-name: py${{ matrix.python-version }} 287 | parallel: true 288 | if: success() 289 | - name: Setup tmate debug session 290 | uses: mxschmitt/action-tmate@v3 291 | if: env.RUN_TMATE 292 | coveralls-finish: 293 | permissions: 294 | # actions/checkout needs this to fetch code 295 | contents: read 296 | runs-on: ubuntu-latest 297 | needs: 298 | - diagnostics 299 | - test 300 | steps: 301 | - name: Apply standard cisagov job preamble 302 | uses: cisagov/action-job-preamble@v1 303 | with: 304 | # This functionality is poorly implemented and has been 305 | # causing problems due to the MITM implementation hogging or 306 | # leaking memory. As a result we disable it by default. If 307 | # you want to temporarily enable it, simply set 308 | # monitor_permissions equal to "true". 309 | # 310 | # TODO: Re-enable this functionality when practical. See 311 | # cisagov/skeleton-python-library#149 for more details. 312 | monitor_permissions: "false" 313 | # Use a variable to specify the permissions monitoring 314 | # configuration. By default this will yield the 315 | # configuration stored in the cisagov organization-level 316 | # variable, but if you want to use a different configuration 317 | # then simply: 318 | # 1. Create a repository-level variable with the name 319 | # ACTIONS_PERMISSIONS_CONFIG. 320 | # 2. Set this new variable's value to the configuration you 321 | # want to use for this repository. 
322 | # 323 | # Note in particular that changing the permissions 324 | # monitoring configuration *does not* require you to modify 325 | # this workflow. 326 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 327 | - uses: actions/checkout@v4 328 | - name: Finished coveralls reports 329 | uses: coverallsapp/github-action@v2 330 | with: 331 | parallel-finished: true 332 | - name: Setup tmate debug session 333 | uses: mxschmitt/action-tmate@v3 334 | if: env.RUN_TMATE 335 | build: 336 | name: build wheel - py${{ matrix.python-version }} 337 | needs: 338 | - diagnostics 339 | - lint 340 | - test 341 | permissions: 342 | # actions/checkout needs this to fetch code 343 | contents: read 344 | runs-on: ${{ matrix.os }} 345 | strategy: 346 | fail-fast: false 347 | matrix: 348 | include: 349 | - os: ubuntu-22.04 350 | python-version: "3.7" 351 | os: 352 | - ubuntu-latest 353 | python-version: 354 | - "3.8" 355 | - "3.9" 356 | - "3.10" 357 | # - "3.11" 358 | # - "3.12" 359 | # - "3.13" 360 | steps: 361 | - name: Apply standard cisagov job preamble 362 | uses: cisagov/action-job-preamble@v1 363 | with: 364 | # This functionality is poorly implemented and has been 365 | # causing problems due to the MITM implementation hogging or 366 | # leaking memory. As a result we disable it by default. If 367 | # you want to temporarily enable it, simply set 368 | # monitor_permissions equal to "true". 369 | # 370 | # TODO: Re-enable this functionality when practical. See 371 | # cisagov/skeleton-python-library#149 for more details. 372 | monitor_permissions: "false" 373 | # Use a variable to specify the permissions monitoring 374 | # configuration. By default this will yield the 375 | # configuration stored in the cisagov organization-level 376 | # variable, but if you want to use a different configuration 377 | # then simply: 378 | # 1. Create a repository-level variable with the name 379 | # ACTIONS_PERMISSIONS_CONFIG. 380 | # 2. Set this new variable's value to the configuration you 381 | # want to use for this repository. 382 | # 383 | # Note in particular that changing the permissions 384 | # monitoring configuration *does not* require you to modify 385 | # this workflow. 386 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 387 | - uses: actions/checkout@v4 388 | - id: setup-python 389 | uses: actions/setup-python@v5 390 | with: 391 | python-version: ${{ matrix.python-version }} 392 | - uses: actions/cache@v4 393 | env: 394 | BASE_CACHE_KEY: ${{ github.job }}-${{ runner.os }}-\ 395 | py${{ steps.setup-python.outputs.python-version }}- 396 | with: 397 | path: ${{ env.PIP_CACHE_DIR }} 398 | # We do not use '**/setup.py' in the cache key so only the 'setup.py' 399 | # file in the root of the repository is used. This is in case a Python 400 | # package were to have a 'setup.py' as part of its internal codebase. 
401 | key: ${{ env.BASE_CACHE_KEY }}\ 402 | ${{ hashFiles('**/requirements.txt') }}-\ 403 | ${{ hashFiles('setup.py') }} 404 | restore-keys: | 405 | ${{ env.BASE_CACHE_KEY }} 406 | - name: Install build dependencies 407 | run: | 408 | python -m pip install --upgrade pip setuptools wheel 409 | python -m pip install --upgrade build 410 | - name: Build artifacts 411 | run: python -m build 412 | - name: Upload artifacts 413 | uses: actions/upload-artifact@v4 414 | with: 415 | name: dist-${{ matrix.python-version }} 416 | path: dist 417 | - name: Setup tmate debug session 418 | uses: mxschmitt/action-tmate@v3 419 | if: env.RUN_TMATE 420 | test-build: 421 | name: test built wheel - py${{ matrix.python-version }} 422 | needs: 423 | - diagnostics 424 | - build 425 | permissions: 426 | # actions/checkout needs this to fetch code 427 | contents: read 428 | runs-on: ${{ matrix.os }} 429 | strategy: 430 | fail-fast: false 431 | matrix: 432 | include: 433 | - os: ubuntu-22.04 434 | python-version: "3.7" 435 | os: 436 | - ubuntu-latest 437 | python-version: 438 | - "3.8" 439 | - "3.9" 440 | - "3.10" 441 | # - "3.11" 442 | # - "3.12" 443 | # - "3.13" 444 | steps: 445 | - name: Apply standard cisagov job preamble 446 | uses: cisagov/action-job-preamble@v1 447 | with: 448 | # This functionality is poorly implemented and has been 449 | # causing problems due to the MITM implementation hogging or 450 | # leaking memory. As a result we disable it by default. If 451 | # you want to temporarily enable it, simply set 452 | # monitor_permissions equal to "true". 453 | # 454 | # TODO: Re-enable this functionality when practical. See 455 | # cisagov/skeleton-python-library#149 for more details. 456 | monitor_permissions: "false" 457 | # Use a variable to specify the permissions monitoring 458 | # configuration. By default this will yield the 459 | # configuration stored in the cisagov organization-level 460 | # variable, but if you want to use a different configuration 461 | # then simply: 462 | # 1. Create a repository-level variable with the name 463 | # ACTIONS_PERMISSIONS_CONFIG. 464 | # 2. Set this new variable's value to the configuration you 465 | # want to use for this repository. 466 | # 467 | # Note in particular that changing the permissions 468 | # monitoring configuration *does not* require you to modify 469 | # this workflow. 470 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 471 | - uses: actions/checkout@v4 472 | - id: setup-python 473 | uses: actions/setup-python@v5 474 | with: 475 | python-version: ${{ matrix.python-version }} 476 | - uses: actions/cache@v4 477 | env: 478 | BASE_CACHE_KEY: ${{ github.job }}-${{ runner.os }}-\ 479 | py${{ steps.setup-python.outputs.python-version }}- 480 | with: 481 | path: ${{ env.PIP_CACHE_DIR }} 482 | # We do not use '**/setup.py' in the cache key so only the 'setup.py' 483 | # file in the root of the repository is used. This is in case a Python 484 | # package were to have a 'setup.py' as part of its internal codebase. 
485 | key: ${{ env.BASE_CACHE_KEY }}\ 486 | ${{ hashFiles('**/requirements.txt') }}-\ 487 | ${{ hashFiles('setup.py') }} 488 | restore-keys: | 489 | ${{ env.BASE_CACHE_KEY }} 490 | - name: Retrieve the built wheel 491 | uses: actions/download-artifact@v4 492 | with: 493 | name: dist-${{ matrix.python-version }} 494 | path: dist 495 | - id: find-wheel 496 | name: Get the name of the retrieved wheel (there should only be one) 497 | run: echo "wheel=$(ls dist/*whl)" >> $GITHUB_OUTPUT 498 | - name: Update core Python packages 499 | run: python -m pip install --upgrade pip setuptools wheel 500 | - name: Install the built wheel (along with testing dependencies) 501 | run: python -m pip install ${{ steps.find-wheel.outputs.wheel }}[test] 502 | - name: Run tests 503 | env: 504 | RELEASE_TAG: ${{ github.event.release.tag_name }} 505 | run: pytest 506 | - name: Setup tmate debug session 507 | uses: mxschmitt/action-tmate@v3 508 | if: env.RUN_TMATE 509 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # For most projects, this workflow file will not need changing; you simply need 3 | # to commit it to your repository. 4 | # 5 | # You may wish to alter this file to override the set of languages analyzed, 6 | # or to provide custom queries or build logic. 7 | name: CodeQL 8 | 9 | # The use of on here as a key is part of the GitHub actions syntax. 10 | # yamllint disable-line rule:truthy 11 | on: 12 | merge_group: 13 | types: 14 | - checks_requested 15 | pull_request: 16 | # The branches here must be a subset of the ones in the push key 17 | branches: 18 | - develop 19 | push: 20 | # Dependabot-triggered push events have read-only access, but uploading code 21 | # scanning requires write access. 22 | branches-ignore: 23 | - dependabot/** 24 | schedule: 25 | - cron: 0 14 * * 6 26 | 27 | jobs: 28 | diagnostics: 29 | name: Run diagnostics 30 | # This job does not need any permissions 31 | permissions: {} 32 | runs-on: ubuntu-latest 33 | steps: 34 | # Note that a duplicate of this step must be added at the top of 35 | # each job. 36 | - name: Apply standard cisagov job preamble 37 | uses: cisagov/action-job-preamble@v1 38 | with: 39 | check_github_status: "true" 40 | # This functionality is poorly implemented and has been 41 | # causing problems due to the MITM implementation hogging or 42 | # leaking memory. As a result we disable it by default. If 43 | # you want to temporarily enable it, simply set 44 | # monitor_permissions equal to "true". 45 | # 46 | # TODO: Re-enable this functionality when practical. See 47 | # cisagov/skeleton-generic#207 for more details. 48 | monitor_permissions: "false" 49 | output_workflow_context: "true" 50 | # Use a variable to specify the permissions monitoring 51 | # configuration. By default this will yield the 52 | # configuration stored in the cisagov organization-level 53 | # variable, but if you want to use a different configuration 54 | # then simply: 55 | # 1. Create a repository-level variable with the name 56 | # ACTIONS_PERMISSIONS_CONFIG. 57 | # 2. Set this new variable's value to the configuration you 58 | # want to use for this repository. 59 | # 60 | # Note in particular that changing the permissions 61 | # monitoring configuration *does not* require you to modify 62 | # this workflow. 
63 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 64 | analyze: 65 | name: Analyze 66 | needs: 67 | - diagnostics 68 | permissions: 69 | # actions/checkout needs this to fetch code 70 | contents: read 71 | # required for all workflows 72 | security-events: write 73 | runs-on: ubuntu-latest 74 | strategy: 75 | fail-fast: false 76 | matrix: 77 | # Override automatic language detection by changing the below 78 | # list 79 | # 80 | # Supported options are actions, c-cpp, csharp, go, 81 | # java-kotlin, javascript-typescript, python, ruby, and swift. 82 | language: 83 | - actions 84 | - python 85 | # Learn more... 86 | # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection 87 | 88 | steps: 89 | - name: Apply standard cisagov job preamble 90 | uses: cisagov/action-job-preamble@v1 91 | with: 92 | # This functionality is poorly implemented and has been 93 | # causing problems due to the MITM implementation hogging or 94 | # leaking memory. As a result we disable it by default. If 95 | # you want to temporarily enable it, simply set 96 | # monitor_permissions equal to "true". 97 | # 98 | # TODO: Re-enable this functionality when practical. See 99 | # cisagov/skeleton-generic#207 for more details. 100 | monitor_permissions: "false" 101 | # Use a variable to specify the permissions monitoring 102 | # configuration. By default this will yield the 103 | # configuration stored in the cisagov organization-level 104 | # variable, but if you want to use a different configuration 105 | # then simply: 106 | # 1. Create a repository-level variable with the name 107 | # ACTIONS_PERMISSIONS_CONFIG. 108 | # 2. Set this new variable's value to the configuration you 109 | # want to use for this repository. 110 | # 111 | # Note in particular that changing the permissions 112 | # monitoring configuration *does not* require you to modify 113 | # this workflow. 114 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 115 | 116 | - name: Checkout repository 117 | uses: actions/checkout@v4 118 | 119 | # Initializes the CodeQL tools for scanning. 120 | - name: Initialize CodeQL 121 | uses: github/codeql-action/init@v3 122 | with: 123 | languages: ${{ matrix.language }} 124 | 125 | # Autobuild attempts to build any compiled languages (C/C++, C#, or 126 | # Java). If this step fails, then you should remove it and run the build 127 | # manually (see below). 128 | - name: Autobuild 129 | uses: github/codeql-action/autobuild@v3 130 | 131 | # ℹ️ Command-line programs to run using the OS shell. 132 | # 📚 https://git.io/JvXDl 133 | 134 | # ✏️ If the Autobuild fails above, remove it and uncomment the following 135 | # three lines and modify them (or add more) to build your code if your 136 | # project uses a compiled language 137 | 138 | # - run: | 139 | # make bootstrap 140 | # make release 141 | 142 | - name: Perform CodeQL Analysis 143 | uses: github/codeql-action/analyze@v3 144 | -------------------------------------------------------------------------------- /.github/workflows/dependency-review.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Dependency review 3 | 4 | on: # yamllint disable-line rule:truthy 5 | merge_group: 6 | types: 7 | - checks_requested 8 | pull_request: 9 | 10 | # Set a default shell for any run steps. The `-Eueo pipefail` sets errtrace, 11 | # nounset, errexit, and pipefail. 
The `-x` will print all commands as they are 12 | # run. Please see the GitHub Actions documentation for more information: 13 | # https://docs.github.com/en/actions/using-jobs/setting-default-values-for-jobs 14 | defaults: 15 | run: 16 | shell: bash -Eueo pipefail -x {0} 17 | 18 | jobs: 19 | diagnostics: 20 | name: Run diagnostics 21 | # This job does not need any permissions 22 | permissions: {} 23 | runs-on: ubuntu-latest 24 | steps: 25 | # Note that a duplicate of this step must be added at the top of 26 | # each job. 27 | - name: Apply standard cisagov job preamble 28 | uses: cisagov/action-job-preamble@v1 29 | with: 30 | check_github_status: "true" 31 | # This functionality is poorly implemented and has been 32 | # causing problems due to the MITM implementation hogging or 33 | # leaking memory. As a result we disable it by default. If 34 | # you want to temporarily enable it, simply set 35 | # monitor_permissions equal to "true". 36 | # 37 | # TODO: Re-enable this functionality when practical. See 38 | # cisagov/skeleton-generic#207 for more details. 39 | monitor_permissions: "false" 40 | output_workflow_context: "true" 41 | # Use a variable to specify the permissions monitoring 42 | # configuration. By default this will yield the 43 | # configuration stored in the cisagov organization-level 44 | # variable, but if you want to use a different configuration 45 | # then simply: 46 | # 1. Create a repository-level variable with the name 47 | # ACTIONS_PERMISSIONS_CONFIG. 48 | # 2. Set this new variable's value to the configuration you 49 | # want to use for this repository. 50 | # 51 | # Note in particular that changing the permissions 52 | # monitoring configuration *does not* require you to modify 53 | # this workflow. 54 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 55 | dependency-review: 56 | name: Dependency review 57 | needs: 58 | - diagnostics 59 | permissions: 60 | # actions/checkout needs this to fetch code 61 | contents: read 62 | runs-on: ubuntu-latest 63 | steps: 64 | - name: Apply standard cisagov job preamble 65 | uses: cisagov/action-job-preamble@v1 66 | with: 67 | # This functionality is poorly implemented and has been 68 | # causing problems due to the MITM implementation hogging or 69 | # leaking memory. As a result we disable it by default. If 70 | # you want to temporarily enable it, simply set 71 | # monitor_permissions equal to "true". 72 | # 73 | # TODO: Re-enable this functionality when practical. See 74 | # cisagov/skeleton-generic#207 for more details. 75 | monitor_permissions: "false" 76 | # Use a variable to specify the permissions monitoring 77 | # configuration. By default this will yield the 78 | # configuration stored in the cisagov organization-level 79 | # variable, but if you want to use a different configuration 80 | # then simply: 81 | # 1. Create a repository-level variable with the name 82 | # ACTIONS_PERMISSIONS_CONFIG. 83 | # 2. Set this new variable's value to the configuration you 84 | # want to use for this repository. 85 | # 86 | # Note in particular that changing the permissions 87 | # monitoring configuration *does not* require you to modify 88 | # this workflow. 
89 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 90 | - id: checkout-repo 91 | name: Checkout the repository 92 | uses: actions/checkout@v4 93 | - id: dependency-review 94 | name: Review dependency changes for vulnerabilities and license changes 95 | uses: actions/dependency-review-action@v4 96 | -------------------------------------------------------------------------------- /.github/workflows/sync-labels.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: sync-labels 3 | 4 | on: # yamllint disable-line rule:truthy 5 | push: 6 | paths: 7 | - .github/labels.yml 8 | - .github/workflows/sync-labels.yml 9 | workflow_dispatch: 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | diagnostics: 16 | name: Run diagnostics 17 | # This job does not need any permissions 18 | permissions: {} 19 | runs-on: ubuntu-latest 20 | steps: 21 | # Note that a duplicate of this step must be added at the top of 22 | # each job. 23 | - name: Apply standard cisagov job preamble 24 | uses: cisagov/action-job-preamble@v1 25 | with: 26 | check_github_status: "true" 27 | # This functionality is poorly implemented and has been 28 | # causing problems due to the MITM implementation hogging or 29 | # leaking memory. As a result we disable it by default. If 30 | # you want to temporarily enable it, simply set 31 | # monitor_permissions equal to "true". 32 | # 33 | # TODO: Re-enable this functionality when practical. See 34 | # cisagov/skeleton-generic#207 for more details. 35 | monitor_permissions: "false" 36 | output_workflow_context: "true" 37 | # Use a variable to specify the permissions monitoring 38 | # configuration. By default this will yield the 39 | # configuration stored in the cisagov organization-level 40 | # variable, but if you want to use a different configuration 41 | # then simply: 42 | # 1. Create a repository-level variable with the name 43 | # ACTIONS_PERMISSIONS_CONFIG. 44 | # 2. Set this new variable's value to the configuration you 45 | # want to use for this repository. 46 | # 47 | # Note in particular that changing the permissions 48 | # monitoring configuration *does not* require you to modify 49 | # this workflow. 50 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 51 | labeler: 52 | needs: 53 | - diagnostics 54 | permissions: 55 | # actions/checkout needs this to fetch code 56 | contents: read 57 | # crazy-max/ghaction-github-labeler needs this to manage repository labels 58 | issues: write 59 | runs-on: ubuntu-latest 60 | steps: 61 | - name: Apply standard cisagov job preamble 62 | uses: cisagov/action-job-preamble@v1 63 | with: 64 | # This functionality is poorly implemented and has been 65 | # causing problems due to the MITM implementation hogging or 66 | # leaking memory. As a result we disable it by default. If 67 | # you want to temporarily enable it, simply set 68 | # monitor_permissions equal to "true". 69 | # 70 | # TODO: Re-enable this functionality when practical. See 71 | # cisagov/skeleton-generic#207 for more details. 72 | monitor_permissions: "false" 73 | # Use a variable to specify the permissions monitoring 74 | # configuration. By default this will yield the 75 | # configuration stored in the cisagov organization-level 76 | # variable, but if you want to use a different configuration 77 | # then simply: 78 | # 1. Create a repository-level variable with the name 79 | # ACTIONS_PERMISSIONS_CONFIG. 80 | # 2. 
Set this new variable's value to the configuration you 81 | # want to use for this repository. 82 | # 83 | # Note in particular that changing the permissions 84 | # monitoring configuration *does not* require you to modify 85 | # this workflow. 86 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 87 | - uses: actions/checkout@v4 88 | - name: Sync repository labels 89 | if: success() 90 | uses: crazy-max/ghaction-github-labeler@v5 91 | with: 92 | # This is a hideous ternary equivalent so we only do a dry run unless 93 | # this workflow is triggered by the develop branch. 94 | dry-run: ${{ github.ref_name == 'develop' && 'false' || 'true' }} 95 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # This file specifies intentionally untracked files that Git should ignore. 2 | # Files already tracked by Git are not affected. 3 | # See: https://git-scm.com/docs/gitignore 4 | 5 | ## Python ## 6 | __pycache__ 7 | .coverage 8 | .mypy_cache 9 | .pytest_cache 10 | .python-version 11 | *.egg-info 12 | dist 13 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | combine_star=true 3 | force_sort_within_sections=true 4 | 5 | import_heading_stdlib=Standard Python Libraries 6 | import_heading_thirdparty=Third-Party Libraries 7 | import_heading_firstparty=cisagov Libraries 8 | 9 | # Run isort under the black profile to align with our other Python linting 10 | profile=black 11 | -------------------------------------------------------------------------------- /.mdl_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | # Default state for all rules 4 | default: true 5 | 6 | # MD003/heading-style/header-style - Heading style 7 | MD003: 8 | # Enforce the ATX-closed style of header 9 | style: atx_closed 10 | 11 | # MD004/ul-style - Unordered list style 12 | MD004: 13 | # Enforce dashes for unordered lists 14 | style: dash 15 | 16 | # MD013/line-length - Line length 17 | MD013: 18 | # Do not enforce for code blocks 19 | code_blocks: false 20 | # Do not enforce for tables 21 | tables: false 22 | 23 | # MD024/no-duplicate-heading/no-duplicate-header - Multiple headings with the 24 | # same content 25 | MD024: 26 | # Allow headers with the same content as long as they are not in the same 27 | # parent heading 28 | allow_different_nesting: true 29 | 30 | # MD029/ol-prefix - Ordered list item prefix 31 | MD029: 32 | # Enforce the `1.` style for ordered lists 33 | style: one 34 | 35 | # MD033/no-inline-html - Inline HTML 36 | MD033: 37 | # The h1 and img elements are allowed to permit header images 38 | allowed_elements: 39 | - h1 40 | - img 41 | 42 | # MD035/hr-style - Horizontal rule style 43 | MD035: 44 | # Enforce dashes for horizontal rules 45 | style: --- 46 | 47 | # MD046/code-block-style - Code block style 48 | MD046: 49 | # Enforce the fenced style for code blocks 50 | style: fenced 51 | 52 | # MD049/emphasis-style - Emphasis style should be consistent 53 | MD049: 54 | # Enforce asterisks as the style to use for emphasis 55 | style: asterisk 56 | 57 | # MD050/strong-style - Strong style should be consistent 58 | MD050: 59 | # Enforce asterisks as the style to use for strong 60 | style: asterisk 61 | 
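The `import_heading_*` settings in the `.isort.cfg` shown above give each import section a labeled comment header when isort rewrites a module. A minimal sketch of what a compliant module preamble looks like after isort runs (the module names below are hypothetical and assume `requests` and `pshtt` are installed):

```python
"""Demonstrate the import grouping that this isort configuration enforces."""

# Standard Python Libraries
import logging
from pathlib import Path

# Third-Party Libraries
import requests

# cisagov Libraries
from pshtt import utils
```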
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | default_language_version: 3 | # force all unspecified python hooks to run python3 4 | python: python3 5 | 6 | repos: 7 | # Check the pre-commit configuration 8 | - repo: meta 9 | hooks: 10 | - id: check-useless-excludes 11 | 12 | - repo: https://github.com/pre-commit/pre-commit-hooks 13 | rev: v5.0.0 14 | hooks: 15 | - id: check-case-conflict 16 | - id: check-executables-have-shebangs 17 | - id: check-json 18 | - id: check-merge-conflict 19 | - id: check-shebang-scripts-are-executable 20 | - id: check-symlinks 21 | - id: check-toml 22 | - id: check-vcs-permalinks 23 | - id: check-xml 24 | - id: debug-statements 25 | - id: destroyed-symlinks 26 | - id: detect-aws-credentials 27 | args: 28 | - --allow-missing-credentials 29 | - id: detect-private-key 30 | - id: end-of-file-fixer 31 | - id: mixed-line-ending 32 | args: 33 | - --fix=lf 34 | - id: pretty-format-json 35 | args: 36 | - --autofix 37 | - id: requirements-txt-fixer 38 | - id: trailing-whitespace 39 | 40 | # Text file hooks 41 | - repo: https://github.com/igorshubovych/markdownlint-cli 42 | rev: v0.44.0 43 | hooks: 44 | - id: markdownlint 45 | args: 46 | - --config=.mdl_config.yaml 47 | - repo: https://github.com/rbubley/mirrors-prettier 48 | rev: v3.5.3 49 | hooks: 50 | - id: prettier 51 | - repo: https://github.com/adrienverge/yamllint 52 | rev: v1.37.0 53 | hooks: 54 | - id: yamllint 55 | args: 56 | - --strict 57 | 58 | # GitHub Actions hooks 59 | - repo: https://github.com/python-jsonschema/check-jsonschema 60 | rev: 0.32.1 61 | hooks: 62 | - id: check-github-actions 63 | - id: check-github-workflows 64 | 65 | # pre-commit hooks 66 | - repo: https://github.com/pre-commit/pre-commit 67 | # pre-commit v3+ dropped support for Python <3.8. Until this project and 68 | # the build.yml workflow can migrate to Python 3.8 or newer we must 69 | # continue to use an older version. 
70 | rev: v2.21.0 71 | hooks: 72 | - id: validate_manifest 73 | 74 | # Go hooks 75 | - repo: https://github.com/TekWizely/pre-commit-golang 76 | rev: v1.0.0-rc.1 77 | hooks: 78 | # Go Build 79 | - id: go-build-repo-mod 80 | # Style Checkers 81 | - id: go-critic 82 | # goimports 83 | - id: go-imports-repo 84 | args: 85 | # Write changes to files 86 | - -w 87 | # Go Mod Tidy 88 | - id: go-mod-tidy-repo 89 | # GoSec 90 | - id: go-sec-repo-mod 91 | # StaticCheck 92 | - id: go-staticcheck-repo-mod 93 | # Go Test 94 | - id: go-test-repo-mod 95 | # Go Vet 96 | - id: go-vet-repo-mod 97 | # Nix hooks 98 | - repo: https://github.com/nix-community/nixpkgs-fmt 99 | rev: v1.3.0 100 | hooks: 101 | - id: nixpkgs-fmt 102 | 103 | # Shell script hooks 104 | - repo: https://github.com/scop/pre-commit-shfmt 105 | rev: v3.11.0-1 106 | hooks: 107 | - id: shfmt 108 | args: 109 | # List files that will be formatted 110 | - --list 111 | # Write result to file instead of stdout 112 | - --write 113 | # Indent by two spaces 114 | - --indent 115 | - "2" 116 | # Binary operators may start a line 117 | - --binary-next-line 118 | # Switch cases are indented 119 | - --case-indent 120 | # Redirect operators are followed by a space 121 | - --space-redirects 122 | - repo: https://github.com/shellcheck-py/shellcheck-py 123 | rev: v0.10.0.1 124 | hooks: 125 | - id: shellcheck 126 | 127 | # Python hooks 128 | # Run bandit on the "tests" tree with a configuration 129 | - repo: https://github.com/PyCQA/bandit 130 | # bandit 1.7.6 dropped support for Python <3.8. Until this project 131 | # and the build.yml workflow can migrate to Python 3.8 or newer we 132 | # must continue to use an older version. 133 | rev: 1.7.5 134 | hooks: 135 | - id: bandit 136 | name: bandit (tests tree) 137 | files: tests 138 | args: 139 | - --config=.bandit.yml 140 | additional_dependencies: 141 | - importlib-metadata<5 142 | # Run bandit on everything except the "tests" tree 143 | - repo: https://github.com/PyCQA/bandit 144 | # bandit 1.7.6 dropped support for Python <3.8. Until this project 145 | # and the build.yml workflow can migrate to Python 3.8 or newer we 146 | # must continue to use an older version. 147 | rev: 1.7.5 148 | hooks: 149 | - id: bandit 150 | name: bandit (everything else) 151 | exclude: tests 152 | additional_dependencies: 153 | - importlib-metadata<5 154 | - repo: https://github.com/psf/black-pre-commit-mirror 155 | rev: 25.1.0 156 | hooks: 157 | - id: black 158 | - repo: https://github.com/PyCQA/flake8 159 | # flake8 v6+ dropped support for Python <3.8. Until this project and 160 | # the build.yml workflow can migrate to Python 3.8 or newer we must 161 | # continue to use an older version. 162 | rev: 5.0.4 163 | hooks: 164 | - id: flake8 165 | additional_dependencies: 166 | - flake8-docstrings==1.7.0 167 | - repo: https://github.com/PyCQA/isort 168 | # isort 5.12.0 dropped support for Python <3.8. Until this project and 169 | # the build.yml workflow can migrate to Python 3.8 or newer we must 170 | # continue to use an older version. 171 | rev: 5.11.5 172 | hooks: 173 | - id: isort 174 | - repo: https://github.com/pre-commit/mirrors-mypy 175 | # mypy 1.5.0 dropped support for Python <3.8. Until this project 176 | # and the build.yml workflow can migrate to Python 3.8 or newer we 177 | # must continue to use an older version. 
178 | rev: v1.4.1 179 | hooks: 180 | - id: mypy 181 | # IMPORTANT: Keep type hinting-related dependencies of the 182 | # mypy pre-commit hook additional_dependencies in sync with 183 | # the dev section of setup.py to avoid discrepancies in type 184 | # checking between environments. 185 | additional_dependencies: 186 | - pytest 187 | - pytablewriter 188 | - types-docopt 189 | - types-pyOpenSSL 190 | - types-requests 191 | - types-setuptools 192 | - types-urllib3 193 | # Override the default arguments to drop the --ignore-missing-imports 194 | # option to enforce a complete mypy configuration. 195 | args: 196 | - --scripts-are-modules 197 | # pip-audit turns up several vulnerabilities for cryptography, but 198 | # we cannot pull in a newer version of that library because we 199 | # can't currently support any version of Python later than 3.10. 200 | # - repo: https://github.com/pypa/pip-audit 201 | # rev: v2.7.3 202 | # hooks: 203 | # - id: pip-audit 204 | # args: 205 | # # Add any pip requirements files to scan 206 | # - --requirement 207 | # - requirements-dev.txt 208 | # - --requirement 209 | # - requirements-test.txt 210 | # - --requirement 211 | # - requirements.txt 212 | - repo: https://github.com/asottile/pyupgrade 213 | # pyupgrade no longer supports Python 3.7 as of version 3.4.0, so 214 | # we cannot upgrade past the 3.3.2 release: 215 | # https://github.com/asottile/pyupgrade/blob/v3.4.0/setup.cfg#L23 216 | rev: v3.3.2 217 | hooks: 218 | - id: pyupgrade 219 | 220 | # Ansible hooks 221 | # - repo: https://github.com/ansible/ansible-lint 222 | # # ansible-lint no longer supports Python 3.7 as of version 6.0, so 223 | # # we cannot upgrade past the 5.4.0 release: 224 | # # https://github.com/ansible/ansible-lint/releases/tag/v6.0.0 225 | # # 226 | # # But the 5.4.0 release causes a different failure because the 227 | # # version of ansible isn't correctly pinned. The best way forward 228 | # # is to simply comment out this pre-commit hook until we can move 229 | # # to Python >3.7. 230 | # rev: v25.1.3 231 | # hooks: 232 | # - id: ansible-lint 233 | # additional_dependencies: 234 | # # On its own ansible-lint does not pull in ansible, only 235 | # # ansible-core. Therefore, if an Ansible module lives in 236 | # # ansible instead of ansible-core, the linter will complain 237 | # # that the module is unknown. In these cases it is 238 | # # necessary to add the ansible package itself as an 239 | # # additional dependency, with the same pinning as is done in 240 | # # requirements-test.txt of cisagov/skeleton-ansible-role. 241 | # # 242 | # # Version 10 is required because the pip-audit pre-commit 243 | # # hook identifies a vulnerability in ansible-core 2.16.13, 244 | # # but all versions of ansible 9 have a dependency on 245 | # # ~=2.16.X. 246 | # # 247 | # # It is also a good idea to go ahead and upgrade to version 248 | # # 10 since version 9 is going EOL at the end of November: 249 | # # https://endoflife.date/ansible 250 | # # - ansible>=10,<11 251 | # # ansible-core 2.16.3 through 2.16.6 suffer from the bug 252 | # # discussed in ansible/ansible#82702, which breaks any 253 | # # symlinked files in vars, tasks, etc. for any Ansible role 254 | # # installed via ansible-galaxy. Hence we never want to 255 | # # install those versions. 256 | # # 257 | # # Note that the pip-audit pre-commit hook identifies a 258 | # # vulnerability in ansible-core 2.16.13. The pin of 259 | # # ansible-core to >=2.17 effectively also pins ansible to 260 | # # >=10. 
261 | # # 262 | # # It is also a good idea to go ahead and upgrade to 263 | # # ansible-core 2.17 since security support for ansible-core 264 | # # 2.16 ends this month: 265 | # yamllint disable-line rule:line-length 266 | # # https://docs.ansible.com/ansible/devel/reference_appendices/release_and_maintenance.html#ansible-core-support-matrix 267 | # # 268 | # # Note that any changes made to this dependency must also be 269 | # # made in requirements.txt in cisagov/skeleton-packer and 270 | # # requirements-test.txt in cisagov/skeleton-ansible-role. 271 | # - ansible-core>=2.17 272 | 273 | # Terraform hooks 274 | - repo: https://github.com/antonbabenko/pre-commit-terraform 275 | rev: v1.98.0 276 | hooks: 277 | - id: terraform_fmt 278 | - id: terraform_validate 279 | 280 | # Docker hooks 281 | - repo: https://github.com/IamTheFij/docker-pre-commit 282 | rev: v3.0.1 283 | hooks: 284 | - id: docker-compose-check 285 | 286 | # Packer hooks 287 | - repo: https://github.com/cisagov/pre-commit-packer 288 | rev: v0.3.0 289 | hooks: 290 | - id: packer_fmt 291 | - id: packer_validate 292 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | # Already being linted by pretty-format-json 2 | *.json 3 | # Already being linted by mdl 4 | *.md 5 | # Already being linted by yamllint 6 | *.yaml 7 | *.yml 8 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | --- 2 | extends: default 3 | 4 | rules: 5 | braces: 6 | # Do not allow non-empty flow mappings 7 | forbid: non-empty 8 | # Allow up to one space inside braces. This is required for Ansible compatibility. 9 | max-spaces-inside: 1 10 | 11 | brackets: 12 | # Do not allow non-empty flow sequences 13 | forbid: non-empty 14 | 15 | comments: 16 | # Ensure that inline comments have at least one space before the preceding content. 17 | # This is required for Ansible compatibility. 18 | min-spaces-from-content: 1 19 | 20 | # yamllint does not like it when you comment out different parts of 21 | # dictionaries in a list. You can see 22 | # https://github.com/adrienverge/yamllint/issues/384 for some examples of 23 | # this behavior. 24 | comments-indentation: disable 25 | 26 | indentation: 27 | # Ensure that block sequences inside of a mapping are indented 28 | indent-sequences: true 29 | # Enforce a specific number of spaces 30 | spaces: 2 31 | 32 | # yamllint does not allow inline mappings that exceed the line length by 33 | # default. There are many scenarios where the inline mapping may be a key, 34 | # hash, or other long value that would exceed the line length but cannot 35 | # reasonably be broken across lines. 36 | line-length: 37 | # This rule implies the allow-non-breakable-words rule 38 | allow-non-breakable-inline-mappings: true 39 | # Allows a 10% overage from the default limit of 80 40 | max: 88 41 | 42 | # Using anything other than strings to express octal values can lead to unexpected 43 | # and potentially unsafe behavior. Ansible strongly recommends against such practices 44 | # and these rules are needed for Ansible compatibility. Please see the following for 45 | # more information: 46 | # https://ansible.readthedocs.io/projects/lint/rules/risky-octal/ 47 | octal-values: 48 | # Do not allow explicit octal values (those beginning with a leading 0o). 
49 | forbid-explicit-octal: true 50 | # Do not allow implicit octal values (those beginning with a leading 0). 51 | forbid-implicit-octal: true 52 | 53 | quoted-strings: 54 | # Allow disallowed quotes (single quotes) for strings that contain allowed quotes 55 | # (double quotes). 56 | allow-quoted-quotes: true 57 | # Apply these rules to keys in mappings as well 58 | check-keys: true 59 | # We prefer double quotes for strings when they are needed 60 | quote-type: double 61 | # Only require quotes when they are necessary for proper processing 62 | required: only-when-needed 63 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome # 2 | 3 | We're so glad you're thinking about contributing to this open source 4 | project! If you're unsure or afraid of anything, just ask or submit 5 | the issue or pull request anyway. The worst that can happen is that 6 | you'll be politely asked to change something. We appreciate any sort 7 | of contribution, and don't want a wall of rules to get in the way of 8 | that. 9 | 10 | Before contributing, we encourage you to read our CONTRIBUTING policy 11 | (you are here), our [LICENSE](LICENSE), and our [README](README.md), 12 | all of which should be in this repository. 13 | 14 | ## Issues ## 15 | 16 | If you want to report a bug or request a new feature, the most direct 17 | method is to [create an 18 | issue](https://github.com/cisagov/pshtt/issues) in 19 | this repository. We recommend that you first search through existing 20 | issues (both open and closed) to check if your particular issue has 21 | already been reported. If it has then you might want to add a comment 22 | to the existing issue. If it hasn't then feel free to create a new 23 | one. 24 | 25 | ## Pull requests ## 26 | 27 | If you choose to [submit a pull 28 | request](https://github.com/cisagov/pshtt/pulls), 29 | you will notice that our continuous integration (CI) system runs a 30 | fairly extensive set of linters, syntax checkers, system, and unit tests. 31 | Your pull request may fail these checks, and that's OK. If you want 32 | you can stop there and wait for us to make the necessary corrections 33 | to ensure your code passes the CI checks. 34 | 35 | If you want to make the changes yourself, or if you want to become a 36 | regular contributor, then you will want to set up 37 | [pre-commit](https://pre-commit.com/) on your local machine. Once you 38 | do that, the CI checks will run locally before you even write your 39 | commit message. This speeds up your development cycle considerably. 40 | 41 | ### Setting up pre-commit ### 42 | 43 | There are a few ways to do this, but we prefer to use 44 | [`pyenv`](https://github.com/pyenv/pyenv) and 45 | [`pyenv-virtualenv`](https://github.com/pyenv/pyenv-virtualenv) to 46 | create and manage a Python virtual environment specific to this 47 | project. 48 | 49 | We recommend using the `setup-env` script located in this repository, 50 | as it automates the entire environment configuration process. The 51 | dependencies required to run this script are 52 | [GNU `getopt`](https://github.com/util-linux/util-linux/blob/master/misc-utils/getopt.1.adoc), 53 | [`pyenv`](https://github.com/pyenv/pyenv), and [`pyenv-virtualenv`](https://github.com/pyenv/pyenv-virtualenv). 
54 | If these tools are already configured on your system, you can simply run the 55 | following command: 56 | 57 | ```console 58 | ./setup-env 59 | ``` 60 | 61 | Otherwise, follow the steps below to manually configure your 62 | environment. 63 | 64 | #### Installing and using GNU `getopt`, `pyenv`, and `pyenv-virtualenv` #### 65 | 66 | On macOS, we recommend installing [brew](https://brew.sh/). Then 67 | installation is as simple as `brew install gnu-getopt pyenv pyenv-virtualenv` and 68 | adding this to your profile: 69 | 70 | ```bash 71 | # GNU getopt must be explicitly added to the path since it is 72 | # keg-only (https://docs.brew.sh/FAQ#what-does-keg-only-mean) 73 | export PATH="$(brew --prefix)/opt/gnu-getopt/bin:$PATH" 74 | 75 | # Setup pyenv 76 | export PYENV_ROOT="$HOME/.pyenv" 77 | export PATH="$PYENV_ROOT/bin:$PATH" 78 | eval "$(pyenv init --path)" 79 | eval "$(pyenv init -)" 80 | eval "$(pyenv virtualenv-init -)" 81 | ``` 82 | 83 | For Linux, Windows Subsystem for Linux (WSL), or macOS (if you 84 | don't want to use `brew`) you can use 85 | [pyenv/pyenv-installer](https://github.com/pyenv/pyenv-installer) to 86 | install the necessary tools. Before running this ensure that you have 87 | installed the prerequisites for your platform according to the 88 | [`pyenv` wiki 89 | page](https://github.com/pyenv/pyenv/wiki/common-build-problems). 90 | GNU `getopt` is included in most Linux distributions as part of the 91 | [`util-linux`](https://github.com/util-linux/util-linux) package. 92 | 93 | On WSL you should treat your platform as whatever Linux distribution 94 | you've chosen to install. 95 | 96 | Once you have installed `pyenv` you will need to add the following 97 | lines to your `.bash_profile` (or `.profile`): 98 | 99 | ```bash 100 | export PYENV_ROOT="$HOME/.pyenv" 101 | export PATH="$PYENV_ROOT/bin:$PATH" 102 | eval "$(pyenv init --path)" 103 | ``` 104 | 105 | and then add the following lines to your `.bashrc`: 106 | 107 | ```bash 108 | eval "$(pyenv init -)" 109 | eval "$(pyenv virtualenv-init -)" 110 | ``` 111 | 112 | If you want more information about setting up `pyenv` once installed, please run 113 | 114 | ```console 115 | pyenv init 116 | ``` 117 | 118 | and 119 | 120 | ```console 121 | pyenv virtualenv-init 122 | ``` 123 | 124 | for the current configuration instructions. 125 | 126 | If you are using a shell other than `bash` you should follow the 127 | instructions that the `pyenv-installer` script outputs. 128 | 129 | You will need to reload your shell for these changes to take effect so 130 | you can begin to use `pyenv`. 131 | 132 | For a list of Python versions that are already installed and ready to 133 | use with `pyenv`, use the command `pyenv versions`. To see a list of 134 | the Python versions available to be installed and used with `pyenv` 135 | use the command `pyenv install --list`. You can read more 136 | [here](https://github.com/pyenv/pyenv/blob/master/COMMANDS.md) about 137 | the many things that `pyenv` can do. See 138 | [here](https://github.com/pyenv/pyenv-virtualenv#usage) for the 139 | additional capabilities that pyenv-virtualenv adds to the `pyenv` 140 | command. 
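Before creating the virtual environment, you can quickly verify that
both tools are wired into your shell (assuming `pyenv` and
`pyenv-virtualenv` were installed as described above):

```console
pyenv --version
pyenv virtualenvs
```

If both commands succeed, the shell integration is working; `pyenv
virtualenvs` will simply print nothing if you have not yet created any
virtual environments.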
141 |
142 | #### Creating the Python virtual environment ####
143 |
144 | Once `pyenv` and `pyenv-virtualenv` are installed on your system, you
145 | can create and configure the Python virtual environment with these
146 | commands:
147 |
148 | ```console
149 | cd pshtt
150 | pyenv virtualenv <python_version> pshtt
151 | pyenv local pshtt
152 | pip install --requirement requirements-dev.txt
153 | ```
154 |
155 | #### Installing the pre-commit hook ####
156 |
157 | Now setting up pre-commit is as simple as:
158 |
159 | ```console
160 | pre-commit install
161 | ```
162 |
163 | At this point the pre-commit checks will run against any files that
164 | you attempt to commit. If you want to run the checks against the
165 | entire repo, just execute `pre-commit run --all-files`.
166 |
167 | ### Running unit and system tests ###
168 |
169 | In addition to the pre-commit checks the CI system will run the suite
170 | of unit and system tests that are included with this project. To run
171 | these tests locally execute `pytest` from the root of the project.
172 |
173 | We encourage any updates to these tests to improve the overall code
174 | coverage. If your pull request adds new functionality we would
175 | appreciate it if you extend existing test cases, or add new ones to
176 | exercise the newly added code.
177 |
178 | ## Public domain ##
179 |
180 | This project is in the public domain within the United States, and
181 | copyright and related rights in the work worldwide are waived through
182 | the [CC0 1.0 Universal public domain
183 | dedication](https://creativecommons.org/publicdomain/zero/1.0/).
184 |
185 | All contributions to this project will be released under the CC0
186 | dedication. By submitting a pull request, you are agreeing to comply
187 | with this waiver of copyright interest.
188 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal
2 |
3 | Statement of Purpose
4 |
5 | The laws of most jurisdictions throughout the world automatically confer
6 | exclusive Copyright and Related Rights (defined below) upon the creator and
7 | subsequent owner(s) (each and all, an "owner") of an original work of
8 | authorship and/or a database (each, a "Work").
9 |
10 | Certain owners wish to permanently relinquish those rights to a Work for the
11 | purpose of contributing to a commons of creative, cultural and scientific
12 | works ("Commons") that the public can reliably and without fear of later
13 | claims of infringement build upon, modify, incorporate in other works, reuse
14 | and redistribute as freely as possible in any form whatsoever and for any
15 | purposes, including without limitation commercial purposes. These owners may
16 | contribute to the Commons to promote the ideal of a free culture and the
17 | further production of creative, cultural and scientific works, or to gain
18 | reputation or greater distribution for their Work in part through the use and
19 | efforts of others.
20 | 21 | For these and/or other purposes and motivations, and without any expectation 22 | of additional consideration or compensation, the person associating CC0 with a 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work 25 | and publicly distribute the Work under its terms, with knowledge of his or her 26 | Copyright and Related Rights in the Work and the meaning and intended legal 27 | effect of CC0 on those rights. 28 | 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver
76 | is so judged Affirmer hereby grants to each affected person a royalty-free,
77 | non transferable, non sublicensable, non exclusive, irrevocable and
78 | unconditional license to exercise Affirmer's Copyright and Related Rights in
79 | the Work (i) in all territories worldwide, (ii) for the maximum duration
80 | provided by applicable law or treaty (including future time extensions), (iii)
81 | in any current or future medium and for any number of copies, and (iv) for any
82 | purpose whatsoever, including without limitation commercial, advertising or
83 | promotional purposes (the "License"). The License shall be deemed effective as
84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the
85 | License for any reason be judged legally invalid or ineffective under
86 | applicable law, such partial invalidity or ineffectiveness shall not
87 | invalidate the remainder of the License, and in such case Affirmer hereby
88 | affirms that he or she will not (i) exercise any of his or her remaining
89 | Copyright and Related Rights in the Work or (ii) assert any associated claims
90 | and causes of action with respect to the Work, in either case contrary to
91 | Affirmer's express Statement of Purpose.
92 |
93 | 4. Limitations and Disclaimers.
94 |
95 | a. No trademark or patent rights held by Affirmer are waived, abandoned,
96 | surrendered, licensed or otherwise affected by this document.
97 |
98 | b. Affirmer offers the Work as-is and makes no representations or warranties
99 | of any kind concerning the Work, express, implied, statutory or otherwise,
100 | including without limitation warranties of title, merchantability, fitness
101 | for a particular purpose, non infringement, or the absence of latent or
102 | other defects, accuracy, or the present or absence of errors, whether or not
103 | discoverable, all to the greatest extent permissible under applicable law.
104 |
105 | c. Affirmer disclaims responsibility for clearing rights of other persons
106 | that may apply to the Work or any use thereof, including without limitation
107 | any person's Copyright and Related Rights in the Work. Further, Affirmer
108 | disclaims responsibility for obtaining any necessary consents, permissions
109 | or other rights required for any use of the Work.
110 |
111 | d. Affirmer understands and acknowledges that Creative Commons is not a
112 | party to this document and has no duty or obligation with respect to this
113 | CC0 or use of the Work.
114 |
115 | For more information, please see
116 | <https://creativecommons.org/publicdomain/zero/1.0/>
117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pushing HTTPS 🔒 #
2 |
3 | [![Latest Version](https://img.shields.io/pypi/v/pshtt.svg)](https://pypi.org/project/pshtt/)
4 | [![GitHub Build Status](https://github.com/cisagov/pshtt/workflows/build/badge.svg)](https://github.com/cisagov/pshtt/actions)
5 | [![CodeQL](https://github.com/cisagov/pshtt/workflows/CodeQL/badge.svg)](https://github.com/cisagov/pshtt/actions/workflows/codeql-analysis.yml)
6 | [![Coverage Status](https://coveralls.io/repos/github/cisagov/pshtt/badge.svg?branch=develop)](https://coveralls.io/github/cisagov/pshtt?branch=develop)
7 | [![Known Vulnerabilities](https://snyk.io/test/github/cisagov/pshtt/develop/badge.svg)](https://snyk.io/test/github/cisagov/pshtt)
8 |
9 | `pshtt` (*"pushed"*) is a tool to scan domains for HTTPS best
10 | practices.
It saves its results to a CSV (or JSON) file.
11 |
12 | `pshtt` was developed to *push* organizations — especially large ones
13 | like the US Federal Government :us: — to adopt HTTPS across the
14 | enterprise. Federal agencies must comply with
15 | [M-15-13](https://https.cio.gov), a 2015 memorandum from the White
16 | House Office of Management and Budget, and [BOD
17 | 18-01](https://cyber.dhs.gov/bod/18-01/), a 2017 directive from the
18 | Department of Homeland Security, which require federal agencies to
19 | enforce HTTPS on their public web services. Much has been done, but
20 | there's [more yet to
21 | do](https://18f.gsa.gov/2017/01/04/tracking-the-us-governments-progress-on-moving-https/).
22 |
23 | `pshtt` is a collaboration between the Cybersecurity and Infrastructure
24 | Security Agency's [National Cybersecurity Assessments and Technical
25 | Services (NCATS) team](https://github.com/cisagov) and [the General
26 | Services Administration's 18F team](https://18f.gsa.gov), with
27 | [contributions from NASA, Lawrence Livermore National Laboratory, and
28 | various non-governmental
29 | organizations](https://github.com/cisagov/pshtt/graphs/contributors).
30 |
31 | ## Getting started ##
32 |
33 | `pshtt` can be installed as a module, or run directly from the
34 | repository.
35 |
36 | ### Installed as a module ###
37 |
38 | `pshtt` can be installed directly via pip:
39 |
40 | ```console
41 | pip install pshtt
42 | ```
43 |
44 | It can then be run directly:
45 |
46 | ```console
47 | pshtt example.com [options]
48 | ```
49 |
50 | ### Running directly ###
51 |
52 | To run the tool locally from the repository, without installing, first
53 | install the requirements:
54 |
55 | ```console
56 | pip install -r requirements.txt
57 | ```
58 |
59 | Then run it as a module via `python -m`:
60 |
61 | ```console
62 | python -m pshtt.cli example.com [options]
63 | ```
64 |
65 | ### Usage and examples ###
66 |
67 | ```console
68 | pshtt [options] DOMAIN...
69 | pshtt [options] INPUT
70 |
71 | pshtt dhs.gov
72 | pshtt --output=homeland.csv --debug dhs.gov us-cert.gov usss.gov
73 | pshtt --sorted current-federal.csv
74 | ```
75 |
76 | Note: if INPUT ends with `.csv`, domains will be read from the first
77 | column of the CSV. CSV output will always be written to disk (unless
78 | --json is specified), defaulting to `results.csv`.
79 |
80 | #### Options ####
81 |
82 | ```console
83 | -h --help Show this message.
84 | -s --sorted Sort output by domain, A-Z.
85 | -o --output=OUTFILE Name output file. (Defaults to "results".)
86 | -j --json Get results in JSON. (Defaults to CSV.)
87 | -m --markdown Get results in Markdown. (Defaults to CSV.)
88 | -d --debug Print debug output.
89 | -u --user-agent=AGENT Override user agent.
90 | -t --timeout=TIMEOUT Override timeout (in seconds).
91 | -c --cache-third-parties=DIR Cache third party data, and what directory to cache it in.
92 | -f --ca-file=PATH Specify custom CA bundle (PEM format)
93 | ```
94 |
95 | ##### Using your own CA bundle #####
96 |
97 | By default, `pshtt` relies on the root CAs that are trusted in the
98 | [Mozilla root
99 | store](https://hg.mozilla.org/mozilla-central/raw-file/tip/security/nss/lib/ckfw/builtins/certdata.txt).
100 | If you work behind a corporate proxy or have your own certificates that
101 | aren't publicly trusted, you can specify your own CA bundle:
102 |
103 | ```console
104 | pshtt --ca-file=/etc/ssl/ca.pem server.internal-location.gov
105 | ```
106 |
107 | ## What's checked?
##
108 |
109 | A domain is checked on its four endpoints:
110 |
111 | - `http://`
112 | - `http://www`
113 | - `https://`
114 | - `https://www`
115 |
116 | ### Domain and redirect info ###
117 |
118 | The following values are returned in `results.csv`:
119 |
120 | - `Domain` - The domain you're scanning!
121 | - `Base Domain` - The base domain of `Domain`. For example, for a
122 | Domain of `sub.example.com`, the Base Domain will be
123 | `example.com`. Usually this is the second-level domain, but `pshtt`
124 | will download and factor in the [Public Suffix
125 | List](https://publicsuffix.org) when calculating the base
126 | domain. (To cache the Public Suffix List, use `--cache-third-parties` as
127 | documented above.)
128 | - `Canonical URL` - One of the four endpoints described above; a
129 | judgment call based on the observed redirect logic of the domain.
130 | - `Live` - The domain is "live" if any endpoint is live.
131 | - `HTTPS Live` - The domain is "HTTPS live" if any HTTPS endpoint is
132 | live.
133 | - `HTTPS Full Connection` - The domain is "fully connected" if any
134 | HTTPS endpoint is fully connected. A "fully connected" HTTPS
135 | endpoint is one with which pshtt could make a full TLS connection.
136 | - `HTTPS Client Auth Required` - A domain requires client
137 | authentication if *any* HTTPS endpoint requires it for a full TLS
138 | connection.
139 | - `Redirect` - The domain is a "redirect domain" if at least one
140 | endpoint is a redirect, and all endpoints are either redirects or
141 | down.
142 | - `Redirect to` - If a domain is a "redirect domain", where does it
143 | redirect to?
144 |
145 | ### Landing on HTTPS ###
146 |
147 | - `Valid HTTPS` - A domain has "valid HTTPS" if it responds on port
148 | 443 at the hostname in its Canonical URL with an unexpired valid
149 | certificate for the hostname. This can be true even if the Canonical
150 | URL uses HTTP.
151 | - `HTTPS Publicly Trusted` - A domain is "publicly trusted" if its
152 | canonical endpoint has a publicly trusted certificate.
153 | - `HTTPS Custom Truststore Trusted` - A domain is "custom truststore
154 | trusted" if its canonical endpoint has a certificate that is trusted
155 | by the custom truststore.
156 | - `Defaults to HTTPS` - A domain "defaults to HTTPS" if its canonical
157 | endpoint uses HTTPS.
158 | - `Downgrades HTTPS` - A domain "downgrades HTTPS" if HTTPS is
159 | supported in some way, but its canonical HTTPS endpoint immediately
160 | redirects internally to HTTP.
161 | - `Strictly Forces HTTPS` - This is different than whether a domain
162 | "defaults" to HTTPS. A domain "Strictly Forces HTTPS" if one of the
163 | HTTPS endpoints is "live", and if both HTTP endpoints are either
164 | down or redirect immediately to any HTTPS URI. An HTTP redirect can
165 | go to HTTPS on another domain, as long as it's immediate. (A domain
166 | with an invalid cert can still be enforcing HTTPS.)
167 |
168 | ### Common errors ###
169 |
170 | - `HTTPS Bad Chain` - A domain has a bad chain if either HTTPS
171 | endpoint contains a bad chain.
172 | - `HTTPS Bad Hostname` - A domain has a bad hostname if either HTTPS
173 | endpoint fails hostname validation.
174 | - `HTTPS Expired Cert` - A domain has an expired certificate if either
175 | HTTPS endpoint has an expired certificate.
176 | - `HTTPS Self-Signed Cert` - A domain has a self-signed certificate if
177 | either HTTPS endpoint has a self-signed certificate.
178 | - `HTTPS Probably Missing Intermediate Cert` - A domain is "probably 179 | missing intermediate certificate" if the canonical HTTPS endpoint is 180 | probably missing an intermediate certificate. 181 | 182 | ### HSTS ### 183 | 184 | - `HSTS` - A domain has HTTP Strict Transport Security enabled if its 185 | canonical HTTPS endpoint has HSTS enabled. 186 | - `HSTS Header` - This field provides a domain's HSTS header at its 187 | canonical endpoint. 188 | - `HSTS Max Age` - A domain's HSTS max-age is its canonical endpoint's 189 | max-age. 190 | - `HSTS Entire Domain` - A domain has HSTS enabled for the entire 191 | domain if its **root HTTPS endpoint** (*not the canonical HTTPS 192 | endpoint*) has HSTS enabled and uses the HSTS `includeSubDomains` 193 | flag. 194 | - `HSTS Preload Ready` - A domain is HSTS "preload ready" if its 195 | **root HTTPS endpoint** (*not the canonical HTTPS endpoint*) has 196 | HSTS enabled, has a max-age of at least 18 weeks, and uses the 197 | `includeSubDomains` and `preload` flag. 198 | - `HSTS Preload Pending` - A domain is "preload pending" when it 199 | appears in the [Chrome preload pending 200 | list](https://hstspreload.org/api/v2/pending) with the 201 | `include_subdomains` flag equal to `true`. The intent of `pshtt` is 202 | to make sure that the user is *fully* protected, so it only counts 203 | domains as HSTS preloaded if they are *fully* HSTS preloaded 204 | (meaning that all subdomains are included as well). 205 | - `HSTS Preloaded` - A domain is HSTS preloaded if its domain name 206 | appears in the [Chrome preload 207 | list](https://chromium.googlesource.com/chromium/src/net/+/master/http/transport_security_state_static.json) 208 | with the `include_subdomains` flag equal to `true`, regardless of 209 | what header is present on any endpoint. The intent of `pshtt` is to 210 | make sure that the user is *fully* protected, so it only counts 211 | domains as HSTS preloaded if they are *fully* HSTS preloaded 212 | (meaning that all subdomains are included as well). 213 | - `Base Domain HSTS Preloaded` - A domain's base domain is HSTS 214 | preloaded if its base domain appears in the [Chrome preload 215 | list](https://chromium.googlesource.com/chromium/src/net/+/master/http/transport_security_state_static.json) 216 | with the `include_subdomains` flag equal to `true`. This is subtly 217 | different from `HSTS Entire Domain`, which inspects headers on the 218 | base domain to see if HSTS is set correctly to encompass the entire 219 | zone. 220 | 221 | ### Scoring ### 222 | 223 | These three fields use the previous results to come to high-level 224 | conclusions about a domain's behavior. 225 | 226 | - `Domain Supports HTTPS` - A domain 'Supports HTTPS' when it doesn't 227 | downgrade and has valid HTTPS, or when it doesn't downgrade and has 228 | a bad chain but not a bad hostname (a bad hostname makes it clear 229 | the domain isn't actively attempting to support HTTPS, whereas an 230 | incomplete chain is just a mistake.). Domains with a bad chain 231 | "support" HTTPS but user-side errors can be expected. 232 | - `Domain Enforces HTTPS` - A domain that 'Enforces HTTPS' must 233 | 'Support HTTPS' and default to HTTPS. For websites (where `Redirect` 234 | is `false`) they are allowed to *eventually* redirect to an 235 | `https://` URI. 
For "redirect domains" (domains where the `Redirect` 236 | value is `true`) they must *immediately* redirect clients to an 237 | `https://` URI (even if that URI is on another domain) in order to 238 | be said to enforce HTTPS. 239 | - `Domain Uses Strong HSTS` - A domain 'Uses Strong HSTS' when the 240 | max-age ≥ 31536000. 241 | 242 | ### General information ### 243 | 244 | - `IP` - The IP for the domain. 245 | - `Server Header` - The server header from the response for the 246 | domain. 247 | - `Server Version` - The server version, as extracted from the server 248 | header. 249 | - `HTTPS Cert Chain Length` - The certificate chain length for the 250 | canonical HTTPS endpoint. 251 | - `Notes` - A field where free-form notes about the domain can be 252 | stored. 253 | 254 | ### Uncommon errors ### 255 | 256 | - `Unknown Error` - A Boolean value indicating whether or not an 257 | unexpected exception was encountered when testing the domain. The 258 | purpose of this field is to flag any odd websites for further 259 | debugging. 260 | 261 | ## Troubleshooting ## 262 | 263 | ### DNS blackhole / DNS assist ### 264 | 265 | One issue which can occur when running `pshtt`, particularly for 266 | home/residential networks, with standard ISPs is the use of "DNS 267 | Assist" features, a.k.a. "DNS Blackholes". 268 | 269 | In these environments, you may see inconsistent results from `pshtt` 270 | owing to the fact that your ISP is attempting to detect a request for 271 | an unknown site without a DNS record and is redirecting you to a 272 | search page for that site. This means that an endpoint which *should* 273 | resolve as "not-alive", will instead resolve as "live", owing to the 274 | detection of the live search result page. 275 | 276 | If you would like to disable this "feature", several ISPs offer the 277 | ability to opt out of this service, and maintain their own 278 | instructions for doing so: 279 | 280 | - [AT&T](http://www.att.net/dnserrorassist/about/srchTrm=Redirect%20Bin) 281 | - [FIOS](https://www.verizon.com/support/residential/internet/fiosinternet/troubleshooting/network/questionsone/99147.htm) 282 | 283 | ## Who uses pshtt? ## 284 | 285 | - GSA maintains [Pulse](https://pulse.cio.gov), a dashboard that 286 | tracks how federal government domains are meeting best practices on 287 | the web. [Pulse is open source](https://github.com/18F/pulse). 288 | - The Freedom of the Press Foundation runs 289 | [securethe.news](https://securethe.news), a site that aims to "track 290 | and promote the adoption of HTTPS encryption by major news 291 | organizations' websites". [Secure the News is open 292 | source](https://securethe.news/blog/secure-news-open-source/). 293 | - DHS issues [HTTPS Reports](https://18f.gsa.gov/2017/01/06/open-source-collaboration-across-agencies-to-improve-https-deployment/) 294 | to federal executive branch agencies. 295 | 296 | ## Acknowledgements ## 297 | 298 | This code was modeled after [Ben 299 | Balter](https://github.com/benbalter)'s 300 | [site-inspector](https://github.com/benbalter/site-inspector), with 301 | significant guidance from [Eric Mill](https://github.com/konklone). 302 | 303 | ## Contributing ## 304 | 305 | We welcome contributions! Please see [`CONTRIBUTING.md`](CONTRIBUTING.md) for 306 | details. 307 | 308 | ## License ## 309 | 310 | This project is in the worldwide [public domain](LICENSE). 
311 |
312 | This project is in the public domain within the United States, and
313 | copyright and related rights in the work worldwide are waived through
314 | the [CC0 1.0 Universal public domain
315 | dedication](https://creativecommons.org/publicdomain/zero/1.0/).
316 |
317 | All contributions to this project will be released under the CC0
318 | dedication. By submitting a pull request, you are agreeing to comply
319 | with this waiver of copyright interest.
320 | -------------------------------------------------------------------------------- /bump-version: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash
2 |
3 | # bump-version [--push] [--label LABEL] (major | minor | patch | prerelease | build | finalize | show)
4 | # bump-version --list-files
5 |
6 | set -o nounset
7 | set -o errexit
8 | set -o pipefail
9 |
10 | # Stores the canonical version for the project.
11 | VERSION_FILE=src/pshtt/_version.py
12 | # Files that should be updated with the new version.
13 | VERSION_FILES=("$VERSION_FILE")
14 |
15 | USAGE=$(
16 | cat << END_OF_LINE
17 | Update the version of the project.
18 |
19 | Usage:
20 | ${0##*/} [--push] [--label LABEL] (major | minor | patch | prerelease | build | finalize | show)
21 | ${0##*/} --list-files
22 | ${0##*/} (-h | --help)
23 |
24 | Options:
25 | -h | --help Show this message.
26 | --push Perform a \`git push\` after updating the version.
27 | --label LABEL Specify the label to use when updating the build or prerelease version.
28 | --list-files List the files that will be updated when the version is bumped.
29 | END_OF_LINE
30 | )
31 |
32 | old_version=$(sed -n "s/^__version__ = \"\(.*\)\"$/\1/p" $VERSION_FILE)
33 | # Escape periods so they are interpreted as literal periods and don't
34 | # just match any character
35 | old_version_regex=${old_version//\./\\\.}
36 | new_version="$old_version"
37 |
38 | bump_part=""
39 | label=""
40 | commit_prefix="Bump"
41 | with_push=false
42 | commands_with_label=("build" "prerelease")
43 | commands_with_prerelease=("major" "minor" "patch")
44 | with_prerelease=false
45 |
46 | #######################################
47 | # Display an error message, the help information, and exit with a non-zero status.
48 | # Arguments:
49 | # Error message.
50 | #######################################
51 | function invalid_option() {
52 | echo "$1"
53 | echo "$USAGE"
54 | exit 1
55 | }
56 |
57 | #######################################
58 | # Bump the version using the provided command.
59 | # Arguments:
60 | # The version to bump.
61 | # The command to bump the version.
62 | # Returns:
63 | # The new version.
64 | #######################################
65 | function bump_version() {
66 | local temp_version
67 | temp_version=$(python -c "import semver; print(semver.parse_version_info('$1').${2})")
68 | echo "$temp_version"
69 | }
70 |
71 | if [ $# -eq 0 ]; then
72 | echo "$USAGE"
73 | exit 1
74 | else
75 | while [ $# -gt 0 ]; do
76 | case $1 in
77 | --push)
78 | if [ "$with_push" = true ]; then
79 | invalid_option "Push has already been set."
80 | fi
81 |
82 | with_push=true
83 | shift
84 | ;;
85 | --label)
86 | if [ -n "$label" ]; then
87 | invalid_option "Label has already been set."
88 | fi
89 |
90 | label="$2"
91 | shift 2
92 | ;;
93 | build | finalize | major | minor | patch)
94 | if [ -n "$bump_part" ]; then
95 | invalid_option "Only one version part should be bumped at a time."
96 | fi 97 | 98 | bump_part="$1" 99 | shift 100 | ;; 101 | prerelease) 102 | with_prerelease=true 103 | shift 104 | ;; 105 | show) 106 | echo "$old_version" 107 | exit 0 108 | ;; 109 | -h | --help) 110 | echo "$USAGE" 111 | exit 0 112 | ;; 113 | --list-files) 114 | printf '%s\n' "${VERSION_FILES[@]}" 115 | exit 0 116 | ;; 117 | *) 118 | invalid_option "Invalid option: $1" 119 | ;; 120 | esac 121 | done 122 | fi 123 | 124 | if [ -n "$label" ] && [ "$with_prerelease" = false ] && [[ ! " ${commands_with_label[*]} " =~ [[:space:]]${bump_part}[[:space:]] ]]; then 125 | invalid_option "Setting the label is only allowed for the following commands: ${commands_with_label[*]}" 126 | fi 127 | 128 | if [ "$with_prerelease" = true ] && [ -n "$bump_part" ] && [[ ! " ${commands_with_prerelease[*]} " =~ [[:space:]]${bump_part}[[:space:]] ]]; then 129 | invalid_option "Changing the prerelease is only allowed in conjunction with the following commands: ${commands_with_prerelease[*]}" 130 | fi 131 | 132 | label_option="" 133 | if [ -n "$label" ]; then 134 | label_option="token='$label'" 135 | fi 136 | 137 | if [ -n "$bump_part" ]; then 138 | if [ "$bump_part" = "finalize" ]; then 139 | commit_prefix="Finalize" 140 | bump_command="finalize_version()" 141 | elif [ "$bump_part" = "build" ]; then 142 | bump_command="bump_${bump_part}($label_option)" 143 | else 144 | bump_command="bump_${bump_part}()" 145 | fi 146 | new_version=$(bump_version "$old_version" "$bump_command") 147 | echo Changing version from "$old_version" to "$new_version" 148 | fi 149 | 150 | if [ "$with_prerelease" = true ]; then 151 | bump_command="bump_prerelease($label_option)" 152 | temp_version=$(bump_version "$new_version" "$bump_command") 153 | echo Changing version from "$new_version" to "$temp_version" 154 | new_version="$temp_version" 155 | fi 156 | 157 | tmp_file=/tmp/version.$$ 158 | for version_file in "${VERSION_FILES[@]}"; do 159 | if [ ! -f "$version_file" ]; then 160 | echo Missing expected file: "$version_file" 161 | exit 1 162 | fi 163 | sed "s/$old_version_regex/$new_version/" "$version_file" > $tmp_file 164 | mv $tmp_file "$version_file" 165 | done 166 | 167 | git add "${VERSION_FILES[@]}" 168 | git commit --message "$commit_prefix version from $old_version to $new_version" 169 | 170 | if [ "$with_push" = true ]; then 171 | git push 172 | fi 173 | -------------------------------------------------------------------------------- /gce-scripts/README.md: -------------------------------------------------------------------------------- 1 | # Pshtt as an HTTPS status checker # 2 | 3 | Welcome! This is the documentation on how to run pshtt to scan sites for their 4 | HTTPS status. These instructions are mostly about how to run it at scale, but at 5 | the end, there are instructions on how to run on a local instance. 6 | 7 | This document goes over how to both run pshtt on multiple instances on google 8 | cloud engine and also how to run it as a singular instance on your local 9 | machine. It takes about 30 minutes to set up from start to finish. 10 | 11 | Running pshtt on 150 instances takes about 12 - 15 hours for a million sites. 12 | Assume at worst that each site will take 10 seconds (which is the default 13 | timeout) and scale up to whatever timeframe you want to run in based off of 14 | that. 15 | 16 | Example: 1000 sites in 2 hours would take 2 instances. 17 | 18 | ## How to run pshtt on Google Cloud Engine ## 19 | 20 | ### Before you run ### 21 | 22 | 1. 
Set up a [google compute engine
23 | account](https://cloud.google.com/compute/docs/access/user-accounts/).
24 |
25 | 1. Make sure you have the correct quota allowances.
26 | - Go to the [quotas page](https://cloud.google.com/compute/quotas)
27 | and select the project that you want to run this under.
28 | - Request quotas --- click on the following items in the list and click
29 | "edit quotas" at the top of the page:
30 | - CPUS (all regions) --> 150
31 | - In use IP addresses --> 150
32 | - One Region's in use IPs (ex us-west1) --> 150
33 | - Same Region's CPUs (ex. us-west1) --> 150
34 |
35 | 1. Create Instance Group Template.
36 |
37 | You will want to run multiple instances (presumably), and creating an
38 | Instance Group template allows you to make up to 150 machines under the same
39 | template.
40 |
41 | - Go to Compute Engine, then click on the Instance templates
42 | tab and click "Create Instance Template".
43 | - Name --> "pshtt-template"
44 | - Machine type -- 1 CPU (n1-standard-1 (1 vCPU, 3.75 GB memory)).
45 | - Check allow HTTP and HTTPS traffic.
46 | - Boot Disk --- Ubuntu 14.04 LTS.
47 | - automatic restart (under management tab) -- off.
48 | - Hit create.
49 |
50 | 1. Create an SSH key ONLY for the google cloud instances and upload to your
51 | profile.
52 |
53 | This is a security measure. ***DO NOT USE YOUR REGULAR SSH KEY.***
54 |
55 | - `cd ~/.ssh && ssh-keygen -t rsa -f gce_pshtt_key`
56 | - Go to the [metadata
57 | tab](https://cloud.google.com/compute/docs/instances/adding-removing-ssh-keys)
58 | and hit edit.
59 | - `cd ~/.ssh && cat gce_pshtt_key.pub`
60 | - Copy the output of the above command and paste it into the console.
61 |
62 | 1. Create the instance group.
63 |
64 | It is important to name your instance group something identifiable,
65 | especially if you are sharing a project with others. Remember this instance
66 | group name for a later step. ***We recommend that you try one instance at
67 | first to make sure it works***.
68 |
69 | - Go to the instance group tab.
70 | - Click Multi-Zone, and select the region that you requested your
71 | instances for.
72 | - Choose "pshtt-template" under instance template.
73 | - Hit create.
74 | - Welcome to your new instance group!
75 |
76 | ### Updating data files and setting up to run ###
77 |
78 | The following is a set of commands to run to make your running directory.
79 |
80 | 1. Download the gcloud command line tool.
81 |
82 | - Follow the [download
83 | link](https://cloud.google.com/sdk/docs/#install_the_latest_cloud_tools_version_cloudsdk_current_version)
84 | and install the correct SDK for your OS.
85 | - If this is your first time installing the gcloud command line tool,
86 | follow the instructions on the page. Do not set any default zones.
87 | - If you already have this installed, follow these
88 | instructions:
89 | - `gcloud init`
90 | - Click `2` to create a new configuration.
91 | - Enter `pshtt-configuration`
92 | - Choose the appropriate account
93 | - Click the appropriate number corresponding to your google project
94 | - If it complains that the API is not enabled, hit enable and retry.
95 | - Do not set default zone or region
96 | - At this point, your default project should be this google project.
97 | You can switch to any of your previous projects by running `gcloud
98 | config set project PROJECTNAME`
99 |
100 | 1. Setting up your directory.
101 |
102 | - `mkdir ~/pshtt_run`
103 | - Creates the dir that you will run your program out of.
104 | - `gcloud compute instances list | sed -n '1!p' | grep
105 | "<instance_group_name>" | awk '{print $5}' > ~/pshtt_run/hosts.txt`
106 | - `<instance_group_name>` is what you named the instance group you created
107 | above.
108 |
109 | 1. Copy all .sh scripts from this directory:
110 |
111 | - Keep the name of the scripts the same.
112 | - `chmod +x ~/pshtt_run/*.sh`
113 | - which will make all the scripts executable.
114 | - `touch domains.csv`
115 | - Your domain list, one domain per line, with the input list ending in
116 | `.csv`.
117 | - Domains must have the scheme stripped from them and no trailing '/',
118 | such as:
119 | - `domain.tld`
120 | - `subdomain.domain.tld`
121 | - `www.subdomain.domain.tld`
122 | - `mkdir ~/pshtt_run/data_results/`
123 | - `mv ~/pshtt_run/combine_shards.py ~/pshtt_run/data_results`
124 | - Places combine_shards.py into data_results/.
125 | - `mkdir ~/pshtt_run/input_files/`
126 |
127 | 1. roots.pem
128 |
129 | We want to use our own CA file when running pshtt. We use the mozilla root
130 | store for this purpose. Follow the instructions in this
131 | [repository](https://github.com/agl/extract-nss-root-certs).
132 |
133 | 1. Updating ssh key
134 |
135 | - If your new ssh key is called "gce_pshtt_key", skip this step.
136 | - If you did not name your ***new*** ssh key gce_pshtt_key, then you will
137 | need to go through and rename the gce_pshtt_key in all the .sh files to
138 | whatever you named your key.
139 | - In vim, this is `:%s/gce_pshtt_key/yourkeynamehere/g`.
140 |
141 | ### How to run ###
142 |
143 | 1. `screen -S pshtt_running`
144 | 1. `cd ~/pshtt_run/`
145 | 1. `./run_all_scripts.sh <input_file> <#_of_shards> <output_file_name> >
146 | log.out`
147 | - Number of shards == number of hosts
148 | - Each machine will contain a shard of the data to run.
149 | - This is the script that sets up all machines and puts all datafiles on
150 | the machines for running.
151 | - `./run_all_scripts.sh top-1m.nocommas.8.31.2017 100 alexa`
152 | - Will produce 100 shards all starting with "alexa" in the input_files
153 | dir.
154 | - ex. alexa000.csv
155 | - NOTE: you can ONLY create 999 shards. If you need more than 999 shards,
156 | you will need to change the split_up_dataset.sh file.
157 | 1. Exit screen `ctrl+a+d`
158 |
159 | ### During the run ###
160 |
161 | - `./check_instances.sh`
162 | - Will print the IP of each host, as well as FINISHED or NOT FINISHED.
163 |
164 | ### After the run ###
165 |
166 | - `./grab_and_combine_data.sh`
167 | - Will grab all log and result data files, combine data files into one
168 | large result file, and put these into data_results/.
169 | - Delete your instance group. If you want to run data analysis, jump down to
170 | the data analysis portion.
171 |
172 | ## Running pshtt on your local machine ##
173 |
174 | 1. Copy packages_to_install.sh and run it to install the necessary packages.
175 | - `sudo ./packages_to_install.sh`
176 | 1. Clone pshtt.
177 | - `git clone https://github.com/dhs-ncats/pshtt.git`
178 | 1. Put roots.pem, running_script.sh, and your input file in the same dir as
179 | pshtt.
180 | - Follow directions under Updating data files above on how to get a
181 | roots.pem.
182 | - Domains must have the scheme stripped from them and no trailing '/', such
183 | as:
184 | - `domain.tld`
185 | - `subdomain.domain.tld`
186 | - `www.subdomain.domain.tld`
187 | - `chmod +x running_script.sh` to make it executable.
188 | 1. Run `./running_script.sh <input_file>`
189 | 1. Results and profit.
190 | - Results can be found in `<input_file>.json`.
191 | - If you want to be able to use this json file with any of the colab
192 | notebooks (like the one listed below), you will also need to run
193 | combine_shards.py:
194 | - Copy combine_shards.py into the same dir as the json file.
195 | - `echo <input_file>.json > to_combine.txt`
196 | - `python combine_shards.py to_combine.txt > final_results.json`
197 | - Log can be found in `time_<input_file>.txt`.
198 | -------------------------------------------------------------------------------- /gce-scripts/check_instances.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 |
3 | # Checks all the instances in hosts.txt and checks the end of the log file
4 | # to see if it's finished. The script prints out FINISHED or NOT FINISHED
5 | # for each host respectively.
6 |
7 | hosts_file='hosts.txt'
8 | list_of_files=$(ls -1q input_files)
9 | i=1
10 |
11 | # Grab the correct input file for the corresponding machine.
12 | for z in $list_of_files; do
13 | machine=$(sed "${i}q;d" $hosts_file)
14 | # Check if the file has 'Wrote results', which indicates that it's finished.
15 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" tail pshtt/time_"${z}".txt | grep -q 'Wrote results'
16 | finished=$?
17 | if [[ "${finished}" -eq 0 ]]; then
18 | echo 'server '"${machine}"' FINISHED'
19 | else
20 | echo 'server '"${machine}"' NOT FINISHED'
21 | fi
22 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" cat pshtt/time_"${z}".txt | grep -q 'Traceback'
23 | error=$?
24 | if [[ "${error}" -eq 0 ]]; then
25 | echo 'server '"${machine}"' ERROR ON THIS MACHINE. CHECK INSTANCE.'
26 | else
27 | echo 'server '"${machine}"' NO ERROR.'
28 | fi
29 | ((i = i + 1))
30 | done
31 | -------------------------------------------------------------------------------- /gce-scripts/combine_shards.py: -------------------------------------------------------------------------------- 1 | """Combines pshtt shards into one final data file."""
2 |
3 | # Standard Python Libraries
4 | import json
5 | import sys
6 |
7 |
8 | def main():
9 | """Read a file with a list of shard filenames and combine them."""
10 | if (len(sys.argv)) < 2:
11 | print("you need a filename!")
12 | exit(1)
13 | # Master file is the file with the list of filenames to intake.
14 | # Fileception.
15 | master_file = sys.argv[1]
16 | filenames = []
17 |
18 | # Read in the filenames that are the different shards.
19 | with open(master_file) as input_file:
20 | for line in input_file:
21 | filenames.append(line.rstrip())
22 | # For each shard, read it in and append to the final list to
23 | # print out.
24 | for f in filenames:
25 | with open(f) as input_file:
26 | json_data = json.load(input_file)
27 | for item in json_data:
28 | print(json.dumps(item))
29 |
30 |
31 | if __name__ == "__main__":
32 | main()
33 | -------------------------------------------------------------------------------- /gce-scripts/grab_and_combine_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 |
3 | # If pshtt is done on all machines, it grabs both
4 | # the log file and the output file from the machines and
5 | # places them in the data_results/ directory.
6 |
7 | # This script also sets up the files to be combined by
8 | # the combine_shards script. Because pshtt outputs the results
9 | # as a list of dicts, we need to combine all of those lists.
10 | # We output the dicts as a file of dicts, one per line.
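# For example, a shard result file containing the JSON list
# [{"Domain": "a.gov", ...}, {"Domain": "b.gov", ...}]
# becomes two lines in final_results.json, one JSON object per line.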
11 | hosts_file='hosts.txt'
12 | list_of_files=$(ls -1q input_files)
13 | i=1
14 |
15 | for z in $list_of_files; do
16 | machine=$(sed "${i}q;d" $hosts_file)
17 | echo 'Kicking off '"${machine}"' number '$i
18 | # Grab the actual result file.
19 | echo 'grabbing result file'
20 | scp -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}":~/pshtt/"${z}".json data_results/
21 | echo $?
22 | # Grab the log file from that machine.
23 | echo 'grabbing log file'
24 | scp -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}":~/pshtt/time_"${z}".txt data_results/
25 | echo $?
26 | echo 'creating to_combine.txt'
27 | touch data_results/to_combine.txt
28 | echo $?
29 | echo 'putting file name into combine script'
30 | echo "${z}"'.json' >> data_results/to_combine.txt
31 | echo $?
32 | ((i = i + 1))
33 | done
34 |
35 | cd data_results || exit
36 | python combine_shards.py to_combine.txt > final_results.json
37 | -------------------------------------------------------------------------------- /gce-scripts/packages_to_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 |
3 | # Installs all the necessary packages for pshtt to run.
4 | # Logs which package it is installing as well as its success (0) or failure
5 | # (1).
6 | echo 'UPDATE'
7 | apt-get -y update -qq
8 | echo $? ' ERROR CODE'
9 | echo 'GIT'
10 | apt-get -y install git -qq
11 | echo $? ' ERROR CODE'
12 | echo 'PYTHON3-PIP'
13 | apt-get -y install python3-pip -qq
14 | echo $? ' ERROR CODE'
15 | echo 'LIBFFI6'
16 | apt-get -y install libffi6 libffi-dev -qq
17 | echo $? ' ERROR CODE'
18 | echo 'LIBSSL'
19 | apt-get -y install build-essential libssl-dev libffi-dev python3-dev -qq
20 | echo $? ' ERROR CODE'
21 | echo 'SETUPTOOLS'
22 | pip3 install --upgrade setuptools -qq
23 | echo $? ' ERROR CODE'
24 | echo 'CFFI'
25 | pip3 install cffi -qq
26 | echo $? ' ERROR CODE'
27 | echo 'SSLYZE'
28 | pip3 install sslyze -qq
29 | echo $? ' ERROR CODE'
30 | echo 'PUBLIC SUFFIX'
31 | pip3 install publicsuffix -qq
32 | echo $? ' ERROR CODE'
33 | echo 'REQUESTS'
34 | pip3 install --upgrade requests -qq
35 | echo $? ' ERROR CODE'
36 | echo 'DOCOPT'
37 | pip3 install docopt -qq
38 | echo $? ' ERROR CODE'
39 | echo 'PYOPENSSL'
40 | pip3 install pyopenssl -qq
41 | echo $? ' ERROR CODE'
42 | echo 'PYTABLEWRITER'
43 | pip3 install pytablewriter -qq
44 | echo $? ' ERROR CODE'
45 | echo 'TYPING'
46 | pip3 install typing -qq
47 | echo $? ' ERROR CODE'
48 | echo 'FINISHED INSTALLING PACKAGES'
49 | -------------------------------------------------------------------------------- /gce-scripts/run_all_scripts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 |
3 | # This is the first script to run. This script calls
4 | # all the other pertinent scripts for setting up
5 | # and kicking off runs.
6 |
7 | # ./run_all_scripts.sh <input_file> <#_of_shards> <output_file_name>
8 | # Ex: ./run_all_scripts.sh top-1m.nocommas.8.31.2017 100 alexa
9 |
10 | # Only the first input argument is required. The other two will default
11 | # to 10 and shard_ respectively.
12 |
13 | # will split up the file top-1m.nocommas.8.31.2017 into 100 files
14 | # into a dir called input_files, and all the files will start with
15 | # alexa. So the shard files will be alexa000.csv, alexa001.csv
16 | # etc.
17 |
18 | # If any of the scripts fails, this hard fails and tells the user what script
19 | # went wrong.
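# The parameter expansions below (${2-10} and ${3-shard_}) supply the
# defaults: if the second or third argument is unset, "10" or "shard_"
# is substituted.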
20 | 21 | input_file=$1 22 | number_of_shards=${2-10} 23 | output_file_name=${3-shard_} 24 | 25 | echo 'Splitting dataset' 26 | ./split_up_dataset.sh "${input_file}" "${number_of_shards}" "${output_file_name}" 27 | error=$? 28 | 29 | if [[ "${error}" -eq 1 ]]; then 30 | echo 'ERROR WITH SPLIT DATASET SCRIPT' 31 | exit 1 32 | fi 33 | 34 | echo 'Scp and setup' 35 | ./scp_and_setup.sh "${output_file_name}" 36 | error=$? 37 | if [[ "${error}" -eq 1 ]]; then 38 | echo 'ERROR WITH SCP AND SETUP SCRIPT' 39 | exit 1 40 | fi 41 | 42 | echo 'Running instances' 43 | ./run_instances.sh 44 | error=$? 45 | if [[ "${error}" -eq 1 ]]; then 46 | echo 'ERROR WITH RUNNING INSTANCES SCRIPT' 47 | exit 1 48 | fi 49 | -------------------------------------------------------------------------------- /gce-scripts/run_instances.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Runs pshtt on all instances, using the correct input file. 4 | 5 | hosts_file='hosts.txt' 6 | list_of_files=$(ls -1q input_files/) 7 | i=1 8 | 9 | # For each file, find the corresponding machine it's been uploaded to, 10 | # check if the screen exists (create if not) and kick off pshtt on that screen. 11 | 12 | for z in $list_of_files; do 13 | machine=$(sed "${i}q;d" $hosts_file) 14 | # Check if screen exists. 15 | echo 'Kicking off '"${machine}"' number '$i 16 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" screen -list | grep -q "pshtt_screen" 17 | answer=$? 18 | # If screen does not exist, then create it. 19 | if [[ "${answer}" -eq 1 ]]; then 20 | echo 'Creating screen' 21 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" screen -S pshtt_screen -d -m 22 | echo $? 23 | fi 24 | 25 | # Run script in screen. 26 | echo 'Kicking off script' 27 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" "screen -S pshtt_screen -X -p 0 stuff $'cd pshtt && ./running_script.sh $z\n'" 28 | echo $? 29 | ((i = i + 1)) 30 | done 31 | -------------------------------------------------------------------------------- /gce-scripts/running_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Runs pshtt with a 10 second timeout, with roots.pem as the CA file, 4 | # and debug on. Logging goes to time_.txt 5 | 6 | # ./running_script.sh test_file.csv 7 | # output files: test_file.csv.json, time_test_file.csv.txt 8 | 9 | input_file=$1 10 | (time python3 -m pshtt.cli "${input_file}" -t 10 -u -j -o "${input_file}".json -f "roots.pem" --debug) 2> time_"${input_file}".txt 11 | -------------------------------------------------------------------------------- /gce-scripts/scp_and_setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This file is broken up into three distinct parts. 4 | # The first part is uploading the packages to install 5 | # script to all machines, and kicking it off. 6 | # We do this first because 1) we need those packages to do anything else 7 | # and 2) it takes about 10 - 15 seconds per machine, so we parallelize it. 8 | 9 | # The second part is simply a check to see if the packages are finished 10 | # installing. We test the last machine in the list first because if that is 11 | # finished then all the other machines SHOULD also be finished. After we verify 12 | # that the last machine is finished, loop back through all of the machines and 13 | # make sure that they've all finished. 
If they haven't, print out an error
14 | # warning for that machine and stop the whole process.
15 | # Takes the host file and the list of shards and
16 | # scps shards to hosts.
17 | # Also scps various scripts and installs pshtt
18 | # and all the necessary packages.
19 | # List of IPs, separated by line
20 | hosts_file='hosts.txt'
21 | # number of files that we need to cycle through
22 | num_files=$(find input_files/ -mindepth 1 -maxdepth 1 | wc -l)
23 | # list of files; we do this deterministically
24 | # because then we can run this command across
25 | # other scripts and expect the same order of files.
26 | list_of_files=$(find input_files/ -mindepth 1 -maxdepth 1)
27 | # We flip this bit if we find an error with any of the machines. This tells us
28 | # to stop the process so that the user can go by hand and fix the machine.
29 | error_with_packages=1
30 |
31 | # Upload script and install packages on all machines.
32 | # parallelized.
33 | ################################################################
34 | for i in $(seq 1 "${num_files}"); do
35 | # Grab the ip from hosts.txt that corresponds to the file number we are
36 | # uploading.
37 | # If we are uploading file #3 in the list, go to line 3 in the hosts file
38 | # and upload to that ip.
39 |
40 | machine=$(sed "${i}q;d" $hosts_file)
41 | echo "Now on ${machine} number ${i}"
42 | # Do not do strict host key checking so that you don't have to type "yes" for
43 | # each machine.
44 | echo 'Uploading packages_to_install.sh'
45 | scp -i ~/.ssh/gce_pshtt_key -o "StrictHostKeyChecking no" packages_to_install.sh ubuntu@"${machine}":~/
46 | echo $?
47 | # We echo after each command to ensure that it worked. 0 means success.
48 | # The Log file is how we can tell if the packages have all been uploaded.
49 | echo 'Creating packages log file'
50 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" touch package_log_file.txt
51 | echo $?
52 | # Check to see if this screen exists already.
53 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" screen -list | grep -q "package_screen"
54 | answer=$?
55 | # If the screen exists, then we won't create another one. Otherwise, create.
56 | if [[ "${answer}" -eq 1 ]]; then
57 | echo 'Creating screen'
58 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" screen -S package_screen -d -m
59 | echo $?
60 | fi
61 | # Run packages_to_install and pipe to package_log_file.txt on each machine.
62 | ssh -i ~/.ssh/gce_pshtt_key -t ubuntu@"${machine}" "screen -S package_screen -X -p 0 stuff $'sudo ./packages_to_install.sh > package_log_file.txt\n'"
63 | echo $?
64 | done
65 |
66 | # Check that all machines have finished installing packages.
67 | ###################################################################
68 | # Grab the last machine in the hosts file. This was the last one to
69 | # be uploaded and kicked off, so presumably it will be the last one
70 | # to finish.
71 | machine=$(sed "${num_files}q;d" $hosts_file)
72 | while true; do
73 | echo 'Waiting on packages to install'
74 | # Wait 10 seconds before checking the file again.
75 | sleep 10
76 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" tail package_log_file.txt | grep -q 'FINISHED INSTALLING PACKAGES'
77 | finished=$?
78 | if [[ "${finished}" -eq 0 ]]; then 79 | break 80 | fi 81 | done 82 | 83 | for i in $(seq 1 "${num_files}"); do 84 | machine=$(sed "${i}q;d" $hosts_file) 85 | echo "Now on ${machine} number ${i}" 86 | echo 'Checking packages finished installing' 87 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" tail package_log_file.txt | grep -q 'FINISHED INSTALLING PACKAGES' 88 | finished=$? 89 | if [[ "${finished}" -eq 0 ]]; then 90 | # Check if any of the machines had a problem installing packages. 91 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" cat package_log_file.txt | grep -q '1 ERROR CODE' 92 | error=$? 93 | if [[ "${error}" -eq 0 ]]; then 94 | echo 'ERROR WITH '"${machine}" 95 | error_with_packages=0 96 | fi 97 | else 98 | # The machine never reported that it finished installing; warn the user 99 | # and flag the error so the whole process stops, as described above. 100 | echo 'ERROR: '"${machine}"' NEVER FINISHED INSTALLING PACKAGES' 101 | error_with_packages=0 102 | fi 103 | done 104 | 105 | # If any of the machines had an error with a package, stop the entire process 106 | # and inform the user. 107 | if [[ "${error_with_packages}" -eq 0 ]]; then 108 | echo 'ERROR FOUND WITH PACKAGES' 109 | exit 1 110 | fi 111 | 112 | # Upload remaining data files. 113 | ##################################################################### 114 | i=1 115 | for y in $list_of_files; do 116 | machine=$(sed "${i}q;d" $hosts_file) 117 | echo "Now on ${machine} number ${i}" 118 | echo 'Cloning the pshtt GitHub repo' 119 | ssh -i ~/.ssh/gce_pshtt_key -t ubuntu@"${machine}" git clone https://github.com/dhs-ncats/pshtt.git 120 | echo $? 121 | echo 'Copying data file to pshtt directory' 122 | scp -i ~/.ssh/gce_pshtt_key "${y}" ubuntu@"${machine}":~/pshtt/ 123 | echo $? 124 | echo 'Copying roots.pem into pshtt directory' 125 | scp -i ~/.ssh/gce_pshtt_key "roots.pem" ubuntu@"${machine}":~/pshtt/ 126 | echo $? 127 | echo 'Copying running script into pshtt directory' 128 | scp -i ~/.ssh/gce_pshtt_key running_script.sh ubuntu@"${machine}":~/pshtt/ 129 | echo $? 130 | echo "${y}" 131 | ((i = i + 1)) 132 | done 133 | -------------------------------------------------------------------------------- /gce-scripts/split_up_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Usage: ./split_up_dataset.sh <input_file> [number_of_shards] [output_file_prefix] 4 | # Ex: ./split_up_dataset.sh top-1m.nocommas.8.31.2017 100 alexa 5 | 6 | # Uses split to break up the input file into N shards. 7 | # Because of how split works, some files will be larger or smaller 8 | # than others, but the sum of the files will equal the length of the 9 | # original file. 10 | 11 | # Add a .csv suffix because that's what pshtt takes in. 12 | 13 | # Place all files into the input_files dir for posterity. 14 | 15 | input_file=$1 16 | number_of_shards=${2-10} 17 | output_file_name=${3-shard_} 18 | 19 | split -a 3 --number=l/"${number_of_shards}" -d "${input_file}" input_files/"${output_file_name}" --additional-suffix=.csv 20 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | # Increase verbosity, display extra test summary info for tests that did not pass, 3 | # display code coverage results, and enable debug logging 4 | addopts = --verbose -ra --cov --log-cli-level=DEBUG 5 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | --editable .[dev] 2 | --requirement requirements-test.txt 3 | build 4 | ipython 5 | mypy 6 | # The bump-version script requires at least version 3 of semver.
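# For reference, a development environment is typically created either with
# the setup-env script at the repository root (which installs this file's
# contents automatically) or directly with pip:
#   pip install --requirement requirements-dev.txt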
7 | semver>=3 8 | twine 9 | -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | --editable .[test] 2 | --requirement requirements.txt 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Note: Add any additional requirements to setup.py's install_requires field 2 | --editable . 3 | wheel 4 | -------------------------------------------------------------------------------- /setup-env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | set -o pipefail 6 | 7 | USAGE=$( 8 | cat << 'END_OF_LINE' 9 | Configure a development environment for this repository. 10 | 11 | It does the following: 12 | - Allows the user to specify the Python version to use for the virtual environment. 13 | - Allows the user to specify a name for the virtual environment. 14 | - Verifies pyenv and pyenv-virtualenv are installed. 15 | - Creates the Python virtual environment. 16 | - Configures the activation of the virtual environment for the repo directory. 17 | - Installs the requirements needed for development (including mypy type stubs). 18 | - Installs git pre-commit hooks. 19 | - Configures git remotes for upstream "lineage" repositories. 20 | 21 | Usage: 22 | setup-env [--force] [--install-hooks] [--list-versions] [--venv-name venv_name] [--python-version python_version] 23 | setup-env (-h | --help) 24 | 25 | Options: 26 | -f | --force Delete the virtual environment if it already exists. 27 | -h | --help Show this message. 28 | -i | --install-hooks Install hook environments for all environments in the 29 | pre-commit config file. 30 | -l | --list-versions List available Python versions and select one interactively. 31 | -p | --python-version Specify the Python version for the virtual environment. 32 | -v | --venv-name Specify the name of the virtual environment. 33 | 34 | END_OF_LINE 35 | ) 36 | 37 | # Display pyenv's installed Python versions 38 | python_versions() { 39 | pyenv versions --bare --skip-aliases --skip-envs 40 | } 41 | 42 | check_python_version() { 43 | local version=$1 44 | 45 | # This regex matches semantically correct version strings, which Python 46 | # versions follow. For more information see here: 47 | # https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string 48 | # Break the regex down into readable parts: major.minor.patch 49 | local major="0|[1-9]\d*" 50 | local minor="0|[1-9]\d*" 51 | local patch="0|[1-9]\d*" 52 | 53 | # Split the prerelease part up for readability 54 | # Start of the prerelease 55 | local prerelease="(?:-" 56 | # Numeric or alphanumeric identifiers 57 | prerelease+="(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)" 58 | # Additional dot-separated identifiers 59 | prerelease+="(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*" 60 | # End of the prerelease, making it optional 61 | prerelease+=")?" 62 | # Optional build metadata 63 | local build="(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?" 64 | 65 | # Final regex composed of parts 66 | local regex="^($major)\.($minor)\.($patch)$prerelease$build$" 67 | 68 | # This checks if the Python version does not match the regex pattern specified in $regex, 69 | # using Perl for regex matching. If the pattern is not found, then prompt the user with 70 | # the invalid version message. 71 | if !
echo "$version" | perl -ne "exit(!/$regex/)"; then 72 | echo "Invalid version of Python: Python follows semantic versioning," \ 73 | "so any version string that is not a valid semantic version is an" \ 74 | "invalid version of Python." 75 | exit 1 76 | # Else if the Python version isn't installed then notify the user. 77 | # grep -E is used for searching through text lines that match the 78 | # specific version. 79 | elif ! python_versions | grep -E "^${version}$" > /dev/null; then 80 | echo "Error: Python version $version is not installed." 81 | echo "Installed Python versions are:" 82 | python_versions 83 | exit 1 84 | else 85 | echo "Using Python version $version" 86 | fi 87 | } 88 | 89 | # Flag to force deletion and creation of virtual environment 90 | FORCE=0 91 | 92 | # Initialize the other flags 93 | INSTALL_HOOKS=0 94 | LIST_VERSIONS=0 95 | PYTHON_VERSION="" 96 | VENV_NAME="" 97 | 98 | # Define long options 99 | LONGOPTS="force,help,install-hooks,list-versions,python-version:,venv-name:" 100 | 101 | # Define short options for getopt 102 | SHORTOPTS="fhilp:v:" 103 | 104 | # Check for GNU getopt by matching a specific pattern ("getopt from util-linux") 105 | # in its version output. This approach presumes the output format remains stable. 106 | # Be aware that format changes could invalidate this check. 107 | if [[ $(getopt --version 2> /dev/null) != *"getopt from util-linux"* ]]; then 108 | cat << 'END_OF_LINE' 109 | 110 | Please note, this script requires GNU getopt due to its enhanced 111 | functionality and compatibility with certain script features that 112 | are not supported by the POSIX getopt found in some systems, particularly 113 | those with a non-GNU version of getopt. This distinction is crucial 114 | as a system might have a non-GNU version of getopt installed by default, 115 | which could lead to unexpected behavior. 116 | 117 | On macOS, we recommend installing brew (https://brew.sh/). Then installation 118 | is as simple as `brew install gnu-getopt` and adding this to your 119 | profile: 120 | 121 | export PATH="$(brew --prefix)/opt/gnu-getopt/bin:$PATH" 122 | 123 | GNU getopt must be explicitly added to the PATH since it 124 | is keg-only (https://docs.brew.sh/FAQ#what-does-keg-only-mean). 125 | 126 | END_OF_LINE 127 | exit 1 128 | fi 129 | 130 | # Check to see if pyenv is installed 131 | if [ -z "$(command -v pyenv)" ] || { [ -z "$(command -v pyenv-virtualenv)" ] && [ ! -f "$(pyenv root)/plugins/pyenv-virtualenv/bin/pyenv-virtualenv" ]; }; then 132 | echo "pyenv and pyenv-virtualenv are required." 133 | if [[ "$OSTYPE" == "darwin"* ]]; then 134 | cat << 'END_OF_LINE' 135 | 136 | On macOS, we recommend installing brew, https://brew.sh/. Then installation 137 | is as simple as `brew install pyenv pyenv-virtualenv` and adding this to your 138 | profile: 139 | 140 | eval "$(pyenv init -)" 141 | eval "$(pyenv virtualenv-init -)" 142 | 143 | END_OF_LINE 144 | 145 | fi 146 | cat << 'END_OF_LINE' 147 | For Linux, Windows Subsystem for Linux (WSL), or macOS (if you don't want 148 | to use "brew") you can use https://github.com/pyenv/pyenv-installer to install 149 | the necessary tools. Before running this ensure that you have installed the 150 | prerequisites for your platform according to the pyenv wiki page, 151 | https://github.com/pyenv/pyenv/wiki/common-build-problems. 152 | 153 | On WSL you should treat your platform as whatever Linux distribution you've 154 | chosen to install. 
155 | 156 | Once you have installed "pyenv" you will need to add the following lines to 157 | your ".bashrc": 158 | 159 | export PATH="$PATH:$HOME/.pyenv/bin" 160 | eval "$(pyenv init -)" 161 | eval "$(pyenv virtualenv-init -)" 162 | END_OF_LINE 163 | exit 1 164 | fi 165 | 166 | # Use GNU getopt to parse options 167 | if ! PARSED=$(getopt --options $SHORTOPTS --longoptions $LONGOPTS --name "$0" -- "$@"); then 168 | echo "Error parsing options" 169 | exit 1 170 | fi 171 | eval set -- "$PARSED" 172 | 173 | while true; do 174 | case "$1" in 175 | -f | --force) 176 | FORCE=1 177 | shift 178 | ;; 179 | -h | --help) 180 | echo "$USAGE" 181 | exit 0 182 | ;; 183 | -i | --install-hooks) 184 | INSTALL_HOOKS=1 185 | shift 186 | ;; 187 | -l | --list-versions) 188 | LIST_VERSIONS=1 189 | shift 190 | ;; 191 | -p | --python-version) 192 | PYTHON_VERSION="$2" 193 | shift 2 194 | # Check the Python version being passed in. 195 | check_python_version "$PYTHON_VERSION" 196 | ;; 197 | -v | --venv-name) 198 | VENV_NAME="$2" 199 | shift 2 200 | ;; 201 | --) 202 | shift 203 | break 204 | ;; 205 | *) 206 | # Unreachable due to GNU getopt handling all options 207 | echo "Programming error" 208 | exit 64 209 | ;; 210 | esac 211 | done 212 | 213 | # Determine the virtual environment name 214 | if [ -n "$VENV_NAME" ]; then 215 | # Use the user-provided environment name 216 | env_name="$VENV_NAME" 217 | else 218 | # Set the environment name to the last part of the working directory. 219 | env_name=${PWD##*/} 220 | fi 221 | 222 | # List Python versions and select one interactively. 223 | if [ $LIST_VERSIONS -ne 0 ]; then 224 | echo Available Python versions: 225 | python_versions 226 | # Read the user's desired Python version. 227 | # -r: treat backslashes as literal, -p: display prompt before input. 228 | read -r -p "Enter the desired Python version: " PYTHON_VERSION 229 | # Check the Python version being passed in. 230 | check_python_version "$PYTHON_VERSION" 231 | fi 232 | 233 | # Remove any lingering local configuration. 234 | if [ $FORCE -ne 0 ]; then 235 | rm -f .python-version 236 | pyenv virtualenv-delete --force "${env_name}" || true 237 | elif [[ -f .python-version ]]; then 238 | cat << 'END_OF_LINE' 239 | An existing .python-version file was found. Either remove this file yourself 240 | or re-run with the --force option to have it deleted along with the associated 241 | virtual environment. 242 | 243 | rm .python-version 244 | 245 | END_OF_LINE 246 | exit 1 247 | fi 248 | 249 | # Create a new virtual environment for this project 250 | # 251 | # If $PYTHON_VERSION is undefined then the current pyenv Python version will be used. 252 | # 253 | # We can't quote ${PYTHON_VERSION:=} below since if the variable is 254 | # undefined then we want nothing to appear; this is the reason for the 255 | # "shellcheck disable" line below. 256 | # 257 | # shellcheck disable=SC2086 258 | if ! pyenv virtualenv ${PYTHON_VERSION:=} "${env_name}"; then 259 | cat << END_OF_LINE 260 | An existing virtual environment named $env_name was found. Either delete this 261 | environment yourself or re-run with the --force option to have it deleted. 262 | 263 | pyenv virtualenv-delete ${env_name} 264 | 265 | END_OF_LINE 266 | exit 1 267 | fi 268 | 269 | # Set the local application-specific Python version(s) by writing the 270 | # version name to a file named `.python-version'. 
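# For example, in a checkout named "pshtt" with no --venv-name given, this
# writes "pshtt" to .python-version; with the pyenv-virtualenv shell hooks
# described above, that environment is then activated automatically whenever
# you enter this directory.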
271 | pyenv local "${env_name}" 272 | 273 | # Upgrade pip and friends 274 | python3 -m pip install --upgrade pip setuptools wheel 275 | 276 | # Find a requirements file (if possible) and install 277 | for req_file in "requirements-dev.txt" "requirements-test.txt" "requirements.txt"; do 278 | if [[ -f $req_file ]]; then 279 | pip install --requirement $req_file 280 | break 281 | fi 282 | done 283 | 284 | # Install git pre-commit hooks now or later. 285 | if [ $INSTALL_HOOKS -ne 0 ]; then 286 | pre-commit install --install-hooks 287 | else 288 | pre-commit install 289 | fi 290 | 291 | # Set up git remotes from the lineage configuration. 292 | # This could fail if the remotes are already set up, but that is ok. 293 | set +o errexit 294 | 295 | eval "$( 296 | python3 << 'END_OF_LINE' 297 | # Standard Python Libraries 298 | from pathlib import Path 299 | import sys 300 | 301 | # Third-Party Libraries 302 | import yaml 303 | 304 | LINEAGE_CONFIG = Path(".github/lineage.yml") 305 | 306 | if not LINEAGE_CONFIG.exists(): 307 | print("No lineage configuration found.", file=sys.stderr) 308 | sys.exit(0) 309 | 310 | with LINEAGE_CONFIG.open("r") as f: 311 | lineage = yaml.safe_load(stream=f) 312 | 313 | if lineage["version"] == "1": 314 | for parent_name, v in lineage["lineage"].items(): 315 | remote_url = v["remote-url"] 316 | print(f"git remote add {parent_name} {remote_url};") 317 | print(f"git remote set-url --push {parent_name} no_push;") 318 | else: 319 | print(f'Unsupported lineage version: {lineage["version"]}', file=sys.stderr) 320 | END_OF_LINE 321 | )" 322 | 323 | # Install all necessary mypy type stubs 324 | mypy --install-types --non-interactive src/ 325 | 326 | # Qapla' 327 | echo "Success!" 328 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the setup module for the pshtt project.
3 | 4 | Based on: 5 | 6 | - https://packaging.python.org/distributing/ 7 | - https://github.com/pypa/sampleproject/blob/master/setup.py 8 | - https://blog.ionelmc.ro/2014/05/25/python-packaging/#the-structure 9 | """ 10 | 11 | # Standard Python Libraries 12 | import codecs 13 | from glob import glob 14 | from os.path import abspath, basename, dirname, join, splitext 15 | 16 | # Third-Party Libraries 17 | from setuptools import find_packages, setup 18 | 19 | 20 | def readme(): 21 | """Read in and return the contents of the project's README.md file.""" 22 | with open("README.md", encoding="utf-8") as f: 23 | return f.read() 24 | 25 | 26 | # Below two methods were pulled from: 27 | # https://packaging.python.org/guides/single-sourcing-package-version/ 28 | def read(rel_path): 29 | """Open a file for reading from a given relative path.""" 30 | here = abspath(dirname(__file__)) 31 | with codecs.open(join(here, rel_path), "r") as fp: 32 | return fp.read() 33 | 34 | 35 | def get_version(version_file): 36 | """Extract a version number from the given file path.""" 37 | for line in read(version_file).splitlines(): 38 | if line.startswith("__version__"): 39 | delim = '"' if '"' in line else "'" 40 | return line.split(delim)[1] 41 | raise RuntimeError("Unable to find version string.") 42 | 43 | 44 | setup( 45 | name="pshtt", 46 | # Versions should comply with PEP440 47 | version=get_version("src/pshtt/_version.py"), 48 | description="Scan websites for HTTPS deployment best practices", 49 | long_description=readme(), 50 | long_description_content_type="text/markdown", 51 | # Landing page for CISA's cybersecurity mission 52 | url="https://www.cisa.gov/cybersecurity", 53 | # Additional URLs for this project per 54 | # https://packaging.python.org/guides/distributing-packages-using-setuptools/#project-urls 55 | project_urls={ 56 | "Source": "https://github.com/cisagov/pshtt", 57 | "Tracker": "https://github.com/cisagov/pshtt/issues", 58 | }, 59 | # Author details 60 | author="Cybersecurity and Infrastructure Security Agency", 61 | author_email="github@cisa.dhs.gov", 62 | license="License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", 63 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 64 | classifiers=[ 65 | # How mature is this project? Common values are 66 | # 3 - Alpha 67 | # 4 - Beta 68 | # 5 - Production/Stable 69 | "Development Status :: 4 - Beta", 70 | # Indicate who your project is intended for 71 | "Intended Audience :: Developers", 72 | # Pick your license as you wish (should match "license" above) 73 | "License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", 74 | # Specify the Python versions you support here. In particular, ensure 75 | # that you indicate whether you support Python 2, Python 3 or both. 76 | "Programming Language :: Python :: 3", 77 | "Programming Language :: Python :: 3 :: Only", 78 | "Programming Language :: Python :: 3.7", 79 | "Programming Language :: Python :: 3.8", 80 | "Programming Language :: Python :: 3.9", 81 | "Programming Language :: Python :: 3.10", 82 | # "Programming Language :: Python :: 3.11", 83 | # "Programming Language :: Python :: 3.12", 84 | # "Programming Language :: Python :: 3.13", 85 | "Programming Language :: Python :: Implementation :: CPython", 86 | ], 87 | python_requires=">=3.7", 88 | # What does your project relate to? 
89 | keywords="https best practices", 90 | packages=find_packages(where="src"), 91 | package_dir={"": "src"}, 92 | py_modules=[splitext(basename(path))[0] for path in glob("src/*.py")], 93 | install_requires=[ 94 | "docopt>=0.6.2", 95 | "publicsuffixlist[update]>=0.9.2 ", 96 | "pyopenssl>=17.5.0", 97 | "pytablereader>=0.15.0", 98 | "pytablewriter>=0.27.2", 99 | "python-dateutil>=2.7.3", 100 | "pytz>=2018.5", 101 | "requests>=2.18.4", 102 | "setuptools", 103 | "sslyze>=3.0.0,<5.0.0", 104 | "wget>=3.2", 105 | ], 106 | extras_require={ 107 | # IMPORTANT: Keep type hinting-related dependencies of the dev section 108 | # in sync with the mypy pre-commit hook configuration (see 109 | # .pre-commit-config.yaml). Any changes to type hinting-related 110 | # dependencies here should be reflected in the additional_dependencies 111 | # field of the mypy pre-commit hook to avoid discrepancies in type 112 | # checking between environments. 113 | "dev": [ 114 | "types-docopt", 115 | "types-pyOpenSSL", 116 | "types-requests", 117 | "types-setuptools", 118 | "types-urllib3", 119 | ], 120 | "test": [ 121 | "coverage", 122 | "coveralls", 123 | "pre-commit", 124 | "pytest-cov", 125 | "pytest", 126 | ], 127 | }, 128 | # Conveniently allows one to run the CLI tool as `pshtt` 129 | entry_points={"console_scripts": ["pshtt = pshtt.cli:main"]}, 130 | ) 131 | -------------------------------------------------------------------------------- /src/pshtt/__init__.py: -------------------------------------------------------------------------------- 1 | """The pshtt library.""" 2 | 3 | # Standard Python Libraries 4 | from typing import List 5 | 6 | # We disable a Flake8 check for "Module imported but unused (F401)" here because 7 | # although this import is not directly used, it populates the value 8 | # package_name.__version__, which is used to get version information about this 9 | # Python package. 10 | from ._version import __version__ # noqa: F401 11 | 12 | __all__: List[str] = [] 13 | -------------------------------------------------------------------------------- /src/pshtt/__main__.py: -------------------------------------------------------------------------------- 1 | """Code to run if this package is used as a Python module.""" 2 | 3 | from .cli import main 4 | 5 | main() 6 | -------------------------------------------------------------------------------- /src/pshtt/_version.py: -------------------------------------------------------------------------------- 1 | """This file defines the version of this module.""" 2 | 3 | __version__ = "0.7.1" 4 | -------------------------------------------------------------------------------- /src/pshtt/cli.py: -------------------------------------------------------------------------------- 1 | """pshtt ("pushed") is a tool to test domains for HTTPS best practices. 2 | 3 | Usage: 4 | pshtt (INPUT ...) [--output OUTFILE] [--sorted] [--json] [--markdown] [--debug] [--timeout TIMEOUT] [--user-agent AGENT] [--cache-third-parties DIR] [--ca-file PATH] [--pt-int-ca-file PATH] 5 | pshtt (-h | --help) 6 | 7 | Options: 8 | -h --help Show this message. 9 | -s --sorted Sort output by domain, A-Z. 10 | -o --output=OUTFILE Name output file. (Defaults to "results".) 11 | -j --json Get results in JSON. (Defaults to CSV.) 12 | -m --markdown Get results in Markdown. (Defaults to CSV.) 13 | -d --debug Print debug output. 14 | -u --user-agent=AGENT Override user agent. 15 | -t --timeout=TIMEOUT Override timeout (in seconds). 
16 | -c --cache-third-parties=DIR Cache third party data, and what directory to cache it in. 17 | -f --ca-file=PATH Specify custom CA bundle (PEM format) 18 | -p --pt-int-ca-file=PATH Specify public trust CA bundle with intermediates (PEM format) 19 | 20 | Notes: 21 | If the first INPUT ends with .csv, domains will be read from CSV. 22 | CSV output will always be written to disk, defaulting to results.csv. 23 | """ 24 | 25 | # Standard Python Libraries 26 | import csv 27 | import logging 28 | import sys 29 | 30 | # Third-Party Libraries 31 | import docopt 32 | import pytablewriter 33 | 34 | from . import pshtt, utils 35 | from ._version import __version__ 36 | from .utils import smart_open 37 | 38 | 39 | def to_csv(results, out_filename): 40 | """Output the provided results in CSV format to the provided filename.""" 41 | utils.debug("Opening CSV file: %s", out_filename) 42 | with smart_open(out_filename) as out_file: 43 | writer = csv.writer(out_file) 44 | 45 | # Write out header 46 | writer.writerow(pshtt.HEADERS) 47 | 48 | # Write out the row data as it completes 49 | for result in results: 50 | row = [result[header] for header in pshtt.HEADERS] 51 | writer.writerow(row) 52 | 53 | logging.warning("Wrote results to %s.", out_filename) 54 | 55 | 56 | def to_json(results, out_filename): 57 | """Output the provided results in JSON format to the provided filename.""" 58 | # Generate (yield) all the results before exporting to JSON 59 | results = list(results) 60 | 61 | with smart_open(out_filename) as out_file: 62 | json_content = utils.json_for(results) 63 | 64 | out_file.write(json_content + "\n") 65 | 66 | if out_file is not sys.stdout: 67 | logging.warning("Wrote results to %s.", out_filename) 68 | 69 | 70 | def to_markdown(results, out_filename): 71 | """Output the provided results in Markdown format to the provided filename.""" 72 | # Generate (yield) all the results before exporting to Markdown 73 | table = [[f" {result[header]}" for header in pshtt.HEADERS] for result in results] 74 | 75 | utils.debug("Printing Markdown...", divider=True) 76 | with smart_open(out_filename) as out_file: 77 | writer = pytablewriter.MarkdownTableWriter() 78 | 79 | writer.header_list = pshtt.HEADERS 80 | writer.value_matrix = table 81 | writer.stream = out_file 82 | 83 | writer.write_table() 84 | 85 | 86 | def main(): 87 | """Provide a command line interface to the pshtt library.""" 88 | args = docopt.docopt(__doc__, version=__version__) 89 | utils.configure_logging(args["--debug"]) 90 | 91 | out_filename = args["--output"] 92 | 93 | # Read from a .csv, or allow domains on the command line. 94 | domains = [] 95 | if args["INPUT"][0].endswith(".csv"): 96 | domains = utils.load_domains(args["INPUT"][0]) 97 | else: 98 | domains = args["INPUT"] 99 | 100 | domains = utils.format_domains(domains) 101 | 102 | # If the user wants to sort them, sort them in place. 103 | if args["--sorted"]: 104 | domains.sort() 105 | 106 | options = { 107 | "user_agent": args["--user-agent"], 108 | "timeout": args["--timeout"], 109 | "cache-third-parties": args["--cache-third-parties"], 110 | "ca_file": args["--ca-file"], 111 | "pt_int_ca_file": args["--pt-int-ca-file"], 112 | } 113 | 114 | # Do the domain inspections 115 | results = pshtt.inspect_domains(domains, options) 116 | 117 | # JSON can go to STDOUT, or to a file. 
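# (Note: to_json and to_markdown write through utils.smart_open, which
# falls back to stdout when --output was not supplied, i.e. when
# out_filename is None.)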
118 | if args["--json"]: 119 | to_json(results, out_filename) 120 | 121 | # Markdown can go to STDOUT, or to a file 122 | elif args["--markdown"]: 123 | to_markdown(results, out_filename) 124 | 125 | # CSV always goes to a file. 126 | else: 127 | if out_filename is None: 128 | out_filename = "results.csv" 129 | 130 | to_csv(results, out_filename) 131 | -------------------------------------------------------------------------------- /src/pshtt/models.py: -------------------------------------------------------------------------------- 1 | """Define the models used in this library.""" 2 | 3 | 4 | class Domain: 5 | """Define the domain model.""" 6 | 7 | def __init__(self, domain): 8 | """Initialize the model.""" 9 | self.domain = domain 10 | 11 | # 4 endpoints for each domain. 12 | self.http = None 13 | self.httpwww = None 14 | self.https = None 15 | self.httpswww = None 16 | self.unknown_error = False 17 | 18 | # Filled in after analyzing each endpoint. 19 | self.canonical = None 20 | 21 | def to_object(self): 22 | """Convert the model to a dictionary.""" 23 | return { 24 | "https": self.https.to_object(), 25 | "httpswww": self.httpswww.to_object(), 26 | "http": self.http.to_object(), 27 | "httpwww": self.httpwww.to_object(), 28 | } 29 | 30 | 31 | class Endpoint: 32 | """Define the endpoint model.""" 33 | 34 | def __init__(self, protocol, host, base_domain): 35 | """Initialize the model.""" 36 | # Basic endpoint description 37 | self.protocol = protocol 38 | self.host = host # "www" or "root" 39 | self.base_domain = base_domain 40 | self.url = self.url_for() 41 | 42 | # all HTTP/HTTPS endpoints have these 43 | self.headers = ( 44 | {} 45 | ) # will be replaced with a requests.structures.CaseInsensitiveDict 46 | self.status = None 47 | self.live = None 48 | self.ip = None 49 | self.redirect = None 50 | self.server_header = None 51 | self.server_version = None 52 | self.unknown_error = False 53 | self.notes = "" 54 | 55 | # If an endpoint redirects, characterize the redirect behavior 56 | self.redirect_immediately_to = None 57 | self.redirect_immediately_to_www = None 58 | self.redirect_immediately_to_https = None 59 | self.redirect_immediately_to_http = None 60 | self.redirect_immediately_to_external = None 61 | self.redirect_immediately_to_subdomain = None 62 | self.redirect_eventually_to = None 63 | self.redirect_eventually_to_https = None 64 | self.redirect_eventually_to_http = None 65 | self.redirect_eventually_to_external = None 66 | self.redirect_eventually_to_subdomain = None 67 | 68 | # Only HTTPS endpoints have these. 69 | # Initialize all of them to None, so that it's 70 | # discernible if they don't get explicitly set. 71 | self.https_full_connection = None 72 | self.https_client_auth_required = False 73 | self.https_valid = None 74 | self.https_public_trusted = None 75 | self.https_custom_trusted = None 76 | self.https_bad_chain = None 77 | self.https_bad_hostname = None 78 | self.https_expired_cert = None 79 | self.https_self_signed_cert = None 80 | self.https_cert_chain_len = None 81 | self.https_missing_intermediate_cert = None 82 | self.hsts = None 83 | self.hsts_header = None 84 | self.hsts_max_age = None 85 | self.hsts_all_subdomains = None 86 | self.hsts_preload = None 87 | self.hsts_preloaded = None 88 | 89 | def url_for(self): 90 | """Return an appropriately formatted URL for the base domain.""" 91 | if self.host == "root": 92 | prefix = "" 93 | elif self.host == "www": 94 | prefix = "www." 
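        else:
            # Defensive guard, added for illustration: the library only ever
            # constructs "root" and "www" hosts, but without this branch
            # `prefix` would be left unbound for any other value.
            raise ValueError(f"Unexpected host: {self.host}")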
95 | 96 | return f"{self.protocol}://{prefix}{self.base_domain}" 97 | 98 | # The fields we want to serialize to JSON. 99 | def to_object(self): 100 | """Convert the model to a dictionary.""" 101 | obj = { 102 | "url": self.url, 103 | "headers": dict(self.headers), 104 | "status": self.status, 105 | "ip": self.ip, 106 | "live": self.live, 107 | "redirect": self.redirect, 108 | "redirect_eventually_to": self.redirect_eventually_to, 109 | "redirect_immediately_to": self.redirect_immediately_to, 110 | "redirect_immediately_to_www": self.redirect_immediately_to_www, 111 | "redirect_immediately_to_https": self.redirect_immediately_to_https, 112 | "redirect_immediately_to_http": self.redirect_immediately_to_http, 113 | "redirect_immediately_to_external": self.redirect_immediately_to_external, 114 | "redirect_immediately_to_subdomain": self.redirect_immediately_to_subdomain, 115 | "redirect_eventually_to_https": self.redirect_eventually_to_https, 116 | "redirect_eventually_to_http": self.redirect_eventually_to_http, 117 | "redirect_eventually_to_external": self.redirect_eventually_to_external, 118 | "redirect_eventually_to_subdomain": self.redirect_eventually_to_subdomain, 119 | "server_header": self.server_header, 120 | "server_version": self.server_version, 121 | "notes": self.notes, 122 | "unknown_error": self.unknown_error, 123 | } 124 | 125 | if self.protocol == "https": 126 | obj["https_full_connection"] = self.https_full_connection 127 | obj["https_client_auth_required"] = self.https_client_auth_required 128 | obj["https_valid"] = self.https_valid 129 | obj["https_public_trusted"] = self.https_public_trusted 130 | obj["https_custom_trusted"] = self.https_custom_trusted 131 | obj["https_bad_chain"] = self.https_bad_chain 132 | obj["https_bad_hostname"] = self.https_bad_hostname 133 | obj["https_expired_cert"] = self.https_expired_cert 134 | obj["https_self_signed_cert"] = self.https_self_signed_cert 135 | obj["https_cert_chain_len"] = self.https_cert_chain_len 136 | obj["https_missing_intermediate_cert"] = ( 137 | self.https_missing_intermediate_cert 138 | ) 139 | obj["hsts"] = self.hsts 140 | obj["hsts_header"] = self.hsts_header 141 | obj["hsts_max_age"] = self.hsts_max_age 142 | obj["hsts_all_subdomains"] = self.hsts_all_subdomains 143 | obj["hsts_preload"] = self.hsts_preload 144 | 145 | return obj 146 | -------------------------------------------------------------------------------- /src/pshtt/utils.py: -------------------------------------------------------------------------------- 1 | """Define utility functions for the pshtt library.""" 2 | 3 | # Standard Python Libraries 4 | import contextlib 5 | import csv 6 | import datetime 7 | import errno 8 | import json 9 | import logging 10 | import os 11 | import re 12 | import sys 13 | import traceback 14 | 15 | 16 | # Display exception without re-throwing it. 
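# A hypothetical call site (not from this codebase) illustrating the
# intended pattern:
#
#     try:
#         check_endpoint(endpoint)  # any operation that may raise
#     except Exception:
#         logging.debug(format_last_exception())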
17 | def format_last_exception(): 18 | """Pretty format the last raised exception.""" 19 | exc_type, exc_value, exc_traceback = sys.exc_info() 20 | return "\n".join(traceback.format_exception(exc_type, exc_value, exc_traceback)) 21 | 22 | 23 | # mkdir -p in python, from: 24 | # http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python 25 | def mkdir_p(path): 26 | """Make a directory and any missing directories in the path.""" 27 | try: 28 | os.makedirs(path) 29 | except OSError as exc: # Python >2.5 30 | if exc.errno == errno.EEXIST: 31 | pass 32 | else: 33 | raise 34 | 35 | 36 | def json_for(data): 37 | """Pretty format the given object to JSON.""" 38 | return json.dumps(data, sort_keys=True, indent=2, default=format_datetime) 39 | 40 | 41 | def write(content, destination, binary=False): 42 | """Write contents to a destination after making any missing directories.""" 43 | parent = os.path.dirname(destination) 44 | if parent != "": 45 | mkdir_p(parent) 46 | 47 | with ( 48 | open(destination, "bw") if binary else open(destination, "w", encoding="utf-8") 49 | ) as f: 50 | f.write(content) 51 | 52 | 53 | def format_datetime(obj): 54 | """Provide a formatted datetime.""" 55 | if isinstance(obj, datetime.date): 56 | return obj.isoformat() 57 | if isinstance(obj, str): 58 | return obj 59 | return None 60 | 61 | 62 | # Load domains from a CSV, skip a header row 63 | def load_domains(domain_csv): 64 | """Load a list of domains from a CSV file.""" 65 | domains = [] 66 | with open(domain_csv, encoding="utf-8") as csvfile: 67 | for row in csv.reader(csvfile): 68 | # Skip empty rows. 69 | if not row or not row[0].strip(): 70 | continue 71 | 72 | row[0] = row[0].lower() 73 | # Skip any header row. 74 | if not domains and row[0].startswith("domain"): 75 | continue 76 | 77 | domains.append(row[0]) 78 | return domains 79 | 80 | 81 | # Configure logging level, so logging.debug can hinge on --debug. 82 | def configure_logging(debug_logging=False): 83 | """Configure the logging library.""" 84 | log_level = logging.DEBUG if debug_logging else logging.WARNING 85 | logging.basicConfig(format="%(message)s", level=log_level) 86 | 87 | 88 | def format_domains(domains): 89 | """Format a given list of domains.""" 90 | formatted_domains = [] 91 | 92 | for domain in domains: 93 | # Replace a single instance of http://, https://, and www. if present. 94 | formatted_domains.append(re.sub(r"^(https?://)?(www\.)?", "", domain)) 95 | 96 | return formatted_domains 97 | 98 | 99 | def debug(*args, divider=False): 100 | """Output a debugging message.""" 101 | if divider: 102 | logging.debug("\n-------------------------\n") 103 | 104 | if args: 105 | logging.debug(*args) 106 | 107 | 108 | @contextlib.contextmanager 109 | def smart_open(filename=None): 110 | """Context manager that can handle writing to a file or stdout. 
111 | 112 | Adapted from: https://stackoverflow.com/a/17603000 113 | """ 114 | handle = sys.stdout if filename is None else open(filename, "w", encoding="utf-8") 115 | 116 | try: 117 | yield handle 118 | finally: 119 | if handle is not sys.stdout: 120 | handle.close() 121 | -------------------------------------------------------------------------------- /tag.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | set -o pipefail 6 | 7 | version=$(./bump-version show) 8 | 9 | git tag "v$version" && git push --tags 10 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """pytest plugin configuration. 2 | 3 | https://docs.pytest.org/en/latest/writing_plugins.html#conftest-py-plugins 4 | """ 5 | 6 | # Third-Party Libraries 7 | import pytest 8 | 9 | 10 | def pytest_addoption(parser): 11 | """Add new commandline options to pytest.""" 12 | parser.addoption( 13 | "--runslow", action="store_true", default=False, help="run slow tests" 14 | ) 15 | 16 | 17 | def pytest_configure(config): 18 | """Register new markers.""" 19 | config.addinivalue_line("markers", "slow: mark test as slow") 20 | 21 | 22 | def pytest_collection_modifyitems(config, items): 23 | """Modify collected tests based on custom marks and commandline options.""" 24 | if config.getoption("--runslow"): 25 | # --runslow given in cli: do not skip slow tests 26 | return 27 | skip_slow = pytest.mark.skip(reason="need --runslow option to run") 28 | for item in items: 29 | if "slow" in item.keywords: 30 | item.add_marker(skip_slow) 31 | -------------------------------------------------------------------------------- /tests/test_badssl.py: -------------------------------------------------------------------------------- 1 | """Test bad SSL results from a domain.""" 2 | 3 | # Standard Python Libraries 4 | import unittest 5 | 6 | # cisagov Libraries 7 | from pshtt.models import Domain, Endpoint 8 | from pshtt.pshtt import basic_check, hsts_check 9 | 10 | 11 | def inspect(base_domain): 12 | """Populate a domain model with the provided domain.""" 13 | domain = Domain(base_domain) 14 | domain.http = Endpoint("http", "root", base_domain) 15 | domain.httpwww = Endpoint("http", "www", base_domain) 16 | domain.https = Endpoint("https", "root", base_domain) 17 | domain.httpswww = Endpoint("https", "www", base_domain) 18 | 19 | return domain 20 | 21 | # Analyze HTTP endpoint responsiveness and behavior. 22 | basic_check(domain.http) 23 | basic_check(domain.httpwww) 24 | basic_check(domain.https) 25 | basic_check(domain.httpswww) 26 | 27 | # Analyze HSTS header, if present, on each HTTPS endpoint.
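    # NOTE: the early "return domain" above makes the basic_check() and
    # hsts_check() calls here unreachable; each test case instead invokes
    # the check it needs directly on the endpoint under test.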
28 | hsts_check(domain.https) 29 | hsts_check(domain.httpswww) 30 | 31 | return domain 32 | 33 | 34 | @unittest.skip("Disable live tests against badssl for now") 35 | class TestCertificate(unittest.TestCase): 36 | """Test different bad certificate results.""" 37 | 38 | def test_https_expired(self): 39 | """Test when the certificate has expired.""" 40 | domain = inspect("expired.badssl.com") 41 | basic_check(domain.https) 42 | 43 | self.assertTrue(domain.https.https_expired_cert) 44 | 45 | def test_https_bad_hostname(self): 46 | """Test when the certificate has a bad hostname.""" 47 | domain = inspect("wrong.host.badssl.com") 48 | basic_check(domain.https) 49 | 50 | self.assertTrue(domain.https.https_bad_hostname) 51 | 52 | def test_https_bad_chain(self): 53 | """Test when there is a bad chain of trust for a certificate.""" 54 | domain = inspect("untrusted-root.badssl.com") 55 | basic_check(domain.https) 56 | 57 | self.assertTrue(domain.https.https_bad_chain) 58 | 59 | def test_https_self_signed_cert(self): 60 | """Test when a certificate is self-signed.""" 61 | domain = inspect("self-signed.badssl.com") 62 | basic_check(domain.https) 63 | 64 | self.assertTrue(domain.https.https_self_signed_cert) 65 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | """Test the command line interface functionality of the library.""" 2 | 3 | # Standard Python Libraries 4 | import os 5 | import sys 6 | import tempfile 7 | import unittest 8 | 9 | # cisagov Libraries 10 | from pshtt import pshtt as _pshtt 11 | from pshtt.cli import to_csv 12 | from pshtt.models import Domain, Endpoint 13 | 14 | 15 | class FakeSuffixList: 16 | """Test against a fake suffix list.""" 17 | 18 | def get_public_suffix(self, hostname, *args, **kwargs): 19 | """Return the public suffix of a hostname.""" 20 | return hostname 21 | 22 | 23 | # Artificially set up the preload and suffix lists. 24 | # This should be irrelevant after #126 is decided upon / merged 25 | _pshtt.SUFFIX_LIST = FakeSuffixList() 26 | _pshtt.PRELOAD_LIST = [] 27 | _pshtt.PRELOAD_PENDING = [] 28 | 29 | 30 | class TestToCSV(unittest.TestCase): 31 | """Test the CSV output of the command line interface.""" 32 | 33 | @classmethod 34 | def setUpClass(cls): 35 | """Perform initial setup.""" 36 | base_domain = "example.com" 37 | 38 | domain = Domain(base_domain) 39 | domain.http = Endpoint("http", "root", base_domain) 40 | domain.httpwww = Endpoint("http", "www", base_domain) 41 | domain.https = Endpoint("https", "root", base_domain) 42 | domain.httpswww = Endpoint("https", "www", base_domain) 43 | 44 | cls.results = _pshtt.result_for(domain) 45 | cls.temp_filename = os.path.join(tempfile.gettempdir(), "results.csv") 46 | 47 | @unittest.skipIf(sys.version_info[0] < 3, "Python 3 test only") 48 | def test_no_results(self): 49 | """Test when there are no results.""" 50 | to_csv([], self.temp_filename) 51 | 52 | with open(self.temp_filename) as fh: 53 | content = fh.read() 54 | 55 | expected = ",".join(_pshtt.HEADERS) + "\n" 56 | 57 | self.assertEqual(content, expected) 58 | 59 | @unittest.skipIf(sys.version_info[0] < 3, "Python 3 test only") 60 | def test_single_result(self): 61 | """Test a single domain result.""" 62 | to_csv([self.results], self.temp_filename) 63 | 64 | with open(self.temp_filename) as fh: 65 | content = fh.read() 66 | 67 | domain_data = [ 68 | ("Domain", "example.com"), 69 | ("Base Domain", "example.com"), 70 |
("Canonical URL", "http://example.com"), 71 | ("Live", "False"), 72 | ("HTTPS Live", "False"), 73 | ("HTTPS Full Connection", "False"), 74 | ("HTTPS Client Auth Required", "False"), 75 | ("Redirect", "False"), 76 | ("Redirect To", ""), 77 | ("Valid HTTPS", "False"), 78 | ("HTTPS Publicly Trusted", "False"), 79 | ("HTTPS Custom Truststore Trusted", "False"), 80 | ("Defaults to HTTPS", "False"), 81 | ("Downgrades HTTPS", "False"), 82 | ("Strictly Forces HTTPS", "False"), 83 | ("HTTPS Bad Chain", "False"), 84 | ("HTTPS Bad Hostname", "False"), 85 | ("HTTPS Expired Cert", "False"), 86 | ("HTTPS Self Signed Cert", "False"), 87 | ("HSTS", ""), 88 | ("HSTS Header", ""), 89 | ("HSTS Max Age", ""), 90 | ("HSTS Entire Domain", ""), 91 | ("HSTS Preload Ready", "False"), 92 | ("HSTS Preload Pending", "False"), 93 | ("HSTS Preloaded", "False"), 94 | ("Base Domain HSTS Preloaded", "False"), 95 | ("Domain Supports HTTPS", "False"), 96 | ("Domain Enforces HTTPS", "False"), 97 | ("Domain Uses Strong HSTS", ""), 98 | ("IP", ""), 99 | ("Server Header", ""), 100 | ("Server Version", ""), 101 | ("HTTPS Cert Chain Length", ""), 102 | ("HTTPS Probably Missing Intermediate Cert", "False"), 103 | ("Notes", ""), 104 | ("Unknown Error", "False"), 105 | ] 106 | 107 | self.maxDiff = None 108 | 109 | header = ",".join(t[0] for t in domain_data) 110 | values = ",".join(t[1] for t in domain_data) 111 | expected = header + "\n" + values + "\n" 112 | self.assertEqual(content, expected) 113 | 114 | # Sanity check that this hard coded data has the same headers as defined 115 | # in the package. This should never fail, as the above assert should 116 | # catch any changes in the header columns. 117 | self.assertEqual(header, ",".join(_pshtt.HEADERS)) 118 | -------------------------------------------------------------------------------- /tests/test_definitions.py: -------------------------------------------------------------------------------- 1 | """Test the library's models.""" 2 | 3 | # Standard Python Libraries 4 | import unittest 5 | 6 | # cisagov Libraries 7 | from pshtt import pshtt as api 8 | from pshtt.models import Domain, Endpoint 9 | 10 | 11 | class TestUsesHTTPS(unittest.TestCase): 12 | """Test for a domain using HTTPS.""" 13 | 14 | def setUp(self): 15 | """Perform initial setup.""" 16 | base_domain = "example.com" 17 | self.domain = Domain(base_domain) 18 | 19 | self.domain.http = Endpoint("http", "root", base_domain) 20 | self.domain.httpwww = Endpoint("http", "www", base_domain) 21 | self.domain.https = Endpoint("https", "root", base_domain) 22 | self.domain.httpswww = Endpoint("https", "www", base_domain) 23 | 24 | @unittest.skip("Still working on definition") 25 | def test_definition(self): 26 | """Test the definition of a domain using HTTPS.""" 27 | self.domain.https.live = True 28 | self.domain.https.https_valid = True 29 | self.domain.https.https_valid = True 30 | 31 | self.assertTrue(api.is_domain_supports_https(self.domain)) 32 | 33 | 34 | class TestBadChain(unittest.TestCase): 35 | """Test for a bad certificate chain.""" 36 | 37 | def setUp(self): 38 | """Perform initial setup.""" 39 | base_domain = "example.com" 40 | self.domain = Domain(base_domain) 41 | 42 | self.domain.http = Endpoint("http", "root", base_domain) 43 | self.domain.httpwww = Endpoint("http", "www", base_domain) 44 | self.domain.https = Endpoint("https", "root", base_domain) 45 | self.domain.httpswww = Endpoint("https", "www", base_domain) 46 | 47 | def test_bad_chain_root(self): 48 | """Test the root domain name.""" 49 | 
self.domain.https.https_bad_chain = True 50 | self.domain.canonical = self.domain.https 51 | 52 | self.assertTrue(api.is_bad_chain(self.domain)) 53 | 54 | def test_bad_chain_www(self): 55 | """Test the www prefixed domain name.""" 56 | self.domain.httpswww.https_bad_chain = True 57 | self.domain.canonical = self.domain.httpswww 58 | 59 | self.assertTrue(api.is_bad_chain(self.domain)) 60 | 61 | def test_bad_chain_both(self): 62 | """Test both the root and www prefixed domain name.""" 63 | self.domain.https.https_bad_chain = True 64 | self.domain.httpswww.https_bad_chain = True 65 | 66 | self.domain.canonical = self.domain.https 67 | self.assertTrue(api.is_bad_chain(self.domain)) 68 | 69 | self.domain.canonical = self.domain.httpswww 70 | self.assertTrue(api.is_bad_chain(self.domain)) 71 | 72 | 73 | class TestBadHostname(unittest.TestCase): 74 | """Verify the bad hostname check.""" 75 | 76 | def setUp(self): 77 | """Perform initial setup.""" 78 | base_domain = "example.com" 79 | self.domain = Domain(base_domain) 80 | 81 | self.domain.http = Endpoint("http", "root", base_domain) 82 | self.domain.httpwww = Endpoint("http", "www", base_domain) 83 | self.domain.https = Endpoint("https", "root", base_domain) 84 | self.domain.httpswww = Endpoint("https", "www", base_domain) 85 | 86 | def test_bad_hostname_root(self): 87 | """Test using the base domain name.""" 88 | self.domain.https.https_bad_hostname = True 89 | self.domain.canonical = self.domain.https 90 | 91 | self.assertTrue(api.is_bad_hostname(self.domain)) 92 | 93 | def test_bad_hostname_www(self): 94 | """Test using the www prefixed domain name.""" 95 | self.domain.httpswww.https_bad_hostname = True 96 | self.domain.canonical = self.domain.httpswww 97 | 98 | self.assertTrue(api.is_bad_hostname(self.domain)) 99 | 100 | def test_bad_hostname_both(self): 101 | """Test both the root and www prefixed domain name.""" 102 | self.domain.https.https_bad_hostname = True 103 | self.domain.httpswww.https_bad_hostname = True 104 | 105 | self.domain.canonical = self.domain.https 106 | self.assertTrue(api.is_bad_hostname(self.domain)) 107 | 108 | self.domain.canonical = self.domain.httpswww 109 | self.assertTrue(api.is_bad_hostname(self.domain)) 110 | -------------------------------------------------------------------------------- /tests/test_pshtt.py: -------------------------------------------------------------------------------- 1 | """Test the core functionality of the library.""" 2 | 3 | # Standard Python Libraries 4 | import unittest 5 | 6 | # cisagov Libraries 7 | from pshtt.models import Domain, Endpoint 8 | from pshtt.pshtt import is_live 9 | 10 | 11 | class TestLiveliness(unittest.TestCase): 12 | """Test the liveliness of a domain.""" 13 | 14 | def setUp(self): 15 | """Perform initial setup.""" 16 | base_domain = "example.com" 17 | self.domain = Domain(base_domain) 18 | 19 | self.domain.http = Endpoint("http", "root", base_domain) 20 | self.domain.httpwww = Endpoint("http", "www", base_domain) 21 | self.domain.https = Endpoint("https", "root", base_domain) 22 | self.domain.httpswww = Endpoint("https", "www", base_domain) 23 | 24 | def test_none(self): 25 | """Test in an unchecked state.""" 26 | self.assertFalse(is_live(self.domain)) 27 | 28 | def test_http_only(self): 29 | """Test when only HTTP access is live on the base domain name.""" 30 | self.domain.http.live = True 31 | 32 | self.assertTrue(is_live(self.domain)) 33 | 34 | def test_https_only(self): 35 | """Test when only HTTPS access is live on the base domain name.""" 36 | 
self.domain.https.live = True 37 | 38 | self.assertTrue(is_live(self.domain)) 39 | 40 | def test_httpwww_only(self): 41 | """Test when only HTTP access is live on the www prefixed domain name.""" 42 | self.domain.httpwww.live = True 43 | 44 | self.assertTrue(is_live(self.domain)) 45 | 46 | def test_httpswww_only(self): 47 | """Test when only HTTPS access is live on the www prefixed domain name.""" 48 | self.domain.httpswww.live = True 49 | 50 | self.assertTrue(is_live(self.domain)) 51 | 52 | def test_http_both(self): 53 | """Test when only HTTP access is live on both domain names.""" 54 | self.domain.http.live = True 55 | self.domain.httpwww.live = True 56 | 57 | self.assertTrue(is_live(self.domain)) 58 | 59 | def test_https_both(self): 60 | """Test when only HTTPS access is live on both domain names.""" 61 | self.domain.https.live = True 62 | self.domain.httpswww.live = True 63 | 64 | self.assertTrue(is_live(self.domain)) 65 | 66 | def test_www_neither(self): 67 | """Test when both HTTP and HTTPS are live on only the base domain.""" 68 | self.domain.http.live = True 69 | self.domain.https.live = True 70 | 71 | self.assertTrue(is_live(self.domain)) 72 | 73 | def test_www_both(self): 74 | """Test when both HTTP and HTTPS are live on the www prefixed domain name.""" 75 | self.domain.httpwww.live = True 76 | self.domain.httpswww.live = True 77 | 78 | self.assertTrue(is_live(self.domain)) 79 | 80 | def test_all(self): 81 | """Test when both HTTP and HTTPS are live on both domain names.""" 82 | self.domain.http.live = True 83 | self.domain.https.live = True 84 | self.domain.httpwww.live = True 85 | self.domain.httpswww.live = True 86 | 87 | self.assertTrue(is_live(self.domain)) 88 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | """Test the utility functions for the pshtt library.""" 2 | 3 | # Standard Python Libraries 4 | import os 5 | import sys 6 | import tempfile 7 | import unittest 8 | 9 | # cisagov Libraries 10 | from pshtt.utils import smart_open 11 | 12 | 13 | class TestSmartOpen(unittest.TestCase): 14 | """Test the functionality of the smart_open function.""" 15 | 16 | def test_without_filename(self): 17 | """Test that standard out is used if no filename is provided.""" 18 | with smart_open() as fh: 19 | self.assertIs(fh, sys.stdout) 20 | 21 | def test_with_empty_filename(self): 22 | """Test when an empty string is provided as a filename. 23 | 24 | Should raise a `FileNotFoundError` 25 | """ 26 | with self.assertRaises(FileNotFoundError): # noqa 27 | with smart_open(""): 28 | pass 29 | 30 | def test_with_real_filename(self): 31 | """Test when a valid string is provided as a filename.""" 32 | test_data = "This is the test data" 33 | 34 | with tempfile.TemporaryDirectory() as tmp_dirname: 35 | # Make a temporary file to use 36 | filename = os.path.join(tmp_dirname, "foo") 37 | 38 | with smart_open(filename) as fh: 39 | fh.write(test_data) 40 | 41 | # Read the file back in a context manager so the handle is closed. 42 | with open(filename, encoding="utf-8") as fh: 43 | self.assertEqual(test_data, fh.read()) 44 | --------------------------------------------------------------------------------