├── .ansible-lint
├── .bandit.yml
├── .coveragerc
├── .flake8
├── .github
│   ├── CODEOWNERS
│   ├── dependabot.yml
│   ├── labels.yml
│   ├── lineage.yml
│   └── workflows
│       ├── build.yml
│       ├── codeql-analysis.yml
│       ├── dependency-review.yml
│       └── sync-labels.yml
├── .gitignore
├── .isort.cfg
├── .mdl_config.yaml
├── .pre-commit-config.yaml
├── .prettierignore
├── .yamllint
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── bump-version
├── gce-scripts
│   ├── README.md
│   ├── check_instances.sh
│   ├── combine_shards.py
│   ├── grab_and_combine_data.sh
│   ├── packages_to_install.sh
│   ├── run_all_scripts.sh
│   ├── run_instances.sh
│   ├── running_script.sh
│   ├── scp_and_setup.sh
│   └── split_up_dataset.sh
├── pytest.ini
├── requirements-dev.txt
├── requirements-test.txt
├── requirements.txt
├── setup-env
├── setup.py
├── src
│   └── pshtt
│       ├── __init__.py
│       ├── __main__.py
│       ├── _version.py
│       ├── cli.py
│       ├── models.py
│       ├── pshtt.py
│       └── utils.py
├── tag.sh
└── tests
    ├── conftest.py
    ├── test_badssl.py
    ├── test_cli.py
    ├── test_definitions.py
    ├── test_pshtt.py
    └── test_utils.py
/.ansible-lint: -------------------------------------------------------------------------------- 1 | --- 2 | # See https://ansible-lint.readthedocs.io/configuring/ for a list of 3 | # the configuration elements that can exist in this file. 4 | enable_list: 5 | # Useful checks that one must opt-into. See here for more details: 6 | # https://ansible-lint.readthedocs.io/rules/ 7 | - fqcn-builtins 8 | - no-log-password 9 | - no-same-owner 10 | exclude_paths: 11 | # This exclusion is implicit, unless exclude_paths is defined 12 | - .cache 13 | # Seems wise to ignore this too 14 | - .github 15 | kinds: 16 | # This will force our systemd specific molecule configurations to be treated 17 | # as plain yaml files by ansible-lint. This mirrors the default kind 18 | # configuration in ansible-lint for molecule configurations: 19 | # yaml: "**/molecule/*/{base,molecule}.{yaml,yml}" 20 | - yaml: "**/molecule/*/molecule-{no,with}-systemd.yml" 21 | use_default_rules: true -------------------------------------------------------------------------------- /.bandit.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Configuration file for the Bandit python security scanner 3 | # https://bandit.readthedocs.io/en/latest/config.html 4 | # This config is applied to bandit when scanning the "tests" tree 5 | 6 | # Tests are first included by `tests`, and then excluded by `skips`. 7 | # If `tests` is empty, all tests are considered included.
8 | 9 | tests: 10 | # - B101 11 | # - B102 12 | 13 | skips: 14 | - B101 # skip "assert used" check since assertions are required in pytests 15 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | # This is the configuration for code coverage checks 2 | # https://coverage.readthedocs.io/en/latest/config.html 3 | 4 | [run] 5 | source = src/pshtt 6 | omit = 7 | branch = true 8 | 9 | [report] 10 | exclude_lines = 11 | if __name__ == "__main__": 12 | show_missing = true 13 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 80 3 | # Select (turn on) 4 | # * Complexity violations reported by mccabe (C) - 5 | # http://flake8.pycqa.org/en/latest/user/error-codes.html#error-violation-codes 6 | # * Documentation conventions compliance reported by pydocstyle (D) - 7 | # http://www.pydocstyle.org/en/stable/error_codes.html 8 | # * Default errors and warnings reported by pycodestyle (E and W) - 9 | # https://pycodestyle.readthedocs.io/en/latest/intro.html#error-codes 10 | # * Default errors reported by pyflakes (F) - 11 | # http://flake8.pycqa.org/en/latest/glossary.html#term-pyflakes 12 | # * Default warnings reported by flake8-bugbear (B) - 13 | # https://github.com/PyCQA/flake8-bugbear#list-of-warnings 14 | # * The B950 flake8-bugbear opinionated warning - 15 | # https://github.com/PyCQA/flake8-bugbear#opinionated-warnings 16 | select = C,D,E,F,W,B,B950 17 | # Ignore flake8's default warning about "whitespace before ':'" as it is not 18 | # PEP 8 compliant and conflicts with black's styling. 19 | # 20 | # Ignore flake8's default warning about maximum line length, which has 21 | # a hard stop at the configured value. Instead we use 22 | # flake8-bugbear's B950, which allows up to 10% overage. 23 | # 24 | # Also ignore flake8's warning about line breaks before binary 25 | # operators. It no longer agrees with PEP8. See, for example, here: 26 | # https://github.com/ambv/black/issues/21. Guido agrees here: 27 | # https://github.com/python/peps/commit/c59c4376ad233a62ca4b3a6060c81368bd21e85b. 28 | ignore = E203,E501,W503 29 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Each line is a file pattern followed by one or more owners. 2 | 3 | # These owners will be the default owners for everything in the 4 | # repo. Unless a later match takes precedence, these owners will be 5 | # requested for review when someone opens a pull request. 6 | * @dav3r @felddy @IanLee1521 @jsf9k @mcdonnnj 7 | 8 | # These folks own any files in the .github directory at the root of 9 | # the repository and any of its subdirectories. 10 | /.github/ @dav3r @felddy @jsf9k @mcdonnnj 11 | 12 | # These folks own all linting configuration files. 
13 | /.ansible-lint @dav3r @felddy @jsf9k @mcdonnnj 14 | /.bandit.yml @dav3r @felddy @jsf9k @mcdonnnj 15 | /.flake8 @dav3r @felddy @jsf9k @mcdonnnj 16 | /.isort.cfg @dav3r @felddy @jsf9k @mcdonnnj 17 | /.mdl_config.yaml @dav3r @felddy @jsf9k @mcdonnnj 18 | /.pre-commit-config.yaml @dav3r @felddy @jsf9k @mcdonnnj 19 | /.prettierignore @dav3r @felddy @jsf9k @mcdonnnj 20 | /.yamllint @dav3r @felddy @jsf9k @mcdonnnj 21 | /requirements.txt @dav3r @felddy @jsf9k @mcdonnnj 22 | /requirements-dev.txt @dav3r @felddy @jsf9k @mcdonnnj 23 | /requirements-test.txt @dav3r @felddy @jsf9k @mcdonnnj 24 | /setup-env @dav3r @felddy @jsf9k @mcdonnnj 25 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | # Any ignore directives should be uncommented in downstream projects to disable 4 | # Dependabot updates for the given dependency. Downstream projects will get 5 | # these updates when the pull request(s) in the appropriate skeleton are merged 6 | # and Lineage processes these changes. 7 | 8 | updates: 9 | - directory: / 10 | ignore: 11 | # Managed by cisagov/skeleton-generic 12 | - dependency-name: actions/cache 13 | - dependency-name: actions/checkout 14 | - dependency-name: actions/dependency-review-action 15 | - dependency-name: actions/setup-go 16 | - dependency-name: actions/setup-python 17 | - dependency-name: cisagov/action-job-preamble 18 | - dependency-name: cisagov/setup-env-github-action 19 | - dependency-name: crazy-max/ghaction-github-labeler 20 | - dependency-name: github/codeql-action 21 | - dependency-name: hashicorp/setup-packer 22 | - dependency-name: hashicorp/setup-terraform 23 | - dependency-name: mxschmitt/action-tmate 24 | # Managed by cisagov/skeleton-python-library 25 | - dependency-name: actions/download-artifact 26 | - dependency-name: actions/upload-artifact 27 | package-ecosystem: github-actions 28 | schedule: 29 | interval: weekly 30 | 31 | - directory: / 32 | package-ecosystem: pip 33 | schedule: 34 | interval: weekly 35 | 36 | - directory: / 37 | package-ecosystem: terraform 38 | schedule: 39 | interval: weekly 40 | version: 2 41 | -------------------------------------------------------------------------------- /.github/labels.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Rather than breaking up descriptions into multiline strings we disable that 3 | # specific rule in yamllint for this file. 
4 | # yamllint disable rule:line-length 5 | - color: eb6420 6 | description: This issue or pull request is awaiting the outcome of another issue or pull request 7 | name: blocked 8 | - color: "000000" 9 | description: This issue or pull request involves changes to existing functionality 10 | name: breaking change 11 | - color: d73a4a 12 | description: This issue or pull request addresses broken functionality 13 | name: bug 14 | - color: 07648d 15 | description: This issue will be advertised on code.gov's Open Tasks page (https://code.gov/open-tasks) 16 | name: code.gov 17 | - color: 0366d6 18 | description: Pull requests that update a dependency file 19 | name: dependencies 20 | - color: 5319e7 21 | description: This issue or pull request improves or adds to documentation 22 | name: documentation 23 | - color: cfd3d7 24 | description: This issue or pull request already exists or is covered in another issue or pull request 25 | name: duplicate 26 | - color: b005bc 27 | description: A high-level objective issue encompassing multiple issues instead of a specific unit of work 28 | name: epic 29 | - color: "000000" 30 | description: Pull requests that update GitHub Actions code 31 | name: github-actions 32 | - color: 0e8a16 33 | description: This issue or pull request is well-defined and good for newcomers 34 | name: good first issue 35 | - color: ff7518 36 | description: Pull request that should count toward Hacktoberfest participation 37 | name: hacktoberfest-accepted 38 | - color: a2eeef 39 | description: This issue or pull request will add or improve functionality, maintainability, or ease of use 40 | name: improvement 41 | - color: fef2c0 42 | description: This issue or pull request is not applicable, incorrect, or obsolete 43 | name: invalid 44 | - color: ce099a 45 | description: This pull request is ready to merge during the next Lineage Kraken release 46 | name: kraken 🐙 47 | - color: a4fc5d 48 | description: This issue or pull request requires further information 49 | name: need info 50 | - color: fcdb45 51 | description: This pull request is awaiting an action or decision to move forward 52 | name: on hold 53 | - color: 3772a4 54 | description: Pull requests that update Python code 55 | name: python 56 | - color: ef476c 57 | description: This issue is a request for information or needs discussion 58 | name: question 59 | - color: d73a4a 60 | description: This issue or pull request addresses a security issue 61 | name: security 62 | - color: 00008b 63 | description: This issue or pull request adds or otherwise modifies test code 64 | name: test 65 | - color: 1d76db 66 | description: This issue or pull request pulls in upstream updates 67 | name: upstream update 68 | - color: d4c5f9 69 | description: This issue or pull request increments the version number 70 | name: version bump 71 | - color: ffffff 72 | description: This issue will not be incorporated 73 | name: wontfix 74 | -------------------------------------------------------------------------------- /.github/lineage.yml: -------------------------------------------------------------------------------- 1 | --- 2 | lineage: 3 | skeleton: 4 | remote-url: https://github.com/cisagov/skeleton-python-library.git 5 | version: "1" 6 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: build 3 | 4 | on: # yamllint disable-line rule:truthy 5 | merge_group: 6 | types: 7 | - 
checks_requested 8 | pull_request: 9 | push: 10 | repository_dispatch: 11 | types: 12 | - apb 13 | 14 | # Set a default shell for any run steps. The `-Eueo pipefail` sets errtrace, 15 | # nounset, errexit, and pipefail. The `-x` will print all commands as they are 16 | # run. Please see the GitHub Actions documentation for more information: 17 | # https://docs.github.com/en/actions/using-jobs/setting-default-values-for-jobs 18 | defaults: 19 | run: 20 | shell: bash -Eueo pipefail -x {0} 21 | 22 | env: 23 | PIP_CACHE_DIR: ~/.cache/pip 24 | PRE_COMMIT_CACHE_DIR: ~/.cache/pre-commit 25 | RUN_TMATE: ${{ secrets.RUN_TMATE }} 26 | TERRAFORM_DOCS_REPO_BRANCH_NAME: improvement/support_atx_closed_markdown_headers 27 | TERRAFORM_DOCS_REPO_DEPTH: 1 28 | TERRAFORM_DOCS_REPO_URL: https://github.com/mcdonnnj/terraform-docs.git 29 | 30 | jobs: 31 | diagnostics: 32 | name: Run diagnostics 33 | # This job does not need any permissions 34 | permissions: {} 35 | runs-on: ubuntu-latest 36 | steps: 37 | # Note that a duplicate of this step must be added at the top of 38 | # each job. 39 | - name: Apply standard cisagov job preamble 40 | uses: cisagov/action-job-preamble@v1 41 | with: 42 | check_github_status: "true" 43 | # This functionality is poorly implemented and has been 44 | # causing problems due to the MITM implementation hogging or 45 | # leaking memory. As a result we disable it by default. If 46 | # you want to temporarily enable it, simply set 47 | # monitor_permissions equal to "true". 48 | # 49 | # TODO: Re-enable this functionality when practical. See 50 | # cisagov/skeleton-generic#207 for more details. 51 | monitor_permissions: "false" 52 | output_workflow_context: "true" 53 | # Use a variable to specify the permissions monitoring 54 | # configuration. By default this will yield the 55 | # configuration stored in the cisagov organization-level 56 | # variable, but if you want to use a different configuration 57 | # then simply: 58 | # 1. Create a repository-level variable with the name 59 | # ACTIONS_PERMISSIONS_CONFIG. 60 | # 2. Set this new variable's value to the configuration you 61 | # want to use for this repository. 62 | # 63 | # Note in particular that changing the permissions 64 | # monitoring configuration *does not* require you to modify 65 | # this workflow. 66 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 67 | lint: 68 | needs: 69 | - diagnostics 70 | permissions: 71 | # actions/checkout needs this to fetch code 72 | contents: read 73 | runs-on: ubuntu-latest 74 | steps: 75 | - name: Apply standard cisagov job preamble 76 | uses: cisagov/action-job-preamble@v1 77 | with: 78 | # This functionality is poorly implemented and has been 79 | # causing problems due to the MITM implementation hogging or 80 | # leaking memory. As a result we disable it by default. If 81 | # you want to temporarily enable it, simply set 82 | # monitor_permissions equal to "true". 83 | # 84 | # TODO: Re-enable this functionality when practical. See 85 | # cisagov/skeleton-generic#207 for more details. 86 | monitor_permissions: "false" 87 | # Use a variable to specify the permissions monitoring 88 | # configuration. By default this will yield the 89 | # configuration stored in the cisagov organization-level 90 | # variable, but if you want to use a different configuration 91 | # then simply: 92 | # 1. Create a repository-level variable with the name 93 | # ACTIONS_PERMISSIONS_CONFIG. 94 | # 2. 
Set this new variable's value to the configuration you 95 | # want to use for this repository. 96 | # 97 | # Note in particular that changing the permissions 98 | # monitoring configuration *does not* require you to modify 99 | # this workflow. 100 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 101 | - id: setup-env 102 | uses: cisagov/setup-env-github-action@develop 103 | - uses: actions/checkout@v4 104 | - id: setup-python 105 | uses: actions/setup-python@v5 106 | with: 107 | # python-version: ${{ steps.setup-env.outputs.python-version }} 108 | # This project cannot currently support Python 3.11 or 3.12. 109 | python-version: "3.10" 110 | # We need the Go version and Go cache location for the actions/cache step, 111 | # so the Go installation must happen before that. 112 | - id: setup-go 113 | uses: actions/setup-go@v5 114 | with: 115 | # There is no expectation for actual Go code so we disable caching as 116 | # it relies on the existence of a go.sum file. 117 | cache: false 118 | go-version: ${{ steps.setup-env.outputs.go-version }} 119 | - id: go-cache 120 | name: Lookup Go cache directory 121 | run: | 122 | echo "dir=$(go env GOCACHE)" >> $GITHUB_OUTPUT 123 | - uses: actions/cache@v4 124 | env: 125 | BASE_CACHE_KEY: ${{ github.job }}-${{ runner.os }}-\ 126 | py${{ steps.setup-python.outputs.python-version }}-\ 127 | go${{ steps.setup-go.outputs.go-version }}-\ 128 | packer${{ steps.setup-env.outputs.packer-version }}-\ 129 | tf${{ steps.setup-env.outputs.terraform-version }}- 130 | with: 131 | # We do not use '**/setup.py' in the cache key so only the 'setup.py' 132 | # file in the root of the repository is used. This is in case a Python 133 | # package were to have a 'setup.py' as part of its internal codebase. 134 | key: ${{ env.BASE_CACHE_KEY }}\ 135 | ${{ hashFiles('**/requirements-test.txt') }}-\ 136 | ${{ hashFiles('**/requirements.txt') }}-\ 137 | ${{ hashFiles('**/.pre-commit-config.yaml') }}-\ 138 | ${{ hashFiles('setup.py') }} 139 | # Note that the .terraform directory IS NOT included in the 140 | # cache because if we were caching, then we would need to use 141 | # the `-upgrade=true` option. This option blindly pulls down the 142 | # latest modules and providers instead of checking to see if an 143 | # update is required. That behavior defeats the benefits of caching, 144 | # so there is no point in doing it for the .terraform directory.
145 | path: | 146 | ${{ env.PIP_CACHE_DIR }} 147 | ${{ env.PRE_COMMIT_CACHE_DIR }} 148 | ${{ steps.go-cache.outputs.dir }} 149 | restore-keys: | 150 | ${{ env.BASE_CACHE_KEY }} 151 | - uses: hashicorp/setup-packer@v3 152 | with: 153 | version: ${{ steps.setup-env.outputs.packer-version }} 154 | - uses: hashicorp/setup-terraform@v3 155 | with: 156 | terraform_version: ${{ steps.setup-env.outputs.terraform-version }} 157 | - name: Install go-critic 158 | env: 159 | PACKAGE_URL: github.com/go-critic/go-critic/cmd/gocritic 160 | PACKAGE_VERSION: ${{ steps.setup-env.outputs.go-critic-version }} 161 | run: go install ${PACKAGE_URL}@${PACKAGE_VERSION} 162 | - name: Install goimports 163 | env: 164 | PACKAGE_URL: golang.org/x/tools/cmd/goimports 165 | PACKAGE_VERSION: ${{ steps.setup-env.outputs.goimports-version }} 166 | run: go install ${PACKAGE_URL}@${PACKAGE_VERSION} 167 | - name: Install gosec 168 | env: 169 | PACKAGE_URL: github.com/securego/gosec/v2/cmd/gosec 170 | PACKAGE_VERSION: ${{ steps.setup-env.outputs.gosec-version }} 171 | run: go install ${PACKAGE_URL}@${PACKAGE_VERSION} 172 | - name: Install staticcheck 173 | env: 174 | PACKAGE_URL: honnef.co/go/tools/cmd/staticcheck 175 | PACKAGE_VERSION: ${{ steps.setup-env.outputs.staticcheck-version }} 176 | run: go install ${PACKAGE_URL}@${PACKAGE_VERSION} 177 | # TODO: https://github.com/cisagov/skeleton-generic/issues/165 178 | # We are temporarily using @mcdonnnj's forked branch of terraform-docs 179 | # until his PR: https://github.com/terraform-docs/terraform-docs/pull/745 180 | # is approved. This temporary fix will allow for ATX header support when 181 | # terraform-docs is run during linting. 182 | - name: Clone ATX headers branch from terraform-docs fork 183 | run: | 184 | git clone \ 185 | --branch $TERRAFORM_DOCS_REPO_BRANCH_NAME \ 186 | --depth $TERRAFORM_DOCS_REPO_DEPTH \ 187 | --single-branch \ 188 | $TERRAFORM_DOCS_REPO_URL /tmp/terraform-docs 189 | - name: Build and install terraform-docs binary 190 | run: | 191 | go build \ 192 | -C /tmp/terraform-docs \ 193 | -o $(go env GOPATH)/bin/terraform-docs 194 | - name: Install dependencies 195 | run: | 196 | python -m pip install --upgrade pip setuptools wheel 197 | pip install --upgrade --requirement requirements-test.txt 198 | - name: Set up pre-commit hook environments 199 | run: pre-commit install-hooks 200 | - name: Run pre-commit on all files 201 | run: pre-commit run --all-files 202 | - name: Setup tmate debug session 203 | uses: mxschmitt/action-tmate@v3 204 | if: env.RUN_TMATE 205 | test: 206 | name: test source - py${{ matrix.python-version }} 207 | needs: 208 | - diagnostics 209 | permissions: 210 | # actions/checkout needs this to fetch code 211 | contents: read 212 | runs-on: ${{ matrix.os }} 213 | strategy: 214 | fail-fast: false 215 | matrix: 216 | include: 217 | - os: ubuntu-22.04 218 | python-version: "3.7" 219 | os: 220 | - ubuntu-latest 221 | python-version: 222 | - "3.8" 223 | - "3.9" 224 | - "3.10" 225 | # - "3.11" 226 | # - "3.12" 227 | # - "3.13" 228 | steps: 229 | - name: Apply standard cisagov job preamble 230 | uses: cisagov/action-job-preamble@v1 231 | with: 232 | # This functionality is poorly implemented and has been 233 | # causing problems due to the MITM implementation hogging or 234 | # leaking memory. As a result we disable it by default. If 235 | # you want to temporarily enable it, simply set 236 | # monitor_permissions equal to "true". 237 | # 238 | # TODO: Re-enable this functionality when practical. 
See 239 | # cisagov/skeleton-python-library#149 for more details. 240 | monitor_permissions: "false" 241 | # Use a variable to specify the permissions monitoring 242 | # configuration. By default this will yield the 243 | # configuration stored in the cisagov organization-level 244 | # variable, but if you want to use a different configuration 245 | # then simply: 246 | # 1. Create a repository-level variable with the name 247 | # ACTIONS_PERMISSIONS_CONFIG. 248 | # 2. Set this new variable's value to the configuration you 249 | # want to use for this repository. 250 | # 251 | # Note in particular that changing the permissions 252 | # monitoring configuration *does not* require you to modify 253 | # this workflow. 254 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 255 | - uses: actions/checkout@v4 256 | - id: setup-python 257 | uses: actions/setup-python@v5 258 | with: 259 | python-version: ${{ matrix.python-version }} 260 | - uses: actions/cache@v4 261 | env: 262 | BASE_CACHE_KEY: ${{ github.job }}-${{ runner.os }}-\ 263 | py${{ steps.setup-python.outputs.python-version }}- 264 | with: 265 | path: ${{ env.PIP_CACHE_DIR }} 266 | # We do not use '**/setup.py' in the cache key so only the 'setup.py' 267 | # file in the root of the repository is used. This is in case a Python 268 | # package were to have a 'setup.py' as part of its internal codebase. 269 | key: ${{ env.BASE_CACHE_KEY }}\ 270 | ${{ hashFiles('**/requirements-test.txt') }}-\ 271 | ${{ hashFiles('**/requirements.txt') }}-\ 272 | ${{ hashFiles('setup.py') }} 273 | restore-keys: | 274 | ${{ env.BASE_CACHE_KEY }} 275 | - name: Install dependencies 276 | run: | 277 | python -m pip install --upgrade pip 278 | pip install --upgrade --requirement requirements-test.txt 279 | - name: Run tests 280 | env: 281 | RELEASE_TAG: ${{ github.event.release.tag_name }} 282 | run: pytest 283 | - name: Upload coverage report 284 | uses: coverallsapp/github-action@v2 285 | with: 286 | flag-name: py${{ matrix.python-version }} 287 | parallel: true 288 | if: success() 289 | - name: Setup tmate debug session 290 | uses: mxschmitt/action-tmate@v3 291 | if: env.RUN_TMATE 292 | coveralls-finish: 293 | permissions: 294 | # actions/checkout needs this to fetch code 295 | contents: read 296 | runs-on: ubuntu-latest 297 | needs: 298 | - diagnostics 299 | - test 300 | steps: 301 | - name: Apply standard cisagov job preamble 302 | uses: cisagov/action-job-preamble@v1 303 | with: 304 | # This functionality is poorly implemented and has been 305 | # causing problems due to the MITM implementation hogging or 306 | # leaking memory. As a result we disable it by default. If 307 | # you want to temporarily enable it, simply set 308 | # monitor_permissions equal to "true". 309 | # 310 | # TODO: Re-enable this functionality when practical. See 311 | # cisagov/skeleton-python-library#149 for more details. 312 | monitor_permissions: "false" 313 | # Use a variable to specify the permissions monitoring 314 | # configuration. By default this will yield the 315 | # configuration stored in the cisagov organization-level 316 | # variable, but if you want to use a different configuration 317 | # then simply: 318 | # 1. Create a repository-level variable with the name 319 | # ACTIONS_PERMISSIONS_CONFIG. 320 | # 2. Set this new variable's value to the configuration you 321 | # want to use for this repository. 
322 | # 323 | # Note in particular that changing the permissions 324 | # monitoring configuration *does not* require you to modify 325 | # this workflow. 326 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 327 | - uses: actions/checkout@v4 328 | - name: Finished coveralls reports 329 | uses: coverallsapp/github-action@v2 330 | with: 331 | parallel-finished: true 332 | - name: Setup tmate debug session 333 | uses: mxschmitt/action-tmate@v3 334 | if: env.RUN_TMATE 335 | build: 336 | name: build wheel - py${{ matrix.python-version }} 337 | needs: 338 | - diagnostics 339 | - lint 340 | - test 341 | permissions: 342 | # actions/checkout needs this to fetch code 343 | contents: read 344 | runs-on: ${{ matrix.os }} 345 | strategy: 346 | fail-fast: false 347 | matrix: 348 | include: 349 | - os: ubuntu-22.04 350 | python-version: "3.7" 351 | os: 352 | - ubuntu-latest 353 | python-version: 354 | - "3.8" 355 | - "3.9" 356 | - "3.10" 357 | # - "3.11" 358 | # - "3.12" 359 | # - "3.13" 360 | steps: 361 | - name: Apply standard cisagov job preamble 362 | uses: cisagov/action-job-preamble@v1 363 | with: 364 | # This functionality is poorly implemented and has been 365 | # causing problems due to the MITM implementation hogging or 366 | # leaking memory. As a result we disable it by default. If 367 | # you want to temporarily enable it, simply set 368 | # monitor_permissions equal to "true". 369 | # 370 | # TODO: Re-enable this functionality when practical. See 371 | # cisagov/skeleton-python-library#149 for more details. 372 | monitor_permissions: "false" 373 | # Use a variable to specify the permissions monitoring 374 | # configuration. By default this will yield the 375 | # configuration stored in the cisagov organization-level 376 | # variable, but if you want to use a different configuration 377 | # then simply: 378 | # 1. Create a repository-level variable with the name 379 | # ACTIONS_PERMISSIONS_CONFIG. 380 | # 2. Set this new variable's value to the configuration you 381 | # want to use for this repository. 382 | # 383 | # Note in particular that changing the permissions 384 | # monitoring configuration *does not* require you to modify 385 | # this workflow. 386 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 387 | - uses: actions/checkout@v4 388 | - id: setup-python 389 | uses: actions/setup-python@v5 390 | with: 391 | python-version: ${{ matrix.python-version }} 392 | - uses: actions/cache@v4 393 | env: 394 | BASE_CACHE_KEY: ${{ github.job }}-${{ runner.os }}-\ 395 | py${{ steps.setup-python.outputs.python-version }}- 396 | with: 397 | path: ${{ env.PIP_CACHE_DIR }} 398 | # We do not use '**/setup.py' in the cache key so only the 'setup.py' 399 | # file in the root of the repository is used. This is in case a Python 400 | # package were to have a 'setup.py' as part of its internal codebase. 
401 | key: ${{ env.BASE_CACHE_KEY }}\ 402 | ${{ hashFiles('**/requirements.txt') }}-\ 403 | ${{ hashFiles('setup.py') }} 404 | restore-keys: | 405 | ${{ env.BASE_CACHE_KEY }} 406 | - name: Install build dependencies 407 | run: | 408 | python -m pip install --upgrade pip setuptools wheel 409 | python -m pip install --upgrade build 410 | - name: Build artifacts 411 | run: python -m build 412 | - name: Upload artifacts 413 | uses: actions/upload-artifact@v4 414 | with: 415 | name: dist-${{ matrix.python-version }} 416 | path: dist 417 | - name: Setup tmate debug session 418 | uses: mxschmitt/action-tmate@v3 419 | if: env.RUN_TMATE 420 | test-build: 421 | name: test built wheel - py${{ matrix.python-version }} 422 | needs: 423 | - diagnostics 424 | - build 425 | permissions: 426 | # actions/checkout needs this to fetch code 427 | contents: read 428 | runs-on: ${{ matrix.os }} 429 | strategy: 430 | fail-fast: false 431 | matrix: 432 | include: 433 | - os: ubuntu-22.04 434 | python-version: "3.7" 435 | os: 436 | - ubuntu-latest 437 | python-version: 438 | - "3.8" 439 | - "3.9" 440 | - "3.10" 441 | # - "3.11" 442 | # - "3.12" 443 | # - "3.13" 444 | steps: 445 | - name: Apply standard cisagov job preamble 446 | uses: cisagov/action-job-preamble@v1 447 | with: 448 | # This functionality is poorly implemented and has been 449 | # causing problems due to the MITM implementation hogging or 450 | # leaking memory. As a result we disable it by default. If 451 | # you want to temporarily enable it, simply set 452 | # monitor_permissions equal to "true". 453 | # 454 | # TODO: Re-enable this functionality when practical. See 455 | # cisagov/skeleton-python-library#149 for more details. 456 | monitor_permissions: "false" 457 | # Use a variable to specify the permissions monitoring 458 | # configuration. By default this will yield the 459 | # configuration stored in the cisagov organization-level 460 | # variable, but if you want to use a different configuration 461 | # then simply: 462 | # 1. Create a repository-level variable with the name 463 | # ACTIONS_PERMISSIONS_CONFIG. 464 | # 2. Set this new variable's value to the configuration you 465 | # want to use for this repository. 466 | # 467 | # Note in particular that changing the permissions 468 | # monitoring configuration *does not* require you to modify 469 | # this workflow. 470 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 471 | - uses: actions/checkout@v4 472 | - id: setup-python 473 | uses: actions/setup-python@v5 474 | with: 475 | python-version: ${{ matrix.python-version }} 476 | - uses: actions/cache@v4 477 | env: 478 | BASE_CACHE_KEY: ${{ github.job }}-${{ runner.os }}-\ 479 | py${{ steps.setup-python.outputs.python-version }}- 480 | with: 481 | path: ${{ env.PIP_CACHE_DIR }} 482 | # We do not use '**/setup.py' in the cache key so only the 'setup.py' 483 | # file in the root of the repository is used. This is in case a Python 484 | # package were to have a 'setup.py' as part of its internal codebase. 
485 | key: ${{ env.BASE_CACHE_KEY }}\ 486 | ${{ hashFiles('**/requirements.txt') }}-\ 487 | ${{ hashFiles('setup.py') }} 488 | restore-keys: | 489 | ${{ env.BASE_CACHE_KEY }} 490 | - name: Retrieve the built wheel 491 | uses: actions/download-artifact@v4 492 | with: 493 | name: dist-${{ matrix.python-version }} 494 | path: dist 495 | - id: find-wheel 496 | name: Get the name of the retrieved wheel (there should only be one) 497 | run: echo "wheel=$(ls dist/*whl)" >> $GITHUB_OUTPUT 498 | - name: Update core Python packages 499 | run: python -m pip install --upgrade pip setuptools wheel 500 | - name: Install the built wheel (along with testing dependencies) 501 | run: python -m pip install ${{ steps.find-wheel.outputs.wheel }}[test] 502 | - name: Run tests 503 | env: 504 | RELEASE_TAG: ${{ github.event.release.tag_name }} 505 | run: pytest 506 | - name: Setup tmate debug session 507 | uses: mxschmitt/action-tmate@v3 508 | if: env.RUN_TMATE 509 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # For most projects, this workflow file will not need changing; you simply need 3 | # to commit it to your repository. 4 | # 5 | # You may wish to alter this file to override the set of languages analyzed, 6 | # or to provide custom queries or build logic. 7 | name: CodeQL 8 | 9 | # The use of on here as a key is part of the GitHub actions syntax. 10 | # yamllint disable-line rule:truthy 11 | on: 12 | merge_group: 13 | types: 14 | - checks_requested 15 | pull_request: 16 | # The branches here must be a subset of the ones in the push key 17 | branches: 18 | - develop 19 | push: 20 | # Dependabot-triggered push events have read-only access, but uploading code 21 | # scanning requires write access. 22 | branches-ignore: 23 | - dependabot/** 24 | schedule: 25 | - cron: 0 14 * * 6 26 | 27 | jobs: 28 | diagnostics: 29 | name: Run diagnostics 30 | # This job does not need any permissions 31 | permissions: {} 32 | runs-on: ubuntu-latest 33 | steps: 34 | # Note that a duplicate of this step must be added at the top of 35 | # each job. 36 | - name: Apply standard cisagov job preamble 37 | uses: cisagov/action-job-preamble@v1 38 | with: 39 | check_github_status: "true" 40 | # This functionality is poorly implemented and has been 41 | # causing problems due to the MITM implementation hogging or 42 | # leaking memory. As a result we disable it by default. If 43 | # you want to temporarily enable it, simply set 44 | # monitor_permissions equal to "true". 45 | # 46 | # TODO: Re-enable this functionality when practical. See 47 | # cisagov/skeleton-generic#207 for more details. 48 | monitor_permissions: "false" 49 | output_workflow_context: "true" 50 | # Use a variable to specify the permissions monitoring 51 | # configuration. By default this will yield the 52 | # configuration stored in the cisagov organization-level 53 | # variable, but if you want to use a different configuration 54 | # then simply: 55 | # 1. Create a repository-level variable with the name 56 | # ACTIONS_PERMISSIONS_CONFIG. 57 | # 2. Set this new variable's value to the configuration you 58 | # want to use for this repository. 59 | # 60 | # Note in particular that changing the permissions 61 | # monitoring configuration *does not* require you to modify 62 | # this workflow. 
63 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 64 | analyze: 65 | name: Analyze 66 | needs: 67 | - diagnostics 68 | permissions: 69 | # actions/checkout needs this to fetch code 70 | contents: read 71 | # required for all workflows 72 | security-events: write 73 | runs-on: ubuntu-latest 74 | strategy: 75 | fail-fast: false 76 | matrix: 77 | # Override automatic language detection by changing the below 78 | # list 79 | # 80 | # Supported options are actions, c-cpp, csharp, go, 81 | # java-kotlin, javascript-typescript, python, ruby, and swift. 82 | language: 83 | - actions 84 | - python 85 | # Learn more... 86 | # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection 87 | 88 | steps: 89 | - name: Apply standard cisagov job preamble 90 | uses: cisagov/action-job-preamble@v1 91 | with: 92 | # This functionality is poorly implemented and has been 93 | # causing problems due to the MITM implementation hogging or 94 | # leaking memory. As a result we disable it by default. If 95 | # you want to temporarily enable it, simply set 96 | # monitor_permissions equal to "true". 97 | # 98 | # TODO: Re-enable this functionality when practical. See 99 | # cisagov/skeleton-generic#207 for more details. 100 | monitor_permissions: "false" 101 | # Use a variable to specify the permissions monitoring 102 | # configuration. By default this will yield the 103 | # configuration stored in the cisagov organization-level 104 | # variable, but if you want to use a different configuration 105 | # then simply: 106 | # 1. Create a repository-level variable with the name 107 | # ACTIONS_PERMISSIONS_CONFIG. 108 | # 2. Set this new variable's value to the configuration you 109 | # want to use for this repository. 110 | # 111 | # Note in particular that changing the permissions 112 | # monitoring configuration *does not* require you to modify 113 | # this workflow. 114 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 115 | 116 | - name: Checkout repository 117 | uses: actions/checkout@v4 118 | 119 | # Initializes the CodeQL tools for scanning. 120 | - name: Initialize CodeQL 121 | uses: github/codeql-action/init@v3 122 | with: 123 | languages: ${{ matrix.language }} 124 | 125 | # Autobuild attempts to build any compiled languages (C/C++, C#, or 126 | # Java). If this step fails, then you should remove it and run the build 127 | # manually (see below). 128 | - name: Autobuild 129 | uses: github/codeql-action/autobuild@v3 130 | 131 | # ℹ️ Command-line programs to run using the OS shell. 132 | # 📚 https://git.io/JvXDl 133 | 134 | # ✏️ If the Autobuild fails above, remove it and uncomment the following 135 | # three lines and modify them (or add more) to build your code if your 136 | # project uses a compiled language 137 | 138 | # - run: | 139 | # make bootstrap 140 | # make release 141 | 142 | - name: Perform CodeQL Analysis 143 | uses: github/codeql-action/analyze@v3 144 | -------------------------------------------------------------------------------- /.github/workflows/dependency-review.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Dependency review 3 | 4 | on: # yamllint disable-line rule:truthy 5 | merge_group: 6 | types: 7 | - checks_requested 8 | pull_request: 9 | 10 | # Set a default shell for any run steps. The `-Eueo pipefail` sets errtrace, 11 | # nounset, errexit, and pipefail. 
The `-x` will print all commands as they are 12 | # run. Please see the GitHub Actions documentation for more information: 13 | # https://docs.github.com/en/actions/using-jobs/setting-default-values-for-jobs 14 | defaults: 15 | run: 16 | shell: bash -Eueo pipefail -x {0} 17 | 18 | jobs: 19 | diagnostics: 20 | name: Run diagnostics 21 | # This job does not need any permissions 22 | permissions: {} 23 | runs-on: ubuntu-latest 24 | steps: 25 | # Note that a duplicate of this step must be added at the top of 26 | # each job. 27 | - name: Apply standard cisagov job preamble 28 | uses: cisagov/action-job-preamble@v1 29 | with: 30 | check_github_status: "true" 31 | # This functionality is poorly implemented and has been 32 | # causing problems due to the MITM implementation hogging or 33 | # leaking memory. As a result we disable it by default. If 34 | # you want to temporarily enable it, simply set 35 | # monitor_permissions equal to "true". 36 | # 37 | # TODO: Re-enable this functionality when practical. See 38 | # cisagov/skeleton-generic#207 for more details. 39 | monitor_permissions: "false" 40 | output_workflow_context: "true" 41 | # Use a variable to specify the permissions monitoring 42 | # configuration. By default this will yield the 43 | # configuration stored in the cisagov organization-level 44 | # variable, but if you want to use a different configuration 45 | # then simply: 46 | # 1. Create a repository-level variable with the name 47 | # ACTIONS_PERMISSIONS_CONFIG. 48 | # 2. Set this new variable's value to the configuration you 49 | # want to use for this repository. 50 | # 51 | # Note in particular that changing the permissions 52 | # monitoring configuration *does not* require you to modify 53 | # this workflow. 54 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 55 | dependency-review: 56 | name: Dependency review 57 | needs: 58 | - diagnostics 59 | permissions: 60 | # actions/checkout needs this to fetch code 61 | contents: read 62 | runs-on: ubuntu-latest 63 | steps: 64 | - name: Apply standard cisagov job preamble 65 | uses: cisagov/action-job-preamble@v1 66 | with: 67 | # This functionality is poorly implemented and has been 68 | # causing problems due to the MITM implementation hogging or 69 | # leaking memory. As a result we disable it by default. If 70 | # you want to temporarily enable it, simply set 71 | # monitor_permissions equal to "true". 72 | # 73 | # TODO: Re-enable this functionality when practical. See 74 | # cisagov/skeleton-generic#207 for more details. 75 | monitor_permissions: "false" 76 | # Use a variable to specify the permissions monitoring 77 | # configuration. By default this will yield the 78 | # configuration stored in the cisagov organization-level 79 | # variable, but if you want to use a different configuration 80 | # then simply: 81 | # 1. Create a repository-level variable with the name 82 | # ACTIONS_PERMISSIONS_CONFIG. 83 | # 2. Set this new variable's value to the configuration you 84 | # want to use for this repository. 85 | # 86 | # Note in particular that changing the permissions 87 | # monitoring configuration *does not* require you to modify 88 | # this workflow. 
89 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 90 | - id: checkout-repo 91 | name: Checkout the repository 92 | uses: actions/checkout@v4 93 | - id: dependency-review 94 | name: Review dependency changes for vulnerabilities and license changes 95 | uses: actions/dependency-review-action@v4 96 | -------------------------------------------------------------------------------- /.github/workflows/sync-labels.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: sync-labels 3 | 4 | on: # yamllint disable-line rule:truthy 5 | push: 6 | paths: 7 | - .github/labels.yml 8 | - .github/workflows/sync-labels.yml 9 | workflow_dispatch: 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | diagnostics: 16 | name: Run diagnostics 17 | # This job does not need any permissions 18 | permissions: {} 19 | runs-on: ubuntu-latest 20 | steps: 21 | # Note that a duplicate of this step must be added at the top of 22 | # each job. 23 | - name: Apply standard cisagov job preamble 24 | uses: cisagov/action-job-preamble@v1 25 | with: 26 | check_github_status: "true" 27 | # This functionality is poorly implemented and has been 28 | # causing problems due to the MITM implementation hogging or 29 | # leaking memory. As a result we disable it by default. If 30 | # you want to temporarily enable it, simply set 31 | # monitor_permissions equal to "true". 32 | # 33 | # TODO: Re-enable this functionality when practical. See 34 | # cisagov/skeleton-generic#207 for more details. 35 | monitor_permissions: "false" 36 | output_workflow_context: "true" 37 | # Use a variable to specify the permissions monitoring 38 | # configuration. By default this will yield the 39 | # configuration stored in the cisagov organization-level 40 | # variable, but if you want to use a different configuration 41 | # then simply: 42 | # 1. Create a repository-level variable with the name 43 | # ACTIONS_PERMISSIONS_CONFIG. 44 | # 2. Set this new variable's value to the configuration you 45 | # want to use for this repository. 46 | # 47 | # Note in particular that changing the permissions 48 | # monitoring configuration *does not* require you to modify 49 | # this workflow. 50 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 51 | labeler: 52 | needs: 53 | - diagnostics 54 | permissions: 55 | # actions/checkout needs this to fetch code 56 | contents: read 57 | # crazy-max/ghaction-github-labeler needs this to manage repository labels 58 | issues: write 59 | runs-on: ubuntu-latest 60 | steps: 61 | - name: Apply standard cisagov job preamble 62 | uses: cisagov/action-job-preamble@v1 63 | with: 64 | # This functionality is poorly implemented and has been 65 | # causing problems due to the MITM implementation hogging or 66 | # leaking memory. As a result we disable it by default. If 67 | # you want to temporarily enable it, simply set 68 | # monitor_permissions equal to "true". 69 | # 70 | # TODO: Re-enable this functionality when practical. See 71 | # cisagov/skeleton-generic#207 for more details. 72 | monitor_permissions: "false" 73 | # Use a variable to specify the permissions monitoring 74 | # configuration. By default this will yield the 75 | # configuration stored in the cisagov organization-level 76 | # variable, but if you want to use a different configuration 77 | # then simply: 78 | # 1. Create a repository-level variable with the name 79 | # ACTIONS_PERMISSIONS_CONFIG. 80 | # 2. 
Set this new variable's value to the configuration you 81 | # want to use for this repository. 82 | # 83 | # Note in particular that changing the permissions 84 | # monitoring configuration *does not* require you to modify 85 | # this workflow. 86 | permissions_monitoring_config: ${{ vars.ACTIONS_PERMISSIONS_CONFIG }} 87 | - uses: actions/checkout@v4 88 | - name: Sync repository labels 89 | if: success() 90 | uses: crazy-max/ghaction-github-labeler@v5 91 | with: 92 | # This is a hideous ternary equivalent so we only do a dry run unless 93 | # this workflow is triggered by the develop branch. 94 | dry-run: ${{ github.ref_name == 'develop' && 'false' || 'true' }} 95 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # This file specifies intentionally untracked files that Git should ignore. 2 | # Files already tracked by Git are not affected. 3 | # See: https://git-scm.com/docs/gitignore 4 | 5 | ## Python ## 6 | __pycache__ 7 | .coverage 8 | .mypy_cache 9 | .pytest_cache 10 | .python-version 11 | *.egg-info 12 | dist 13 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | combine_star=true 3 | force_sort_within_sections=true 4 | 5 | import_heading_stdlib=Standard Python Libraries 6 | import_heading_thirdparty=Third-Party Libraries 7 | import_heading_firstparty=cisagov Libraries 8 | 9 | # Run isort under the black profile to align with our other Python linting 10 | profile=black 11 | -------------------------------------------------------------------------------- /.mdl_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | # Default state for all rules 4 | default: true 5 | 6 | # MD003/heading-style/header-style - Heading style 7 | MD003: 8 | # Enforce the ATX-closed style of header 9 | style: atx_closed 10 | 11 | # MD004/ul-style - Unordered list style 12 | MD004: 13 | # Enforce dashes for unordered lists 14 | style: dash 15 | 16 | # MD013/line-length - Line length 17 | MD013: 18 | # Do not enforce for code blocks 19 | code_blocks: false 20 | # Do not enforce for tables 21 | tables: false 22 | 23 | # MD024/no-duplicate-heading/no-duplicate-header - Multiple headings with the 24 | # same content 25 | MD024: 26 | # Allow headers with the same content as long as they are not in the same 27 | # parent heading 28 | allow_different_nesting: true 29 | 30 | # MD029/ol-prefix - Ordered list item prefix 31 | MD029: 32 | # Enforce the `1.` style for ordered lists 33 | style: one 34 | 35 | # MD033/no-inline-html - Inline HTML 36 | MD033: 37 | # The h1 and img elements are allowed to permit header images 38 | allowed_elements: 39 | - h1 40 | - img 41 | 42 | # MD035/hr-style - Horizontal rule style 43 | MD035: 44 | # Enforce dashes for horizontal rules 45 | style: --- 46 | 47 | # MD046/code-block-style - Code block style 48 | MD046: 49 | # Enforce the fenced style for code blocks 50 | style: fenced 51 | 52 | # MD049/emphasis-style - Emphasis style should be consistent 53 | MD049: 54 | # Enforce asterisks as the style to use for emphasis 55 | style: asterisk 56 | 57 | # MD050/strong-style - Strong style should be consistent 58 | MD050: 59 | # Enforce asterisks as the style to use for strong 60 | style: asterisk 61 | 
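The `import_heading_*` settings in the `.isort.cfg` shown above give each import section a labeled comment header when isort rewrites a module. A minimal sketch of what a compliant module preamble looks like after isort runs (the module names below are hypothetical and assume `requests` and `pshtt` are installed):

```python
"""Demonstrate the import grouping that this isort configuration enforces."""

# Standard Python Libraries
import logging
from pathlib import Path

# Third-Party Libraries
import requests

# cisagov Libraries
from pshtt import utils
```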
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | default_language_version: 3 | # force all unspecified python hooks to run python3 4 | python: python3 5 | 6 | repos: 7 | # Check the pre-commit configuration 8 | - repo: meta 9 | hooks: 10 | - id: check-useless-excludes 11 | 12 | - repo: https://github.com/pre-commit/pre-commit-hooks 13 | rev: v5.0.0 14 | hooks: 15 | - id: check-case-conflict 16 | - id: check-executables-have-shebangs 17 | - id: check-json 18 | - id: check-merge-conflict 19 | - id: check-shebang-scripts-are-executable 20 | - id: check-symlinks 21 | - id: check-toml 22 | - id: check-vcs-permalinks 23 | - id: check-xml 24 | - id: debug-statements 25 | - id: destroyed-symlinks 26 | - id: detect-aws-credentials 27 | args: 28 | - --allow-missing-credentials 29 | - id: detect-private-key 30 | - id: end-of-file-fixer 31 | - id: mixed-line-ending 32 | args: 33 | - --fix=lf 34 | - id: pretty-format-json 35 | args: 36 | - --autofix 37 | - id: requirements-txt-fixer 38 | - id: trailing-whitespace 39 | 40 | # Text file hooks 41 | - repo: https://github.com/igorshubovych/markdownlint-cli 42 | rev: v0.44.0 43 | hooks: 44 | - id: markdownlint 45 | args: 46 | - --config=.mdl_config.yaml 47 | - repo: https://github.com/rbubley/mirrors-prettier 48 | rev: v3.5.3 49 | hooks: 50 | - id: prettier 51 | - repo: https://github.com/adrienverge/yamllint 52 | rev: v1.37.0 53 | hooks: 54 | - id: yamllint 55 | args: 56 | - --strict 57 | 58 | # GitHub Actions hooks 59 | - repo: https://github.com/python-jsonschema/check-jsonschema 60 | rev: 0.32.1 61 | hooks: 62 | - id: check-github-actions 63 | - id: check-github-workflows 64 | 65 | # pre-commit hooks 66 | - repo: https://github.com/pre-commit/pre-commit 67 | # pre-commit v3+ dropped support for Python <3.8. Until this project and 68 | # the build.yml workflow can migrate to Python 3.8 or newer we must 69 | # continue to use an older version. 
70 | rev: v2.21.0 71 | hooks: 72 | - id: validate_manifest 73 | 74 | # Go hooks 75 | - repo: https://github.com/TekWizely/pre-commit-golang 76 | rev: v1.0.0-rc.1 77 | hooks: 78 | # Go Build 79 | - id: go-build-repo-mod 80 | # Style Checkers 81 | - id: go-critic 82 | # goimports 83 | - id: go-imports-repo 84 | args: 85 | # Write changes to files 86 | - -w 87 | # Go Mod Tidy 88 | - id: go-mod-tidy-repo 89 | # GoSec 90 | - id: go-sec-repo-mod 91 | # StaticCheck 92 | - id: go-staticcheck-repo-mod 93 | # Go Test 94 | - id: go-test-repo-mod 95 | # Go Vet 96 | - id: go-vet-repo-mod 97 | # Nix hooks 98 | - repo: https://github.com/nix-community/nixpkgs-fmt 99 | rev: v1.3.0 100 | hooks: 101 | - id: nixpkgs-fmt 102 | 103 | # Shell script hooks 104 | - repo: https://github.com/scop/pre-commit-shfmt 105 | rev: v3.11.0-1 106 | hooks: 107 | - id: shfmt 108 | args: 109 | # List files that will be formatted 110 | - --list 111 | # Write result to file instead of stdout 112 | - --write 113 | # Indent by two spaces 114 | - --indent 115 | - "2" 116 | # Binary operators may start a line 117 | - --binary-next-line 118 | # Switch cases are indented 119 | - --case-indent 120 | # Redirect operators are followed by a space 121 | - --space-redirects 122 | - repo: https://github.com/shellcheck-py/shellcheck-py 123 | rev: v0.10.0.1 124 | hooks: 125 | - id: shellcheck 126 | 127 | # Python hooks 128 | # Run bandit on the "tests" tree with a configuration 129 | - repo: https://github.com/PyCQA/bandit 130 | # bandit 1.7.6 dropped support for Python <3.8. Until this project 131 | # and the build.yml workflow can migrate to Python 3.8 or newer we 132 | # must continue to use an older version. 133 | rev: 1.7.5 134 | hooks: 135 | - id: bandit 136 | name: bandit (tests tree) 137 | files: tests 138 | args: 139 | - --config=.bandit.yml 140 | additional_dependencies: 141 | - importlib-metadata<5 142 | # Run bandit on everything except the "tests" tree 143 | - repo: https://github.com/PyCQA/bandit 144 | # bandit 1.7.6 dropped support for Python <3.8. Until this project 145 | # and the build.yml workflow can migrate to Python 3.8 or newer we 146 | # must continue to use an older version. 147 | rev: 1.7.5 148 | hooks: 149 | - id: bandit 150 | name: bandit (everything else) 151 | exclude: tests 152 | additional_dependencies: 153 | - importlib-metadata<5 154 | - repo: https://github.com/psf/black-pre-commit-mirror 155 | rev: 25.1.0 156 | hooks: 157 | - id: black 158 | - repo: https://github.com/PyCQA/flake8 159 | # flake8 v6+ dropped support for Python <3.8. Until this project and 160 | # the build.yml workflow can migrate to Python 3.8 or newer we must 161 | # continue to use an older version. 162 | rev: 5.0.4 163 | hooks: 164 | - id: flake8 165 | additional_dependencies: 166 | - flake8-docstrings==1.7.0 167 | - repo: https://github.com/PyCQA/isort 168 | # isort 5.12.0 dropped support for Python <3.8. Until this project and 169 | # the build.yml workflow can migrate to Python 3.8 or newer we must 170 | # continue to use an older version. 171 | rev: 5.11.5 172 | hooks: 173 | - id: isort 174 | - repo: https://github.com/pre-commit/mirrors-mypy 175 | # mypy 1.5.0 dropped support for Python <3.8. Until this project 176 | # and the build.yml workflow can migrate to Python 3.8 or newer we 177 | # must continue to use an older version. 
178 | rev: v1.4.1 179 | hooks: 180 | - id: mypy 181 | # IMPORTANT: Keep type hinting-related dependencies of the 182 | # mypy pre-commit hook additional_dependencies in sync with 183 | # the dev section of setup.py to avoid discrepancies in type 184 | # checking between environments. 185 | additional_dependencies: 186 | - pytest 187 | - pytablewriter 188 | - types-docopt 189 | - types-pyOpenSSL 190 | - types-requests 191 | - types-setuptools 192 | - types-urllib3 193 | # Override the default arguments to drop the --ignore-missing-imports 194 | # option to enforce a complete mypy configuration. 195 | args: 196 | - --scripts-are-modules 197 | # pip-audit turns up several vulnerabilities for cryptography, but 198 | # we cannot pull in a newer version of that library because we 199 | # can't currently support any version of Python later than 3.10. 200 | # - repo: https://github.com/pypa/pip-audit 201 | # rev: v2.7.3 202 | # hooks: 203 | # - id: pip-audit 204 | # args: 205 | # # Add any pip requirements files to scan 206 | # - --requirement 207 | # - requirements-dev.txt 208 | # - --requirement 209 | # - requirements-test.txt 210 | # - --requirement 211 | # - requirements.txt 212 | - repo: https://github.com/asottile/pyupgrade 213 | # pyupgrade no longer supports Python 3.7 as of version 3.4.0, so 214 | # we cannot upgrade past the 3.3.2 release: 215 | # https://github.com/asottile/pyupgrade/blob/v3.4.0/setup.cfg#L23 216 | rev: v3.3.2 217 | hooks: 218 | - id: pyupgrade 219 | 220 | # Ansible hooks 221 | # - repo: https://github.com/ansible/ansible-lint 222 | # # ansible-lint no longer supports Python 3.7 as of version 6.0, so 223 | # # we cannot upgrade past the 5.4.0 release: 224 | # # https://github.com/ansible/ansible-lint/releases/tag/v6.0.0 225 | # # 226 | # # But the 5.4.0 release causes a different failure because the 227 | # # version of ansible isn't correctly pinned. The best way forward 228 | # # is to simply comment out this pre-commit hook until we can move 229 | # # to Python >3.7. 230 | # rev: v25.1.3 231 | # hooks: 232 | # - id: ansible-lint 233 | # additional_dependencies: 234 | # # On its own ansible-lint does not pull in ansible, only 235 | # # ansible-core. Therefore, if an Ansible module lives in 236 | # # ansible instead of ansible-core, the linter will complain 237 | # # that the module is unknown. In these cases it is 238 | # # necessary to add the ansible package itself as an 239 | # # additional dependency, with the same pinning as is done in 240 | # # requirements-test.txt of cisagov/skeleton-ansible-role. 241 | # # 242 | # # Version 10 is required because the pip-audit pre-commit 243 | # # hook identifies a vulnerability in ansible-core 2.16.13, 244 | # # but all versions of ansible 9 have a dependency on 245 | # # ~=2.16.X. 246 | # # 247 | # # It is also a good idea to go ahead and upgrade to version 248 | # # 10 since version 9 is going EOL at the end of November: 249 | # # https://endoflife.date/ansible 250 | # # - ansible>=10,<11 251 | # # ansible-core 2.16.3 through 2.16.6 suffer from the bug 252 | # # discussed in ansible/ansible#82702, which breaks any 253 | # # symlinked files in vars, tasks, etc. for any Ansible role 254 | # # installed via ansible-galaxy. Hence we never want to 255 | # # install those versions. 256 | # # 257 | # # Note that the pip-audit pre-commit hook identifies a 258 | # # vulnerability in ansible-core 2.16.13. The pin of 259 | # # ansible-core to >=2.17 effectively also pins ansible to 260 | # # >=10. 
261 | # # 262 | # # It is also a good idea to go ahead and upgrade to 263 | # # ansible-core 2.17 since security support for ansible-core 264 | # # 2.16 ends this month: 265 | # yamllint disable-line rule:line-length 266 | # # https://docs.ansible.com/ansible/devel/reference_appendices/release_and_maintenance.html#ansible-core-support-matrix 267 | # # 268 | # # Note that any changes made to this dependency must also be 269 | # # made in requirements.txt in cisagov/skeleton-packer and 270 | # # requirements-test.txt in cisagov/skeleton-ansible-role. 271 | # - ansible-core>=2.17 272 | 273 | # Terraform hooks 274 | - repo: https://github.com/antonbabenko/pre-commit-terraform 275 | rev: v1.98.0 276 | hooks: 277 | - id: terraform_fmt 278 | - id: terraform_validate 279 | 280 | # Docker hooks 281 | - repo: https://github.com/IamTheFij/docker-pre-commit 282 | rev: v3.0.1 283 | hooks: 284 | - id: docker-compose-check 285 | 286 | # Packer hooks 287 | - repo: https://github.com/cisagov/pre-commit-packer 288 | rev: v0.3.0 289 | hooks: 290 | - id: packer_fmt 291 | - id: packer_validate 292 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | # Already being linted by pretty-format-json 2 | *.json 3 | # Already being linted by mdl 4 | *.md 5 | # Already being linted by yamllint 6 | *.yaml 7 | *.yml 8 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | --- 2 | extends: default 3 | 4 | rules: 5 | braces: 6 | # Do not allow non-empty flow mappings 7 | forbid: non-empty 8 | # Allow up to one space inside braces. This is required for Ansible compatibility. 9 | max-spaces-inside: 1 10 | 11 | brackets: 12 | # Do not allow non-empty flow sequences 13 | forbid: non-empty 14 | 15 | comments: 16 | # Ensure that inline comments have at least one space before the preceding content. 17 | # This is required for Ansible compatibility. 18 | min-spaces-from-content: 1 19 | 20 | # yamllint does not like it when you comment out different parts of 21 | # dictionaries in a list. You can see 22 | # https://github.com/adrienverge/yamllint/issues/384 for some examples of 23 | # this behavior. 24 | comments-indentation: disable 25 | 26 | indentation: 27 | # Ensure that block sequences inside of a mapping are indented 28 | indent-sequences: true 29 | # Enforce a specific number of spaces 30 | spaces: 2 31 | 32 | # yamllint does not allow inline mappings that exceed the line length by 33 | # default. There are many scenarios where the inline mapping may be a key, 34 | # hash, or other long value that would exceed the line length but cannot 35 | # reasonably be broken across lines. 36 | line-length: 37 | # This rule implies the allow-non-breakable-words rule 38 | allow-non-breakable-inline-mappings: true 39 | # Allows a 10% overage from the default limit of 80 40 | max: 88 41 | 42 | # Using anything other than strings to express octal values can lead to unexpected 43 | # and potentially unsafe behavior. Ansible strongly recommends against such practices 44 | # and these rules are needed for Ansible compatibility. Please see the following for 45 | # more information: 46 | # https://ansible.readthedocs.io/projects/lint/rules/risky-octal/ 47 | octal-values: 48 | # Do not allow explicit octal values (those beginning with a leading 0o). 
49 | forbid-explicit-octal: true 50 | # Do not allow implicit octal values (those beginning with a leading 0). 51 | forbid-implicit-octal: true 52 | 53 | quoted-strings: 54 | # Allow disallowed quotes (single quotes) for strings that contain allowed quotes 55 | # (double quotes). 56 | allow-quoted-quotes: true 57 | # Apply these rules to keys in mappings as well 58 | check-keys: true 59 | # We prefer double quotes for strings when they are needed 60 | quote-type: double 61 | # Only require quotes when they are necessary for proper processing 62 | required: only-when-needed 63 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome # 2 | 3 | We're so glad you're thinking about contributing to this open source 4 | project! If you're unsure or afraid of anything, just ask or submit 5 | the issue or pull request anyway. The worst that can happen is that 6 | you'll be politely asked to change something. We appreciate any sort 7 | of contribution, and don't want a wall of rules to get in the way of 8 | that. 9 | 10 | Before contributing, we encourage you to read our CONTRIBUTING policy 11 | (you are here), our [LICENSE](LICENSE), and our [README](README.md), 12 | all of which should be in this repository. 13 | 14 | ## Issues ## 15 | 16 | If you want to report a bug or request a new feature, the most direct 17 | method is to [create an 18 | issue](https://github.com/cisagov/pshtt/issues) in 19 | this repository. We recommend that you first search through existing 20 | issues (both open and closed) to check if your particular issue has 21 | already been reported. If it has then you might want to add a comment 22 | to the existing issue. If it hasn't then feel free to create a new 23 | one. 24 | 25 | ## Pull requests ## 26 | 27 | If you choose to [submit a pull 28 | request](https://github.com/cisagov/pshtt/pulls), 29 | you will notice that our continuous integration (CI) system runs a 30 | fairly extensive set of linters, syntax checkers, system, and unit tests. 31 | Your pull request may fail these checks, and that's OK. If you want 32 | you can stop there and wait for us to make the necessary corrections 33 | to ensure your code passes the CI checks. 34 | 35 | If you want to make the changes yourself, or if you want to become a 36 | regular contributor, then you will want to set up 37 | [pre-commit](https://pre-commit.com/) on your local machine. Once you 38 | do that, the CI checks will run locally before you even write your 39 | commit message. This speeds up your development cycle considerably. 40 | 41 | ### Setting up pre-commit ### 42 | 43 | There are a few ways to do this, but we prefer to use 44 | [`pyenv`](https://github.com/pyenv/pyenv) and 45 | [`pyenv-virtualenv`](https://github.com/pyenv/pyenv-virtualenv) to 46 | create and manage a Python virtual environment specific to this 47 | project. 48 | 49 | We recommend using the `setup-env` script located in this repository, 50 | as it automates the entire environment configuration process. The 51 | dependencies required to run this script are 52 | [GNU `getopt`](https://github.com/util-linux/util-linux/blob/master/misc-utils/getopt.1.adoc), 53 | [`pyenv`](https://github.com/pyenv/pyenv), and [`pyenv-virtualenv`](https://github.com/pyenv/pyenv-virtualenv). 
54 | If these tools are already configured on your system, you can simply run the 55 | following command: 56 | 57 | ```console 58 | ./setup-env 59 | ``` 60 | 61 | Otherwise, follow the steps below to manually configure your 62 | environment. 63 | 64 | #### Installing and using GNU `getopt`, `pyenv`, and `pyenv-virtualenv` #### 65 | 66 | On macOS, we recommend installing [brew](https://brew.sh/). Then 67 | installation is as simple as `brew install gnu-getopt pyenv pyenv-virtualenv` and 68 | adding this to your profile: 69 | 70 | ```bash 71 | # GNU getopt must be explicitly added to the path since it is 72 | # keg-only (https://docs.brew.sh/FAQ#what-does-keg-only-mean) 73 | export PATH="$(brew --prefix)/opt/gnu-getopt/bin:$PATH" 74 | 75 | # Setup pyenv 76 | export PYENV_ROOT="$HOME/.pyenv" 77 | export PATH="$PYENV_ROOT/bin:$PATH" 78 | eval "$(pyenv init --path)" 79 | eval "$(pyenv init -)" 80 | eval "$(pyenv virtualenv-init -)" 81 | ``` 82 | 83 | For Linux, Windows Subsystem for Linux (WSL), or macOS (if you 84 | don't want to use `brew`) you can use 85 | [pyenv/pyenv-installer](https://github.com/pyenv/pyenv-installer) to 86 | install the necessary tools. Before running this ensure that you have 87 | installed the prerequisites for your platform according to the 88 | [`pyenv` wiki 89 | page](https://github.com/pyenv/pyenv/wiki/common-build-problems). 90 | GNU `getopt` is included in most Linux distributions as part of the 91 | [`util-linux`](https://github.com/util-linux/util-linux) package. 92 | 93 | On WSL you should treat your platform as whatever Linux distribution 94 | you've chosen to install. 95 | 96 | Once you have installed `pyenv` you will need to add the following 97 | lines to your `.bash_profile` (or `.profile`): 98 | 99 | ```bash 100 | export PYENV_ROOT="$HOME/.pyenv" 101 | export PATH="$PYENV_ROOT/bin:$PATH" 102 | eval "$(pyenv init --path)" 103 | ``` 104 | 105 | and then add the following lines to your `.bashrc`: 106 | 107 | ```bash 108 | eval "$(pyenv init -)" 109 | eval "$(pyenv virtualenv-init -)" 110 | ``` 111 | 112 | If you want more information about setting up `pyenv` once installed, please run 113 | 114 | ```console 115 | pyenv init 116 | ``` 117 | 118 | and 119 | 120 | ```console 121 | pyenv virtualenv-init 122 | ``` 123 | 124 | for the current configuration instructions. 125 | 126 | If you are using a shell other than `bash` you should follow the 127 | instructions that the `pyenv-installer` script outputs. 128 | 129 | You will need to reload your shell for these changes to take effect so 130 | you can begin to use `pyenv`. 131 | 132 | For a list of Python versions that are already installed and ready to 133 | use with `pyenv`, use the command `pyenv versions`. To see a list of 134 | the Python versions available to be installed and used with `pyenv` 135 | use the command `pyenv install --list`. You can read more 136 | [here](https://github.com/pyenv/pyenv/blob/master/COMMANDS.md) about 137 | the many things that `pyenv` can do. See 138 | [here](https://github.com/pyenv/pyenv-virtualenv#usage) for the 139 | additional capabilities that pyenv-virtualenv adds to the `pyenv` 140 | command. 
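Before creating the virtual environment, you can quickly verify that
both tools are wired into your shell (assuming `pyenv` and
`pyenv-virtualenv` were installed as described above):

```console
pyenv --version
pyenv virtualenvs
```

If both commands succeed, the shell integration is working; `pyenv
virtualenvs` will simply print nothing if you have not yet created any
virtual environments.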
141 |
142 | #### Creating the Python virtual environment ####
143 |
144 | Once `pyenv` and `pyenv-virtualenv` are installed on your system, you
145 | can create and configure the Python virtual environment with these
146 | commands:
147 |
148 | ```console
149 | cd pshtt
150 | pyenv virtualenv <python_version> pshtt
151 | pyenv local pshtt
152 | pip install --requirement requirements-dev.txt
153 | ```
154 |
155 | #### Installing the pre-commit hook ####
156 |
157 | Now setting up pre-commit is as simple as:
158 |
159 | ```console
160 | pre-commit install
161 | ```
162 |
163 | At this point the pre-commit checks will run against any files that
164 | you attempt to commit. If you want to run the checks against the
165 | entire repo, just execute `pre-commit run --all-files`.
166 |
167 | ### Running unit and system tests ###
168 |
169 | In addition to the pre-commit checks the CI system will run the suite
170 | of unit and system tests that are included with this project. To run
171 | these tests locally execute `pytest` from the root of the project.
172 |
173 | We encourage any updates to these tests to improve the overall code
174 | coverage. If your pull request adds new functionality we would
175 | appreciate it if you extend existing test cases, or add new ones to
176 | exercise the newly added code.
177 |
178 | ## Public domain ##
179 |
180 | This project is in the public domain within the United States, and
181 | copyright and related rights in the work worldwide are waived through
182 | the [CC0 1.0 Universal public domain
183 | dedication](https://creativecommons.org/publicdomain/zero/1.0/).
184 |
185 | All contributions to this project will be released under the CC0
186 | dedication. By submitting a pull request, you are agreeing to comply
187 | with this waiver of copyright interest.
188 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal
2 |
3 | Statement of Purpose
4 |
5 | The laws of most jurisdictions throughout the world automatically confer
6 | exclusive Copyright and Related Rights (defined below) upon the creator and
7 | subsequent owner(s) (each and all, an "owner") of an original work of
8 | authorship and/or a database (each, a "Work").
9 |
10 | Certain owners wish to permanently relinquish those rights to a Work for the
11 | purpose of contributing to a commons of creative, cultural and scientific
12 | works ("Commons") that the public can reliably and without fear of later
13 | claims of infringement build upon, modify, incorporate in other works, reuse
14 | and redistribute as freely as possible in any form whatsoever and for any
15 | purposes, including without limitation commercial purposes. These owners may
16 | contribute to the Commons to promote the ideal of a free culture and the
17 | further production of creative, cultural and scientific works, or to gain
18 | reputation or greater distribution for their Work in part through the use and
19 | efforts of others.
20 | 21 | For these and/or other purposes and motivations, and without any expectation 22 | of additional consideration or compensation, the person associating CC0 with a 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work 25 | and publicly distribute the Work under its terms, with knowledge of his or her 26 | Copyright and Related Rights in the Work and the meaning and intended legal 27 | effect of CC0 on those rights. 28 | 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver
76 | is so judged Affirmer hereby grants to each affected person a royalty-free,
77 | non transferable, non sublicensable, non exclusive, irrevocable and
78 | unconditional license to exercise Affirmer's Copyright and Related Rights in
79 | the Work (i) in all territories worldwide, (ii) for the maximum duration
80 | provided by applicable law or treaty (including future time extensions), (iii)
81 | in any current or future medium and for any number of copies, and (iv) for any
82 | purpose whatsoever, including without limitation commercial, advertising or
83 | promotional purposes (the "License"). The License shall be deemed effective as
84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the
85 | License for any reason be judged legally invalid or ineffective under
86 | applicable law, such partial invalidity or ineffectiveness shall not
87 | invalidate the remainder of the License, and in such case Affirmer hereby
88 | affirms that he or she will not (i) exercise any of his or her remaining
89 | Copyright and Related Rights in the Work or (ii) assert any associated claims
90 | and causes of action with respect to the Work, in either case contrary to
91 | Affirmer's express Statement of Purpose.
92 |
93 | 4. Limitations and Disclaimers.
94 |
95 | a. No trademark or patent rights held by Affirmer are waived, abandoned,
96 | surrendered, licensed or otherwise affected by this document.
97 |
98 | b. Affirmer offers the Work as-is and makes no representations or warranties
99 | of any kind concerning the Work, express, implied, statutory or otherwise,
100 | including without limitation warranties of title, merchantability, fitness
101 | for a particular purpose, non infringement, or the absence of latent or
102 | other defects, accuracy, or the present or absence of errors, whether or not
103 | discoverable, all to the greatest extent permissible under applicable law.
104 |
105 | c. Affirmer disclaims responsibility for clearing rights of other persons
106 | that may apply to the Work or any use thereof, including without limitation
107 | any person's Copyright and Related Rights in the Work. Further, Affirmer
108 | disclaims responsibility for obtaining any necessary consents, permissions
109 | or other rights required for any use of the Work.
110 |
111 | d. Affirmer understands and acknowledges that Creative Commons is not a
112 | party to this document and has no duty or obligation with respect to this
113 | CC0 or use of the Work.
114 |
115 | For more information, please see
116 | <https://creativecommons.org/publicdomain/zero/1.0/>
117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pushing HTTPS 🔒 #
2 |
3 | [![Latest Version](https://img.shields.io/pypi/v/pshtt.svg)](https://pypi.org/project/pshtt/)
4 | [![GitHub Build Status](https://github.com/cisagov/pshtt/workflows/build/badge.svg)](https://github.com/cisagov/pshtt/actions)
5 | [![CodeQL](https://github.com/cisagov/pshtt/workflows/CodeQL/badge.svg)](https://github.com/cisagov/pshtt/actions/workflows/codeql-analysis.yml)
6 | [![Coverage Status](https://coveralls.io/repos/github/cisagov/pshtt/badge.svg?branch=develop)](https://coveralls.io/github/cisagov/pshtt?branch=develop)
7 | [![Known Vulnerabilities](https://snyk.io/test/github/cisagov/pshtt/develop/badge.svg)](https://snyk.io/test/github/cisagov/pshtt)
8 |
9 | `pshtt` (*"pushed"*) is a tool to scan domains for HTTPS best
10 | practices.
It saves its results to a CSV (or JSON) file.
11 |
12 | `pshtt` was developed to *push* organizations — especially large ones
13 | like the US Federal Government :us: — to adopt HTTPS across the
14 | enterprise. Federal agencies must comply with
15 | [M-15-13](https://https.cio.gov), a 2015 memorandum from the White
16 | House Office of Management and Budget, and [BOD
17 | 18-01](https://cyber.dhs.gov/bod/18-01/), a 2017 directive from the
18 | Department of Homeland Security, which require federal agencies to
19 | enforce HTTPS on their public web services. Much has been done, but
20 | there's [more yet to
21 | do](https://18f.gsa.gov/2017/01/04/tracking-the-us-governments-progress-on-moving-https/).
22 |
23 | `pshtt` is a collaboration between the Cybersecurity and Infrastructure
24 | Security Agency's [National Cybersecurity Assessments and Technical
25 | Services (NCATS) team](https://github.com/cisagov) and [the General
26 | Services Administration's 18F team](https://18f.gsa.gov), with
27 | [contributions from NASA, Lawrence Livermore National Laboratory, and
28 | various non-governmental
29 | organizations](https://github.com/cisagov/pshtt/graphs/contributors).
30 |
31 | ## Getting started ##
32 |
33 | `pshtt` can be installed as a module, or run directly from the
34 | repository.
35 |
36 | ### Installed as a module ###
37 |
38 | `pshtt` can be installed directly via pip:
39 |
40 | ```console
41 | pip install pshtt
42 | ```
43 |
44 | It can then be run directly:
45 |
46 | ```console
47 | pshtt example.com [options]
48 | ```
49 |
50 | ### Running directly ###
51 |
52 | To run the tool locally from the repository, without installing, first
53 | install the requirements:
54 |
55 | ```console
56 | pip install -r requirements.txt
57 | ```
58 |
59 | Then run it as a module via `python -m`:
60 |
61 | ```console
62 | python -m pshtt.cli example.com [options]
63 | ```
64 |
65 | ### Usage and examples ###
66 |
67 | ```console
68 | pshtt [options] DOMAIN...
69 | pshtt [options] INPUT
70 |
71 | pshtt dhs.gov
72 | pshtt --output=homeland.csv --debug dhs.gov us-cert.gov usss.gov
73 | pshtt --sorted current-federal.csv
74 | ```
75 |
76 | Note: if INPUT ends with `.csv`, domains will be read from the first
77 | column of the CSV. CSV output will always be written to disk (unless
78 | --json is specified), defaulting to `results.csv`.
79 |
80 | #### Options ####
81 |
82 | ```console
83 | -h --help Show this message.
84 | -s --sorted Sort output by domain, A-Z.
85 | -o --output=OUTFILE Name output file. (Defaults to "results".)
86 | -j --json Get results in JSON. (Defaults to CSV.)
87 | -m --markdown Get results in Markdown. (Defaults to CSV.)
88 | -d --debug Print debug output.
89 | -u --user-agent=AGENT Override user agent.
90 | -t --timeout=TIMEOUT Override timeout (in seconds).
91 | -c --cache-third-parties=DIR Cache third party data, and what directory to cache it in.
92 | -f --ca-file=PATH Specify custom CA bundle (PEM format)
93 | ```
94 |
95 | ##### Using your own CA bundle #####
96 |
97 | By default, `pshtt` relies on the root CAs that are trusted in the
98 | [Mozilla root
99 | store](https://hg.mozilla.org/mozilla-central/raw-file/tip/security/nss/lib/ckfw/builtins/certdata.txt).
100 | If you work behind a corporate proxy or have your own certificates that
101 | aren't publicly trusted, you can specify your own CA bundle:
102 |
103 | ```console
104 | pshtt --ca-file=/etc/ssl/ca.pem server.internal-location.gov
105 | ```
106 |
107 | ## What's checked?
##
108 |
109 | A domain is checked on its four endpoints:
110 |
111 | - `http://`
112 | - `http://www`
113 | - `https://`
114 | - `https://www`
115 |
116 | ### Domain and redirect info ###
117 |
118 | The following values are returned in `results.csv`:
119 |
120 | - `Domain` - The domain you're scanning!
121 | - `Base Domain` - The base domain of `Domain`. For example, for a
122 | Domain of `sub.example.com`, the Base Domain will be
123 | `example.com`. Usually this is the second-level domain, but `pshtt`
124 | will download and factor in the [Public Suffix
125 | List](https://publicsuffix.org) when calculating the base
126 | domain. (To cache the Public Suffix List, use `--cache-third-parties` as
127 | documented above.)
128 | - `Canonical URL` - One of the four endpoints described above; a
129 | judgment call based on the observed redirect logic of the domain.
130 | - `Live` - The domain is "live" if any endpoint is live.
131 | - `HTTPS Live` - The domain is "HTTPS live" if any HTTPS endpoint is
132 | live.
133 | - `HTTPS Full Connection` - The domain is "fully connected" if any
134 | HTTPS endpoint is fully connected. A "fully connected" HTTPS
135 | endpoint is one with which pshtt could make a full TLS connection.
136 | - `HTTPS Client Auth Required` - A domain requires client
137 | authentication if *any* HTTPS endpoint requires it for a full TLS
138 | connection.
139 | - `Redirect` - The domain is a "redirect domain" if at least one
140 | endpoint is a redirect, and all endpoints are either redirects or
141 | down.
142 | - `Redirect to` - If a domain is a "redirect domain", where does it
143 | redirect to?
144 |
145 | ### Landing on HTTPS ###
146 |
147 | - `Valid HTTPS` - A domain has "valid HTTPS" if it responds on port
148 | 443 at the hostname in its Canonical URL with an unexpired valid
149 | certificate for the hostname. This can be true even if the Canonical
150 | URL uses HTTP.
151 | - `HTTPS Publicly Trusted` - A domain is "publicly trusted" if its
152 | canonical endpoint has a publicly trusted certificate.
153 | - `HTTPS Custom Truststore Trusted` - A domain is "custom truststore
154 | trusted" if its canonical endpoint has a certificate that is trusted
155 | by the custom truststore.
156 | - `Defaults to HTTPS` - A domain "defaults to HTTPS" if its canonical
157 | endpoint uses HTTPS.
158 | - `Downgrades HTTPS` - A domain "downgrades HTTPS" if HTTPS is
159 | supported in some way, but its canonical HTTPS endpoint immediately
160 | redirects internally to HTTP.
161 | - `Strictly Forces HTTPS` - This is different than whether a domain
162 | "defaults" to HTTPS. A domain "Strictly Forces HTTPS" if one of the
163 | HTTPS endpoints is "live", and if both HTTP endpoints are either
164 | down or redirect immediately to any HTTPS URI. An HTTP redirect can
165 | go to HTTPS on another domain, as long as it's immediate. (A domain
166 | with an invalid cert can still be enforcing HTTPS.)
167 |
168 | ### Common errors ###
169 |
170 | - `HTTPS Bad Chain` - A domain has a bad chain if either HTTPS
171 | endpoint contains a bad chain.
172 | - `HTTPS Bad Hostname` - A domain has a bad hostname if either HTTPS
173 | endpoint fails hostname validation.
174 | - `HTTPS Expired Cert` - A domain has an expired certificate if either
175 | HTTPS endpoint has an expired certificate.
176 | - `HTTPS Self-Signed Cert` - A domain has a self-signed certificate if
177 | either HTTPS endpoint has a self-signed certificate.
178 | - `HTTPS Probably Missing Intermediate Cert` - A domain is "probably 179 | missing intermediate certificate" if the canonical HTTPS endpoint is 180 | probably missing an intermediate certificate. 181 | 182 | ### HSTS ### 183 | 184 | - `HSTS` - A domain has HTTP Strict Transport Security enabled if its 185 | canonical HTTPS endpoint has HSTS enabled. 186 | - `HSTS Header` - This field provides a domain's HSTS header at its 187 | canonical endpoint. 188 | - `HSTS Max Age` - A domain's HSTS max-age is its canonical endpoint's 189 | max-age. 190 | - `HSTS Entire Domain` - A domain has HSTS enabled for the entire 191 | domain if its **root HTTPS endpoint** (*not the canonical HTTPS 192 | endpoint*) has HSTS enabled and uses the HSTS `includeSubDomains` 193 | flag. 194 | - `HSTS Preload Ready` - A domain is HSTS "preload ready" if its 195 | **root HTTPS endpoint** (*not the canonical HTTPS endpoint*) has 196 | HSTS enabled, has a max-age of at least 18 weeks, and uses the 197 | `includeSubDomains` and `preload` flag. 198 | - `HSTS Preload Pending` - A domain is "preload pending" when it 199 | appears in the [Chrome preload pending 200 | list](https://hstspreload.org/api/v2/pending) with the 201 | `include_subdomains` flag equal to `true`. The intent of `pshtt` is 202 | to make sure that the user is *fully* protected, so it only counts 203 | domains as HSTS preloaded if they are *fully* HSTS preloaded 204 | (meaning that all subdomains are included as well). 205 | - `HSTS Preloaded` - A domain is HSTS preloaded if its domain name 206 | appears in the [Chrome preload 207 | list](https://chromium.googlesource.com/chromium/src/net/+/master/http/transport_security_state_static.json) 208 | with the `include_subdomains` flag equal to `true`, regardless of 209 | what header is present on any endpoint. The intent of `pshtt` is to 210 | make sure that the user is *fully* protected, so it only counts 211 | domains as HSTS preloaded if they are *fully* HSTS preloaded 212 | (meaning that all subdomains are included as well). 213 | - `Base Domain HSTS Preloaded` - A domain's base domain is HSTS 214 | preloaded if its base domain appears in the [Chrome preload 215 | list](https://chromium.googlesource.com/chromium/src/net/+/master/http/transport_security_state_static.json) 216 | with the `include_subdomains` flag equal to `true`. This is subtly 217 | different from `HSTS Entire Domain`, which inspects headers on the 218 | base domain to see if HSTS is set correctly to encompass the entire 219 | zone. 220 | 221 | ### Scoring ### 222 | 223 | These three fields use the previous results to come to high-level 224 | conclusions about a domain's behavior. 225 | 226 | - `Domain Supports HTTPS` - A domain 'Supports HTTPS' when it doesn't 227 | downgrade and has valid HTTPS, or when it doesn't downgrade and has 228 | a bad chain but not a bad hostname (a bad hostname makes it clear 229 | the domain isn't actively attempting to support HTTPS, whereas an 230 | incomplete chain is just a mistake.). Domains with a bad chain 231 | "support" HTTPS but user-side errors can be expected. 232 | - `Domain Enforces HTTPS` - A domain that 'Enforces HTTPS' must 233 | 'Support HTTPS' and default to HTTPS. For websites (where `Redirect` 234 | is `false`) they are allowed to *eventually* redirect to an 235 | `https://` URI. 
For "redirect domains" (domains where the `Redirect` 236 | value is `true`) they must *immediately* redirect clients to an 237 | `https://` URI (even if that URI is on another domain) in order to 238 | be said to enforce HTTPS. 239 | - `Domain Uses Strong HSTS` - A domain 'Uses Strong HSTS' when the 240 | max-age ≥ 31536000. 241 | 242 | ### General information ### 243 | 244 | - `IP` - The IP for the domain. 245 | - `Server Header` - The server header from the response for the 246 | domain. 247 | - `Server Version` - The server version, as extracted from the server 248 | header. 249 | - `HTTPS Cert Chain Length` - The certificate chain length for the 250 | canonical HTTPS endpoint. 251 | - `Notes` - A field where free-form notes about the domain can be 252 | stored. 253 | 254 | ### Uncommon errors ### 255 | 256 | - `Unknown Error` - A Boolean value indicating whether or not an 257 | unexpected exception was encountered when testing the domain. The 258 | purpose of this field is to flag any odd websites for further 259 | debugging. 260 | 261 | ## Troubleshooting ## 262 | 263 | ### DNS blackhole / DNS assist ### 264 | 265 | One issue which can occur when running `pshtt`, particularly for 266 | home/residential networks, with standard ISPs is the use of "DNS 267 | Assist" features, a.k.a. "DNS Blackholes". 268 | 269 | In these environments, you may see inconsistent results from `pshtt` 270 | owing to the fact that your ISP is attempting to detect a request for 271 | an unknown site without a DNS record and is redirecting you to a 272 | search page for that site. This means that an endpoint which *should* 273 | resolve as "not-alive", will instead resolve as "live", owing to the 274 | detection of the live search result page. 275 | 276 | If you would like to disable this "feature", several ISPs offer the 277 | ability to opt out of this service, and maintain their own 278 | instructions for doing so: 279 | 280 | - [AT&T](http://www.att.net/dnserrorassist/about/srchTrm=Redirect%20Bin) 281 | - [FIOS](https://www.verizon.com/support/residential/internet/fiosinternet/troubleshooting/network/questionsone/99147.htm) 282 | 283 | ## Who uses pshtt? ## 284 | 285 | - GSA maintains [Pulse](https://pulse.cio.gov), a dashboard that 286 | tracks how federal government domains are meeting best practices on 287 | the web. [Pulse is open source](https://github.com/18F/pulse). 288 | - The Freedom of the Press Foundation runs 289 | [securethe.news](https://securethe.news), a site that aims to "track 290 | and promote the adoption of HTTPS encryption by major news 291 | organizations' websites". [Secure the News is open 292 | source](https://securethe.news/blog/secure-news-open-source/). 293 | - DHS issues [HTTPS Reports](https://18f.gsa.gov/2017/01/06/open-source-collaboration-across-agencies-to-improve-https-deployment/) 294 | to federal executive branch agencies. 295 | 296 | ## Acknowledgements ## 297 | 298 | This code was modeled after [Ben 299 | Balter](https://github.com/benbalter)'s 300 | [site-inspector](https://github.com/benbalter/site-inspector), with 301 | significant guidance from [Eric Mill](https://github.com/konklone). 302 | 303 | ## Contributing ## 304 | 305 | We welcome contributions! Please see [`CONTRIBUTING.md`](CONTRIBUTING.md) for 306 | details. 307 | 308 | ## License ## 309 | 310 | This project is in the worldwide [public domain](LICENSE). 
311 |
312 | This project is in the public domain within the United States, and
313 | copyright and related rights in the work worldwide are waived through
314 | the [CC0 1.0 Universal public domain
315 | dedication](https://creativecommons.org/publicdomain/zero/1.0/).
316 |
317 | All contributions to this project will be released under the CC0
318 | dedication. By submitting a pull request, you are agreeing to comply
319 | with this waiver of copyright interest.
320 | -------------------------------------------------------------------------------- /bump-version: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash
2 |
3 | # bump-version [--push] [--label LABEL] (major | minor | patch | prerelease | build | finalize | show)
4 | # bump-version --list-files
5 |
6 | set -o nounset
7 | set -o errexit
8 | set -o pipefail
9 |
10 | # Stores the canonical version for the project.
11 | VERSION_FILE=src/pshtt/_version.py
12 | # Files that should be updated with the new version.
13 | VERSION_FILES=("$VERSION_FILE")
14 |
15 | USAGE=$(
16 | cat << END_OF_LINE
17 | Update the version of the project.
18 |
19 | Usage:
20 | ${0##*/} [--push] [--label LABEL] (major | minor | patch | prerelease | build | finalize | show)
21 | ${0##*/} --list-files
22 | ${0##*/} (-h | --help)
23 |
24 | Options:
25 | -h | --help Show this message.
26 | --push Perform a \`git push\` after updating the version.
27 | --label LABEL Specify the label to use when updating the build or prerelease version.
28 | --list-files List the files that will be updated when the version is bumped.
29 | END_OF_LINE
30 | )
31 |
32 | old_version=$(sed -n "s/^__version__ = \"\(.*\)\"$/\1/p" $VERSION_FILE)
33 | # Escape periods so they are interpreted as literal periods and don't
34 | # just match any character
35 | old_version_regex=${old_version//\./\\\.}
36 | new_version="$old_version"
37 |
38 | bump_part=""
39 | label=""
40 | commit_prefix="Bump"
41 | with_push=false
42 | commands_with_label=("build" "prerelease")
43 | commands_with_prerelease=("major" "minor" "patch")
44 | with_prerelease=false
45 |
46 | #######################################
47 | # Display an error message, the help information, and exit with a non-zero status.
48 | # Arguments:
49 | # Error message.
50 | #######################################
51 | function invalid_option() {
52 | echo "$1"
53 | echo "$USAGE"
54 | exit 1
55 | }
56 |
57 | #######################################
58 | # Bump the version using the provided command.
59 | # Arguments:
60 | # The version to bump.
61 | # The command to bump the version.
62 | # Returns:
63 | # The new version.
64 | #######################################
65 | function bump_version() {
66 | local temp_version
67 | temp_version=$(python -c "import semver; print(semver.parse_version_info('$1').${2})")
68 | echo "$temp_version"
69 | }
70 |
71 | if [ $# -eq 0 ]; then
72 | echo "$USAGE"
73 | exit 1
74 | else
75 | while [ $# -gt 0 ]; do
76 | case $1 in
77 | --push)
78 | if [ "$with_push" = true ]; then
79 | invalid_option "Push has already been set."
80 | fi
81 |
82 | with_push=true
83 | shift
84 | ;;
85 | --label)
86 | if [ -n "$label" ]; then
87 | invalid_option "Label has already been set."
88 | fi
89 |
90 | label="$2"
91 | shift 2
92 | ;;
93 | build | finalize | major | minor | patch)
94 | if [ -n "$bump_part" ]; then
95 | invalid_option "Only one version part should be bumped at a time."
96 | fi 97 | 98 | bump_part="$1" 99 | shift 100 | ;; 101 | prerelease) 102 | with_prerelease=true 103 | shift 104 | ;; 105 | show) 106 | echo "$old_version" 107 | exit 0 108 | ;; 109 | -h | --help) 110 | echo "$USAGE" 111 | exit 0 112 | ;; 113 | --list-files) 114 | printf '%s\n' "${VERSION_FILES[@]}" 115 | exit 0 116 | ;; 117 | *) 118 | invalid_option "Invalid option: $1" 119 | ;; 120 | esac 121 | done 122 | fi 123 | 124 | if [ -n "$label" ] && [ "$with_prerelease" = false ] && [[ ! " ${commands_with_label[*]} " =~ [[:space:]]${bump_part}[[:space:]] ]]; then 125 | invalid_option "Setting the label is only allowed for the following commands: ${commands_with_label[*]}" 126 | fi 127 | 128 | if [ "$with_prerelease" = true ] && [ -n "$bump_part" ] && [[ ! " ${commands_with_prerelease[*]} " =~ [[:space:]]${bump_part}[[:space:]] ]]; then 129 | invalid_option "Changing the prerelease is only allowed in conjunction with the following commands: ${commands_with_prerelease[*]}" 130 | fi 131 | 132 | label_option="" 133 | if [ -n "$label" ]; then 134 | label_option="token='$label'" 135 | fi 136 | 137 | if [ -n "$bump_part" ]; then 138 | if [ "$bump_part" = "finalize" ]; then 139 | commit_prefix="Finalize" 140 | bump_command="finalize_version()" 141 | elif [ "$bump_part" = "build" ]; then 142 | bump_command="bump_${bump_part}($label_option)" 143 | else 144 | bump_command="bump_${bump_part}()" 145 | fi 146 | new_version=$(bump_version "$old_version" "$bump_command") 147 | echo Changing version from "$old_version" to "$new_version" 148 | fi 149 | 150 | if [ "$with_prerelease" = true ]; then 151 | bump_command="bump_prerelease($label_option)" 152 | temp_version=$(bump_version "$new_version" "$bump_command") 153 | echo Changing version from "$new_version" to "$temp_version" 154 | new_version="$temp_version" 155 | fi 156 | 157 | tmp_file=/tmp/version.$$ 158 | for version_file in "${VERSION_FILES[@]}"; do 159 | if [ ! -f "$version_file" ]; then 160 | echo Missing expected file: "$version_file" 161 | exit 1 162 | fi 163 | sed "s/$old_version_regex/$new_version/" "$version_file" > $tmp_file 164 | mv $tmp_file "$version_file" 165 | done 166 | 167 | git add "${VERSION_FILES[@]}" 168 | git commit --message "$commit_prefix version from $old_version to $new_version" 169 | 170 | if [ "$with_push" = true ]; then 171 | git push 172 | fi 173 | -------------------------------------------------------------------------------- /gce-scripts/README.md: -------------------------------------------------------------------------------- 1 | # Pshtt as an HTTPS status checker # 2 | 3 | Welcome! This is the documentation on how to run pshtt to scan sites for their 4 | HTTPS status. These instructions are mostly about how to run it at scale, but at 5 | the end, there are instructions on how to run on a local instance. 6 | 7 | This document goes over how to both run pshtt on multiple instances on google 8 | cloud engine and also how to run it as a singular instance on your local 9 | machine. It takes about 30 minutes to set up from start to finish. 10 | 11 | Running pshtt on 150 instances takes about 12 - 15 hours for a million sites. 12 | Assume at worst that each site will take 10 seconds (which is the default 13 | timeout) and scale up to whatever timeframe you want to run in based off of 14 | that. 15 | 16 | Example: 1000 sites in 2 hours would take 2 instances. 17 | 18 | ## How to run pshtt on Google Cloud Engine ## 19 | 20 | ### Before you run ### 21 | 22 | 1. 
Set up a [google compute engine
23 | account](https://cloud.google.com/compute/docs/access/user-accounts/).
24 |
25 | 1. Make sure you have the correct quota allowances.
26 | - Go to the [quotas page](https://cloud.google.com/compute/quotas)
27 | and select the project that you want to run this under.
28 | - Request quotas --- click on the following items in the list and click
29 | "edit quotas" at the top of the page:
30 | - CPUS (all regions) --> 150
31 | - In use IP addresses --> 150
32 | - One Region's in use IPs (ex us-west1) --> 150
33 | - Same Region's CPUs (ex. us-west1) --> 150
34 |
35 | 1. Create Instance Group Template.
36 |
37 | You will want to run multiple instances (presumably), and creating an
38 | Instance Group template allows you to make up to 150 machines under the same
39 | template.
40 |
41 | - Go to Compute Engine, then click on the Instance templates
42 | tab and click "Create Instance Template".
43 | - Name --> "pshtt-template"
44 | - Machine type -- 1 CPU (n1-standard-1 (1 vCPU, 3.75 GB memory)).
45 | - Check allow HTTP and HTTPS traffic.
46 | - Boot Disk --- Ubuntu 14.04 LTS.
47 | - automatic restart (under management tab) -- off.
48 | - Hit create.
49 |
50 | 1. Create an SSH key ONLY for the google cloud instances and upload to your
51 | profile.
52 |
53 | This is a security measure. ***DO NOT USE YOUR REGULAR SSH KEY.***
54 |
55 | - `cd ~/.ssh && ssh-keygen -t rsa -f gce_pshtt_key`
56 | - Go to the [metadata
57 | tab](https://cloud.google.com/compute/docs/instances/adding-removing-ssh-keys)
58 | and hit edit.
59 | - `cd ~/.ssh && cat gce_pshtt_key.pub`
60 | - Copy the output of the above command and paste it into the console.
61 |
62 | 1. Create the instance group.
63 |
64 | It is important to name your instance group something identifiable,
65 | especially if you are sharing a project with others. Remember this instance
66 | group name for a later step. ***We recommend that you try one instance at
67 | first to make sure it works***.
68 |
69 | - Go to the instance group tab.
70 | - Click Multi-Zone, and select the region that you requested your
71 | instances for.
72 | - Choose "pshtt-template" under instance template.
73 | - Hit create.
74 | - Welcome to your new instance group!
75 |
76 | ### Updating data files and setting up to run ###
77 |
78 | The following is a set of commands to run to make your running directory.
79 |
80 | 1. Download the gcloud command line tool.
81 |
82 | - Follow the [download
83 | link](https://cloud.google.com/sdk/docs/#install_the_latest_cloud_tools_version_cloudsdk_current_version)
84 | and install the correct SDK for your OS.
85 | - If this is your first time installing the gcloud command line tool,
86 | follow the instructions on the page. Do not set any default zones.
87 | - If you already have this installed, follow these
88 | instructions:
89 | - `gcloud init`
90 | - Click `2` to create a new configuration.
91 | - Enter `pshtt-configuration`
92 | - Choose the appropriate account
93 | - Click the appropriate number corresponding to your google project
94 | - If it complains that the API is not enabled, hit enable and retry.
95 | - Do not set default zone or region
96 | - At this point, your default project should be this google project.
97 | You can switch to any of your previous projects by running `gcloud
98 | config set project PROJECTNAME`
99 |
100 | 1. Setting up your directory.
101 |
102 | - `mkdir ~/pshtt_run`
103 | - Creates the dir that you will run your program out of.
104 | - `gcloud compute instances list | sed -n '1!p' | grep
105 | "<instance_group_name>" | awk '{print $5}' > ~/pshtt_run/hosts.txt`
106 | - `<instance_group_name>` is what you named the instance group you created
107 | above.
108 |
109 | 1. Copy all .sh scripts from this directory:
110 |
111 | - Keep the name of the scripts the same.
112 | - `chmod +x ~/pshtt_run/*.sh`
113 | - which will make all the scripts executable.
114 | - `touch domains.csv`
115 | - Your domain list, one domain per line, with the input list ending in
116 | `.csv`.
117 | - Domains must have the scheme stripped from them and no trailing '/',
118 | such as:
119 | - `domain.tld`
120 | - `subdomain.domain.tld`
121 | - `www.subdomain.domain.tld`
122 | - `mkdir ~/pshtt_run/data_results/`
123 | - `mv ~/pshtt_run/combine_shards.py ~/pshtt_run/data_results`
124 | - Places combine_shards.py into data_results/.
125 | - `mkdir ~/pshtt_run/input_files/`
126 |
127 | 1. roots.pem
128 |
129 | We want to use our own CA file when running pshtt. We use the mozilla root
130 | store for this purpose. Follow the instructions in this
131 | [repository](https://github.com/agl/extract-nss-root-certs).
132 |
133 | 1. Updating ssh key
134 |
135 | - If your new ssh key is called "gce_pshtt_key", skip this step.
136 | - If you did not name your ***new*** ssh key gce_pshtt_key, then you will
137 | need to go through and rename the gce_pshtt_key in all the .sh files to
138 | whatever you named your key.
139 | - In vim, this is `:%s/gce_pshtt_key/yourkeynamehere/g`.
140 |
141 | ### How to run ###
142 |
143 | 1. `screen -S pshtt_running`
144 | 1. `cd ~/pshtt_run/`
145 | 1. `./run_all_scripts.sh <input_file> <#_of_shards> <output_file_name> >
146 | log.out`
147 | - Number of shards == number of hosts
148 | - Each machine will contain a shard of the data to run.
149 | - This is the script that sets up all machines and puts all datafiles on
150 | the machines for running.
151 | - `./run_all_scripts.sh top-1m.nocommas.8.31.2017 100 alexa`
152 | - Will produce 100 shards all starting with "alexa" in the input_files
153 | dir.
154 | - ex. alexa000.csv
155 | - NOTE: you can ONLY create 999 shards. If you need more than 999 shards,
156 | you will need to change the split_up_dataset.sh file.
157 | 1. Exit screen `ctrl+a+d`
158 |
159 | ### During the run ###
160 |
161 | - `./check_instances.sh`
162 | - Will print the IP of each host, as well as FINISHED or NOT FINISHED.
163 |
164 | ### After the run ###
165 |
166 | - `./grab_and_combine_data.sh`
167 | - Will grab all log and result data files, combine data files into one
168 | large result file, and put these into data_results/.
169 | - Delete your instance group. If you want to run data analysis, jump down to
170 | the data analysis portion.
171 |
172 | ## Running pshtt on your local machine ##
173 |
174 | 1. Copy packages_to_install.sh and run it to install the necessary packages.
175 | - `sudo ./packages_to_install.sh`
176 | 1. Clone pshtt.
177 | - `git clone https://github.com/dhs-ncats/pshtt.git`
178 | 1. Put roots.pem, running_script.sh, and your input file in the same dir as
179 | pshtt.
180 | - Follow directions under Updating data files above on how to get a
181 | roots.pem.
182 | - Domains must have the scheme stripped from them and no trailing '/', such
183 | as:
184 | - `domain.tld`
185 | - `subdomain.domain.tld`
186 | - `www.subdomain.domain.tld`
187 | - `chmod +x running_script.sh` to make it executable.
188 | 1. Run `./running_script.sh <input_file>`
189 | 1. Results and profit.
190 | - Results can be found in `<input_file>.json`.
191 | - If you want to be able to use this json file with any of the colab
192 | notebooks (like the one listed below), you will also need to run
193 | combine_shards.py:
194 | - Copy combine_shards.py into the same dir as the json file.
195 | - `echo <input_file>.json > to_combine.txt`
196 | - `python combine_shards.py to_combine.txt > final_results.json`
197 | - Log can be found in `time_<input_file>.txt`.
198 | -------------------------------------------------------------------------------- /gce-scripts/check_instances.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 |
3 | # Checks all the instances in hosts.txt and checks the end of the log file
4 | # to see if it's finished. The script prints out FINISHED or NOT FINISHED
5 | # for each host respectively.
6 |
7 | hosts_file='hosts.txt'
8 | list_of_files=$(ls -1q input_files)
9 | i=1
10 |
11 | # Grab the correct input file for the corresponding machine.
12 | for z in $list_of_files; do
13 | machine=$(sed "${i}q;d" $hosts_file)
14 | # Check if the file has 'Wrote results', which indicates that it's finished.
15 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" tail pshtt/time_"${z}".txt | grep -q 'Wrote results'
16 | finished=$?
17 | if [[ "${finished}" -eq 0 ]]; then
18 | echo 'server '"${machine}"' FINISHED'
19 | else
20 | echo 'server '"${machine}"' NOT FINISHED'
21 | fi
22 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" cat pshtt/time_"${z}".txt | grep -q 'Traceback'
23 | error=$?
24 | if [[ "${error}" -eq 0 ]]; then
25 | echo 'server '"${machine}"' ERROR ON THIS MACHINE. CHECK INSTANCE.'
26 | else
27 | echo 'server '"${machine}"' NO ERROR.'
28 | fi
29 | ((i = i + 1))
30 | done
31 | -------------------------------------------------------------------------------- /gce-scripts/combine_shards.py: -------------------------------------------------------------------------------- 1 | """Combines pshtt shards into one final data file."""
2 |
3 | # Standard Python Libraries
4 | import json
5 | import sys
6 |
7 |
8 | def main():
9 | """Read a file with a list of shard filenames and combine them."""
10 | if (len(sys.argv)) < 2:
11 | print("you need a filename!")
12 | exit(1)
13 | # Master file is the file with the list of filenames to intake.
14 | # Fileception.
15 | master_file = sys.argv[1]
16 | filenames = []
17 |
18 | # Read in the filenames that are the different shards.
19 | with open(master_file) as input_file:
20 | for line in input_file:
21 | filenames.append(line.rstrip())
22 | # For each shard, read it in and append to the final list to
23 | # print out.
24 | for f in filenames:
25 | with open(f) as input_file:
26 | json_data = json.load(input_file)
27 | for item in json_data:
28 | print(json.dumps(item))
29 |
30 |
31 | if __name__ == "__main__":
32 | main()
33 | -------------------------------------------------------------------------------- /gce-scripts/grab_and_combine_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 |
3 | # If pshtt is done on all machines, it grabs both
4 | # the log file and the output file from the machines and
5 | # places them in the data_results/ directory.
6 |
7 | # This script also sets up the files to be combined by
8 | # the combine_shards script. Because pshtt outputs the results
9 | # as a list of dicts, we need to combine all of those lists.
10 | # We output the dicts as a file of dicts, one per line.
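# For example, a shard result file containing the JSON list
# [{"Domain": "a.gov", ...}, {"Domain": "b.gov", ...}]
# becomes two lines in final_results.json, one JSON object per line.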
11 | hosts_file='hosts.txt'
12 | list_of_files=$(ls -1q input_files)
13 | i=1
14 |
15 | for z in $list_of_files; do
16 | machine=$(sed "${i}q;d" $hosts_file)
17 | echo 'Kicking off '"${machine}"' number '$i
18 | # Grab the actual result file.
19 | echo 'grabbing result file'
20 | scp -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}":~/pshtt/"${z}".json data_results/
21 | echo $?
22 | # Grab the log file from that machine.
23 | echo 'grabbing log file'
24 | scp -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}":~/pshtt/time_"${z}".txt data_results/
25 | echo $?
26 | echo 'creating to_combine.txt'
27 | touch data_results/to_combine.txt
28 | echo $?
29 | echo 'putting file name into combine script'
30 | echo "${z}"'.json' >> data_results/to_combine.txt
31 | echo $?
32 | ((i = i + 1))
33 | done
34 |
35 | cd data_results || exit
36 | python combine_shards.py to_combine.txt > final_results.json
37 | -------------------------------------------------------------------------------- /gce-scripts/packages_to_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 |
3 | # Installs all the necessary packages for pshtt to run.
4 | # Logs which package it is installing as well as its success (0) or failure
5 | # (1).
6 | echo 'UPDATE'
7 | apt-get -y update -qq
8 | echo $? ' ERROR CODE'
9 | echo 'GIT'
10 | apt-get -y install git -qq
11 | echo $? ' ERROR CODE'
12 | echo 'PYTHON3-PIP'
13 | apt-get -y install python3-pip -qq
14 | echo $? ' ERROR CODE'
15 | echo 'LIBFFI6'
16 | apt-get -y install libffi6 libffi-dev -qq
17 | echo $? ' ERROR CODE'
18 | echo 'LIBSSL'
19 | apt-get -y install build-essential libssl-dev libffi-dev python3-dev -qq
20 | echo $? ' ERROR CODE'
21 | echo 'SETUPTOOLS'
22 | pip3 install --upgrade setuptools -qq
23 | echo $? ' ERROR CODE'
24 | echo 'CFFI'
25 | pip3 install cffi -qq
26 | echo $? ' ERROR CODE'
27 | echo 'SSLYZE'
28 | pip3 install sslyze -qq
29 | echo $? ' ERROR CODE'
30 | echo 'PUBLIC SUFFIX'
31 | pip3 install publicsuffix -qq
32 | echo $? ' ERROR CODE'
33 | echo 'REQUESTS'
34 | pip3 install --upgrade requests -qq
35 | echo $? ' ERROR CODE'
36 | echo 'DOCOPT'
37 | pip3 install docopt -qq
38 | echo $? ' ERROR CODE'
39 | echo 'PYOPENSSL'
40 | pip3 install pyopenssl -qq
41 | echo $? ' ERROR CODE'
42 | echo 'PYTABLEWRITER'
43 | pip3 install pytablewriter -qq
44 | echo $? ' ERROR CODE'
45 | echo 'TYPING'
46 | pip3 install typing -qq
47 | echo $? ' ERROR CODE'
48 | echo 'FINISHED INSTALLING PACKAGES'
49 | -------------------------------------------------------------------------------- /gce-scripts/run_all_scripts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash
2 |
3 | # This is the first script to run. This script calls
4 | # all the other pertinent scripts for setting up
5 | # and kicking off runs.
6 |
7 | # ./run_all_scripts.sh <input_file> <#_of_shards> <output_file_name>
8 | # Ex: ./run_all_scripts.sh top-1m.nocommas.8.31.2017 100 alexa
9 |
10 | # Only the first input argument is required. The other two will default
11 | # to 10 and shard_ respectively.
12 |
13 | # will split up the file top-1m.nocommas.8.31.2017 into 100 files
14 | # into a dir called input_files, and all the files will start with
15 | # alexa. So the shard files will be alexa000.csv, alexa001.csv
16 | # etc.
17 |
18 | # If any of the scripts fails, this hard fails and tells the user what script
19 | # went wrong.
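# The parameter expansions below (${2-10} and ${3-shard_}) supply the
# defaults: if the second or third argument is unset, "10" or "shard_"
# is substituted.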
20 | 21 | input_file=$1 22 | number_of_shards=${2-10} 23 | output_file_name=${3-shard_} 24 | 25 | echo 'Splitting dataset' 26 | ./split_up_dataset.sh "${input_file}" "${number_of_shards}" "${output_file_name}" 27 | error=$? 28 | 29 | if [[ "${error}" -eq 1 ]]; then 30 | echo 'ERROR WITH SPLIT DATASET SCRIPT' 31 | exit 1 32 | fi 33 | 34 | echo 'Scp and setup' 35 | ./scp_and_setup.sh "${output_file_name}" 36 | error=$? 37 | if [[ "${error}" -eq 1 ]]; then 38 | echo 'ERROR WITH SCP AND SETUP SCRIPT' 39 | exit 1 40 | fi 41 | 42 | echo 'Running instances' 43 | ./run_instances.sh 44 | error=$? 45 | if [[ "${error}" -eq 1 ]]; then 46 | echo 'ERROR WITH RUNNING INSTANCES SCRIPT' 47 | exit 1 48 | fi 49 | -------------------------------------------------------------------------------- /gce-scripts/run_instances.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Runs pshtt on all instances, using the correct input file. 4 | 5 | hosts_file='hosts.txt' 6 | list_of_files=$(ls -1q input_files/) 7 | i=1 8 | 9 | # For each file, find the corresponding machine it's been uploaded to, 10 | # check if the screen exists (create if not) and kick off pshtt on that screen. 11 | 12 | for z in $list_of_files; do 13 | machine=$(sed "${i}q;d" $hosts_file) 14 | # Check if screen exists. 15 | echo 'Kicking off '"${machine}"' number '$i 16 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" screen -list | grep -q "pshtt_screen" 17 | answer=$? 18 | # If screen does not exist, then create it. 19 | if [[ "${answer}" -eq 1 ]]; then 20 | echo 'Creating screen' 21 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" screen -S pshtt_screen -d -m 22 | echo $? 23 | fi 24 | 25 | # Run script in screen. 26 | echo 'Kicking off script' 27 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" "screen -S pshtt_screen -X -p 0 stuff $'cd pshtt && ./running_script.sh $z\n'" 28 | echo $? 29 | ((i = i + 1)) 30 | done 31 | -------------------------------------------------------------------------------- /gce-scripts/running_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Runs pshtt with a 10 second timeout, with roots.pem as the CA file, 4 | # and debug on. Logging goes to time_.txt 5 | 6 | # ./running_script.sh test_file.csv 7 | # output files: test_file.csv.json, time_test_file.csv.txt 8 | 9 | input_file=$1 10 | (time python3 -m pshtt.cli "${input_file}" -t 10 -u -j -o "${input_file}".json -f "roots.pem" --debug) 2> time_"${input_file}".txt 11 | -------------------------------------------------------------------------------- /gce-scripts/scp_and_setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This file is broken up into three distinct parts. 4 | # The first part is uploading the packages to install 5 | # script to all machines, and kicking it off. 6 | # We do this first because 1) we need those packages to do anything else 7 | # and 2) it takes about 10 - 15 seconds per machine, so we parallelize it. 8 | 9 | # The second part is simply a check to see if the packages are finished 10 | # installing. We test the last machine in the list first because if that is 11 | # finished then all the other machines SHOULD also be finished. After we verify 12 | # that the last machine is finished, loop back through all of the machines and 13 | # make sure that they've all finished. 
If they haven't, print out an error
14 | # warning for that machine and stop the whole process.
15 | # Takes the host file and the list of shards and
16 | # scps shards to hosts.
17 | # Also scps various scripts and installs pshtt
18 | # and all the necessary packages.
19 | # List of IPs, separated by line
20 | hosts_file='hosts.txt'
21 | # number of files that we need to cycle through
22 | num_files=$(find input_files/ -mindepth 1 -maxdepth 1 | wc -l)
23 | # list of files; we do this deterministically
24 | # because then we can run this command across
25 | # other scripts and expect the same order of files.
26 | list_of_files=$(find input_files/ -mindepth 1 -maxdepth 1)
27 | # We flip this bit if we find an error with any of the machines. This tells us
28 | # to stop the process so that the user can go by hand and fix the machine.
29 | error_with_packages=1
30 |
31 | # Upload script and install packages on all machines.
32 | # parallelized.
33 | ################################################################
34 | for i in $(seq 1 "${num_files}"); do
35 | # Grab the ip from hosts.txt that corresponds to the file number we are
36 | # uploading.
37 | # If we are uploading file #3 in the list, go to line 3 in the hosts file
38 | # and upload to that ip.
39 |
40 | machine=$(sed "${i}q;d" $hosts_file)
41 | echo "Now on ${machine} number ${i}"
42 | # Do not do strict host key checking so that you don't have to type "yes" for
43 | # each machine.
44 | echo 'Uploading packages_to_install.sh'
45 | scp -i ~/.ssh/gce_pshtt_key -o "StrictHostKeyChecking no" packages_to_install.sh ubuntu@"${machine}":~/
46 | echo $?
47 | # We echo after each command to ensure that it worked. 0 means success.
48 | # The Log file is how we can tell if the packages have all been uploaded.
49 | echo 'Creating packages log file'
50 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" touch package_log_file.txt
51 | echo $?
52 | # Check to see if this screen exists already.
53 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" screen -list | grep -q "package_screen"
54 | answer=$?
55 | # If the screen exists, then we won't create another one. Otherwise, create.
56 | if [[ "${answer}" -eq 1 ]]; then
57 | echo 'Creating screen'
58 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" screen -S package_screen -d -m
59 | echo $?
60 | fi
61 | # Run packages_to_install and pipe to package_log_file.txt on each machine.
62 | ssh -i ~/.ssh/gce_pshtt_key -t ubuntu@"${machine}" "screen -S package_screen -X -p 0 stuff $'sudo ./packages_to_install.sh > package_log_file.txt\n'"
63 | echo $?
64 | done
65 |
66 | # Check that all machines have finished installing packages.
67 | ###################################################################
68 | # Grab the last machine in the hosts file. This was the last one to
69 | # be uploaded and kicked off, so presumably it will be the last one
70 | # to finish.
71 | machine=$(sed "${num_files}q;d" $hosts_file)
72 | while true; do
73 | echo 'Waiting on packages to install'
74 | # Wait 10 seconds before checking the file again.
75 | sleep 10
76 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" tail package_log_file.txt | grep -q 'FINISHED INSTALLING PACKAGES'
77 | finished=$?
78 | if [[ "${finished}" -eq 0 ]]; then 79 | break 80 | fi 81 | done 82 | 83 | for i in $(seq 1 "${num_files}"); do 84 | machine=$(sed "${i}q;d" $hosts_file) 85 | echo "Now on ${machine} number ${i}" 86 | echo 'Checking packages finished installing' 87 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" tail package_log_file.txt | grep -q 'FINISHED INSTALLING PACKAGES' 88 | finished=$? 89 | if [[ "${finished}" -eq 0 ]]; then 90 | # Check if any of the machines had a problem installing packages. 91 | ssh -i ~/.ssh/gce_pshtt_key ubuntu@"${machine}" cat package_log_file.txt | grep -q '1 ERROR CODE' 92 | error=$? 93 | if [[ "${error}" -eq 0 ]]; then 94 | echo 'ERROR WITH '"${machine}" 95 | error_with_packages=0 96 | fi 97 | else 98 | # The machine never reported that it finished installing; warn the user 99 | # and flag the error so the whole process stops, as described above. 100 | echo 'ERROR: '"${machine}"' NEVER FINISHED INSTALLING PACKAGES' 101 | error_with_packages=0 102 | fi 103 | done 104 | 105 | # If any of the machines had an error with a package, stop the entire process 106 | # and inform the user. 107 | if [[ "${error_with_packages}" -eq 0 ]]; then 108 | echo 'ERROR FOUND WITH PACKAGES' 109 | exit 1 110 | fi 111 | 112 | # Upload remaining data files. 113 | ##################################################################### 114 | i=1 115 | for y in $list_of_files; do 116 | machine=$(sed "${i}q;d" $hosts_file) 117 | echo "Now on ${machine} number ${i}" 118 | echo 'Cloning the pshtt GitHub repo' 119 | ssh -i ~/.ssh/gce_pshtt_key -t ubuntu@"${machine}" git clone https://github.com/dhs-ncats/pshtt.git 120 | echo $? 121 | echo 'Copying data file to pshtt directory' 122 | scp -i ~/.ssh/gce_pshtt_key "${y}" ubuntu@"${machine}":~/pshtt/ 123 | echo $? 124 | echo 'Copying roots.pem into pshtt directory' 125 | scp -i ~/.ssh/gce_pshtt_key "roots.pem" ubuntu@"${machine}":~/pshtt/ 126 | echo $? 127 | echo 'Copying running script into pshtt directory' 128 | scp -i ~/.ssh/gce_pshtt_key running_script.sh ubuntu@"${machine}":~/pshtt/ 129 | echo $? 130 | echo "${y}" 131 | ((i = i + 1)) 132 | done 133 | -------------------------------------------------------------------------------- /gce-scripts/split_up_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Usage: ./split_up_dataset.sh <input_file> [number_of_shards] [output_file_prefix] 4 | # Ex: ./split_up_dataset.sh top-1m.nocommas.8.31.2017 100 alexa 5 | 6 | # Uses split to break up the input file into N shards. 7 | # Because of how split works, some files will be larger or smaller 8 | # than others, but the sum of the files will equal the length of the 9 | # original file. 10 | 11 | # Add a .csv suffix because that's what pshtt takes in. 12 | 13 | # Place all files into the input_files dir for posterity. 14 | 15 | input_file=$1 16 | number_of_shards=${2-10} 17 | output_file_name=${3-shard_} 18 | 19 | split -a 3 --number=l/"${number_of_shards}" -d "${input_file}" input_files/"${output_file_name}" --additional-suffix=.csv 20 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | # Increase verbosity, display extra test summary info for tests that did not pass, 3 | # display code coverage results, and enable debug logging 4 | addopts = --verbose -ra --cov --log-cli-level=DEBUG 5 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | --editable .[dev] 2 | --requirement requirements-test.txt 3 | build 4 | ipython 5 | mypy 6 | # The bump-version script requires at least version 3 of semver.
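# For reference, a development environment is typically created either with
# the setup-env script at the repository root (which installs this file's
# contents automatically) or directly with pip:
#   pip install --requirement requirements-dev.txt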
7 | semver>=3 8 | twine 9 | -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | --editable .[test] 2 | --requirement requirements.txt 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Note: Add any additional requirements to setup.py's install_requires field 2 | --editable . 3 | wheel 4 | -------------------------------------------------------------------------------- /setup-env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | set -o pipefail 6 | 7 | USAGE=$( 8 | cat << 'END_OF_LINE' 9 | Configure a development environment for this repository. 10 | 11 | It does the following: 12 | - Allows the user to specify the Python version to use for the virtual environment. 13 | - Allows the user to specify a name for the virtual environment. 14 | - Verifies pyenv and pyenv-virtualenv are installed. 15 | - Creates the Python virtual environment. 16 | - Configures the activation of the virtual environment for the repo directory. 17 | - Installs the requirements needed for development (including mypy type stubs). 18 | - Installs git pre-commit hooks. 19 | - Configures git remotes for upstream "lineage" repositories. 20 | 21 | Usage: 22 | setup-env [--force] [--install-hooks] [--list-versions] [--venv-name venv_name] [--python-version python_version] 23 | setup-env (-h | --help) 24 | 25 | Options: 26 | -f | --force Delete the virtual environment if it already exists. 27 | -h | --help Show this message. 28 | -i | --install-hooks Install hook environments for all environments in the 29 | pre-commit config file. 30 | -l | --list-versions List available Python versions and select one interactively. 31 | -p | --python-version Specify the Python version for the virtual environment. 32 | -v | --venv-name Specify the name of the virtual environment. 33 | 34 | END_OF_LINE 35 | ) 36 | 37 | # Display pyenv's installed Python versions 38 | python_versions() { 39 | pyenv versions --bare --skip-aliases --skip-envs 40 | } 41 | 42 | check_python_version() { 43 | local version=$1 44 | 45 | # This regex matches semantically correct version strings, which Python 46 | # versions follow. For more information see here: 47 | # https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string 48 | # Break the regex down into readable parts: major.minor.patch 49 | local major="0|[1-9]\d*" 50 | local minor="0|[1-9]\d*" 51 | local patch="0|[1-9]\d*" 52 | 53 | # Split the prerelease part up for readability 54 | # Start of the prerelease 55 | local prerelease="(?:-" 56 | # Numeric or alphanumeric identifiers 57 | prerelease+="(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)" 58 | # Additional dot-separated identifiers 59 | prerelease+="(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*" 60 | # End of the prerelease, making it optional 61 | prerelease+=")?" 62 | # Optional build metadata 63 | local build="(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?" 64 | 65 | # Final regex composed of parts 66 | local regex="^($major)\.($minor)\.($patch)$prerelease$build$" 67 | 68 | # This checks if the Python version does not match the regex pattern specified in $regex, 69 | # using Perl for regex matching. If the pattern is not found, then prompt the user with 70 | # the invalid version message. 71 | if !
echo "$version" | perl -ne "exit(!/$regex/)"; then 72 | echo "Invalid version of Python: Python follows semantic versioning," \ 73 | "so any version string that is not a valid semantic version is an" \ 74 | "invalid version of Python." 75 | exit 1 76 | # Else if the Python version isn't installed then notify the user. 77 | # grep -E is used for searching through text lines that match the 78 | # specific version. 79 | elif ! python_versions | grep -E "^${version}$" > /dev/null; then 80 | echo "Error: Python version $version is not installed." 81 | echo "Installed Python versions are:" 82 | python_versions 83 | exit 1 84 | else 85 | echo "Using Python version $version" 86 | fi 87 | } 88 | 89 | # Flag to force deletion and creation of virtual environment 90 | FORCE=0 91 | 92 | # Initialize the other flags 93 | INSTALL_HOOKS=0 94 | LIST_VERSIONS=0 95 | PYTHON_VERSION="" 96 | VENV_NAME="" 97 | 98 | # Define long options 99 | LONGOPTS="force,help,install-hooks,list-versions,python-version:,venv-name:" 100 | 101 | # Define short options for getopt 102 | SHORTOPTS="fhilp:v:" 103 | 104 | # Check for GNU getopt by matching a specific pattern ("getopt from util-linux") 105 | # in its version output. This approach presumes the output format remains stable. 106 | # Be aware that format changes could invalidate this check. 107 | if [[ $(getopt --version 2> /dev/null) != *"getopt from util-linux"* ]]; then 108 | cat << 'END_OF_LINE' 109 | 110 | Please note, this script requires GNU getopt due to its enhanced 111 | functionality and compatibility with certain script features that 112 | are not supported by the POSIX getopt found in some systems, particularly 113 | those with a non-GNU version of getopt. This distinction is crucial 114 | as a system might have a non-GNU version of getopt installed by default, 115 | which could lead to unexpected behavior. 116 | 117 | On macOS, we recommend installing brew (https://brew.sh/). Then installation 118 | is as simple as `brew install gnu-getopt` and adding this to your 119 | profile: 120 | 121 | export PATH="$(brew --prefix)/opt/gnu-getopt/bin:$PATH" 122 | 123 | GNU getopt must be explicitly added to the PATH since it 124 | is keg-only (https://docs.brew.sh/FAQ#what-does-keg-only-mean). 125 | 126 | END_OF_LINE 127 | exit 1 128 | fi 129 | 130 | # Check to see if pyenv is installed 131 | if [ -z "$(command -v pyenv)" ] || { [ -z "$(command -v pyenv-virtualenv)" ] && [ ! -f "$(pyenv root)/plugins/pyenv-virtualenv/bin/pyenv-virtualenv" ]; }; then 132 | echo "pyenv and pyenv-virtualenv are required." 133 | if [[ "$OSTYPE" == "darwin"* ]]; then 134 | cat << 'END_OF_LINE' 135 | 136 | On macOS, we recommend installing brew, https://brew.sh/. Then installation 137 | is as simple as `brew install pyenv pyenv-virtualenv` and adding this to your 138 | profile: 139 | 140 | eval "$(pyenv init -)" 141 | eval "$(pyenv virtualenv-init -)" 142 | 143 | END_OF_LINE 144 | 145 | fi 146 | cat << 'END_OF_LINE' 147 | For Linux, Windows Subsystem for Linux (WSL), or macOS (if you don't want 148 | to use "brew") you can use https://github.com/pyenv/pyenv-installer to install 149 | the necessary tools. Before running this ensure that you have installed the 150 | prerequisites for your platform according to the pyenv wiki page, 151 | https://github.com/pyenv/pyenv/wiki/common-build-problems. 152 | 153 | On WSL you should treat your platform as whatever Linux distribution you've 154 | chosen to install. 
155 | 156 | Once you have installed "pyenv" you will need to add the following lines to 157 | your ".bashrc": 158 | 159 | export PATH="$PATH:$HOME/.pyenv/bin" 160 | eval "$(pyenv init -)" 161 | eval "$(pyenv virtualenv-init -)" 162 | END_OF_LINE 163 | exit 1 164 | fi 165 | 166 | # Use GNU getopt to parse options 167 | if ! PARSED=$(getopt --options $SHORTOPTS --longoptions $LONGOPTS --name "$0" -- "$@"); then 168 | echo "Error parsing options" 169 | exit 1 170 | fi 171 | eval set -- "$PARSED" 172 | 173 | while true; do 174 | case "$1" in 175 | -f | --force) 176 | FORCE=1 177 | shift 178 | ;; 179 | -h | --help) 180 | echo "$USAGE" 181 | exit 0 182 | ;; 183 | -i | --install-hooks) 184 | INSTALL_HOOKS=1 185 | shift 186 | ;; 187 | -l | --list-versions) 188 | LIST_VERSIONS=1 189 | shift 190 | ;; 191 | -p | --python-version) 192 | PYTHON_VERSION="$2" 193 | shift 2 194 | # Check the Python version being passed in. 195 | check_python_version "$PYTHON_VERSION" 196 | ;; 197 | -v | --venv-name) 198 | VENV_NAME="$2" 199 | shift 2 200 | ;; 201 | --) 202 | shift 203 | break 204 | ;; 205 | *) 206 | # Unreachable due to GNU getopt handling all options 207 | echo "Programming error" 208 | exit 64 209 | ;; 210 | esac 211 | done 212 | 213 | # Determine the virtual environment name 214 | if [ -n "$VENV_NAME" ]; then 215 | # Use the user-provided environment name 216 | env_name="$VENV_NAME" 217 | else 218 | # Set the environment name to the last part of the working directory. 219 | env_name=${PWD##*/} 220 | fi 221 | 222 | # List Python versions and select one interactively. 223 | if [ $LIST_VERSIONS -ne 0 ]; then 224 | echo Available Python versions: 225 | python_versions 226 | # Read the user's desired Python version. 227 | # -r: treat backslashes as literal, -p: display prompt before input. 228 | read -r -p "Enter the desired Python version: " PYTHON_VERSION 229 | # Check the Python version being passed in. 230 | check_python_version "$PYTHON_VERSION" 231 | fi 232 | 233 | # Remove any lingering local configuration. 234 | if [ $FORCE -ne 0 ]; then 235 | rm -f .python-version 236 | pyenv virtualenv-delete --force "${env_name}" || true 237 | elif [[ -f .python-version ]]; then 238 | cat << 'END_OF_LINE' 239 | An existing .python-version file was found. Either remove this file yourself 240 | or re-run with the --force option to have it deleted along with the associated 241 | virtual environment. 242 | 243 | rm .python-version 244 | 245 | END_OF_LINE 246 | exit 1 247 | fi 248 | 249 | # Create a new virtual environment for this project 250 | # 251 | # If $PYTHON_VERSION is undefined then the current pyenv Python version will be used. 252 | # 253 | # We can't quote ${PYTHON_VERSION:=} below since if the variable is 254 | # undefined then we want nothing to appear; this is the reason for the 255 | # "shellcheck disable" line below. 256 | # 257 | # shellcheck disable=SC2086 258 | if ! pyenv virtualenv ${PYTHON_VERSION:=} "${env_name}"; then 259 | cat << END_OF_LINE 260 | An existing virtual environment named $env_name was found. Either delete this 261 | environment yourself or re-run with the --force option to have it deleted. 262 | 263 | pyenv virtualenv-delete ${env_name} 264 | 265 | END_OF_LINE 266 | exit 1 267 | fi 268 | 269 | # Set the local application-specific Python version(s) by writing the 270 | # version name to a file named `.python-version'. 
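# For example, in a checkout named "pshtt" with no --venv-name given, this
# writes "pshtt" to .python-version; with the pyenv-virtualenv shell hooks
# described above, that environment is then activated automatically whenever
# you enter this directory.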
271 | pyenv local "${env_name}" 272 | 273 | # Upgrade pip and friends 274 | python3 -m pip install --upgrade pip setuptools wheel 275 | 276 | # Find a requirements file (if possible) and install 277 | for req_file in "requirements-dev.txt" "requirements-test.txt" "requirements.txt"; do 278 | if [[ -f $req_file ]]; then 279 | pip install --requirement $req_file 280 | break 281 | fi 282 | done 283 | 284 | # Install git pre-commit hooks now or later. 285 | if [ $INSTALL_HOOKS -ne 0 ]; then 286 | pre-commit install --install-hooks 287 | else 288 | pre-commit install 289 | fi 290 | 291 | # Set up git remotes from the lineage configuration. 292 | # This could fail if the remotes are already set up, but that is ok. 293 | set +o errexit 294 | 295 | eval "$( 296 | python3 << 'END_OF_LINE' 297 | # Standard Python Libraries 298 | from pathlib import Path 299 | import sys 300 | 301 | # Third-Party Libraries 302 | import yaml 303 | 304 | LINEAGE_CONFIG = Path(".github/lineage.yml") 305 | 306 | if not LINEAGE_CONFIG.exists(): 307 | print("No lineage configuration found.", file=sys.stderr) 308 | sys.exit(0) 309 | 310 | with LINEAGE_CONFIG.open("r") as f: 311 | lineage = yaml.safe_load(stream=f) 312 | 313 | if lineage["version"] == "1": 314 | for parent_name, v in lineage["lineage"].items(): 315 | remote_url = v["remote-url"] 316 | print(f"git remote add {parent_name} {remote_url};") 317 | print(f"git remote set-url --push {parent_name} no_push;") 318 | else: 319 | print(f'Unsupported lineage version: {lineage["version"]}', file=sys.stderr) 320 | END_OF_LINE 321 | )" 322 | 323 | # Install all necessary mypy type stubs 324 | mypy --install-types --non-interactive src/ 325 | 326 | # Qapla' 327 | echo "Success!" 328 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the setup module for the pshtt project.
3 | 4 | Based on: 5 | 6 | - https://packaging.python.org/distributing/ 7 | - https://github.com/pypa/sampleproject/blob/master/setup.py 8 | - https://blog.ionelmc.ro/2014/05/25/python-packaging/#the-structure 9 | """ 10 | 11 | # Standard Python Libraries 12 | import codecs 13 | from glob import glob 14 | from os.path import abspath, basename, dirname, join, splitext 15 | 16 | # Third-Party Libraries 17 | from setuptools import find_packages, setup 18 | 19 | 20 | def readme(): 21 | """Read in and return the contents of the project's README.md file.""" 22 | with open("README.md", encoding="utf-8") as f: 23 | return f.read() 24 | 25 | 26 | # Below two methods were pulled from: 27 | # https://packaging.python.org/guides/single-sourcing-package-version/ 28 | def read(rel_path): 29 | """Open a file for reading from a given relative path.""" 30 | here = abspath(dirname(__file__)) 31 | with codecs.open(join(here, rel_path), "r") as fp: 32 | return fp.read() 33 | 34 | 35 | def get_version(version_file): 36 | """Extract a version number from the given file path.""" 37 | for line in read(version_file).splitlines(): 38 | if line.startswith("__version__"): 39 | delim = '"' if '"' in line else "'" 40 | return line.split(delim)[1] 41 | raise RuntimeError("Unable to find version string.") 42 | 43 | 44 | setup( 45 | name="pshtt", 46 | # Versions should comply with PEP440 47 | version=get_version("src/pshtt/_version.py"), 48 | description="Scan websites for HTTPS deployment best practices", 49 | long_description=readme(), 50 | long_description_content_type="text/markdown", 51 | # Landing page for CISA's cybersecurity mission 52 | url="https://www.cisa.gov/cybersecurity", 53 | # Additional URLs for this project per 54 | # https://packaging.python.org/guides/distributing-packages-using-setuptools/#project-urls 55 | project_urls={ 56 | "Source": "https://github.com/cisagov/pshtt", 57 | "Tracker": "https://github.com/cisagov/pshtt/issues", 58 | }, 59 | # Author details 60 | author="Cybersecurity and Infrastructure Security Agency", 61 | author_email="github@cisa.dhs.gov", 62 | license="License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", 63 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 64 | classifiers=[ 65 | # How mature is this project? Common values are 66 | # 3 - Alpha 67 | # 4 - Beta 68 | # 5 - Production/Stable 69 | "Development Status :: 4 - Beta", 70 | # Indicate who your project is intended for 71 | "Intended Audience :: Developers", 72 | # Pick your license as you wish (should match "license" above) 73 | "License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", 74 | # Specify the Python versions you support here. In particular, ensure 75 | # that you indicate whether you support Python 2, Python 3 or both. 76 | "Programming Language :: Python :: 3", 77 | "Programming Language :: Python :: 3 :: Only", 78 | "Programming Language :: Python :: 3.7", 79 | "Programming Language :: Python :: 3.8", 80 | "Programming Language :: Python :: 3.9", 81 | "Programming Language :: Python :: 3.10", 82 | # "Programming Language :: Python :: 3.11", 83 | # "Programming Language :: Python :: 3.12", 84 | # "Programming Language :: Python :: 3.13", 85 | "Programming Language :: Python :: Implementation :: CPython", 86 | ], 87 | python_requires=">=3.7", 88 | # What does your project relate to? 
89 | keywords="https best practices", 90 | packages=find_packages(where="src"), 91 | package_dir={"": "src"}, 92 | py_modules=[splitext(basename(path))[0] for path in glob("src/*.py")], 93 | install_requires=[ 94 | "docopt>=0.6.2", 95 | "publicsuffixlist[update]>=0.9.2 ", 96 | "pyopenssl>=17.5.0", 97 | "pytablereader>=0.15.0", 98 | "pytablewriter>=0.27.2", 99 | "python-dateutil>=2.7.3", 100 | "pytz>=2018.5", 101 | "requests>=2.18.4", 102 | "setuptools", 103 | "sslyze>=3.0.0,<5.0.0", 104 | "wget>=3.2", 105 | ], 106 | extras_require={ 107 | # IMPORTANT: Keep type hinting-related dependencies of the dev section 108 | # in sync with the mypy pre-commit hook configuration (see 109 | # .pre-commit-config.yaml). Any changes to type hinting-related 110 | # dependencies here should be reflected in the additional_dependencies 111 | # field of the mypy pre-commit hook to avoid discrepancies in type 112 | # checking between environments. 113 | "dev": [ 114 | "types-docopt", 115 | "types-pyOpenSSL", 116 | "types-requests", 117 | "types-setuptools", 118 | "types-urllib3", 119 | ], 120 | "test": [ 121 | "coverage", 122 | "coveralls", 123 | "pre-commit", 124 | "pytest-cov", 125 | "pytest", 126 | ], 127 | }, 128 | # Conveniently allows one to run the CLI tool as `pshtt` 129 | entry_points={"console_scripts": ["pshtt = pshtt.cli:main"]}, 130 | ) 131 | -------------------------------------------------------------------------------- /src/pshtt/__init__.py: -------------------------------------------------------------------------------- 1 | """The pshtt library.""" 2 | 3 | # Standard Python Libraries 4 | from typing import List 5 | 6 | # We disable a Flake8 check for "Module imported but unused (F401)" here because 7 | # although this import is not directly used, it populates the value 8 | # package_name.__version__, which is used to get version information about this 9 | # Python package. 10 | from ._version import __version__ # noqa: F401 11 | 12 | __all__: List[str] = [] 13 | -------------------------------------------------------------------------------- /src/pshtt/__main__.py: -------------------------------------------------------------------------------- 1 | """Code to run if this package is used as a Python module.""" 2 | 3 | from .cli import main 4 | 5 | main() 6 | -------------------------------------------------------------------------------- /src/pshtt/_version.py: -------------------------------------------------------------------------------- 1 | """This file defines the version of this module.""" 2 | 3 | __version__ = "0.7.1" 4 | -------------------------------------------------------------------------------- /src/pshtt/cli.py: -------------------------------------------------------------------------------- 1 | """pshtt ("pushed") is a tool to test domains for HTTPS best practices. 2 | 3 | Usage: 4 | pshtt (INPUT ...) [--output OUTFILE] [--sorted] [--json] [--markdown] [--debug] [--timeout TIMEOUT] [--user-agent AGENT] [--cache-third-parties DIR] [--ca-file PATH] [--pt-int-ca-file PATH] 5 | pshtt (-h | --help) 6 | 7 | Options: 8 | -h --help Show this message. 9 | -s --sorted Sort output by domain, A-Z. 10 | -o --output=OUTFILE Name output file. (Defaults to "results".) 11 | -j --json Get results in JSON. (Defaults to CSV.) 12 | -m --markdown Get results in Markdown. (Defaults to CSV.) 13 | -d --debug Print debug output. 14 | -u --user-agent=AGENT Override user agent. 15 | -t --timeout=TIMEOUT Override timeout (in seconds). 
16 | -c --cache-third-parties=DIR Cache third party data, and what directory to cache it in. 17 | -f --ca-file=PATH Specify custom CA bundle (PEM format) 18 | -p --pt-int-ca-file=PATH Specify public trust CA bundle with intermediates (PEM format) 19 | 20 | Notes: 21 | If the first INPUT ends with .csv, domains will be read from CSV. 22 | CSV output will always be written to disk, defaulting to results.csv. 23 | """ 24 | 25 | # Standard Python Libraries 26 | import csv 27 | import logging 28 | import sys 29 | 30 | # Third-Party Libraries 31 | import docopt 32 | import pytablewriter 33 | 34 | from . import pshtt, utils 35 | from ._version import __version__ 36 | from .utils import smart_open 37 | 38 | 39 | def to_csv(results, out_filename): 40 | """Output the provided results in CSV format to the provided filename.""" 41 | utils.debug("Opening CSV file: %s", out_filename) 42 | with smart_open(out_filename) as out_file: 43 | writer = csv.writer(out_file) 44 | 45 | # Write out header 46 | writer.writerow(pshtt.HEADERS) 47 | 48 | # Write out the row data as it completes 49 | for result in results: 50 | row = [result[header] for header in pshtt.HEADERS] 51 | writer.writerow(row) 52 | 53 | logging.warning("Wrote results to %s.", out_filename) 54 | 55 | 56 | def to_json(results, out_filename): 57 | """Output the provided results in JSON format to the provided filename.""" 58 | # Generate (yield) all the results before exporting to JSON 59 | results = list(results) 60 | 61 | with smart_open(out_filename) as out_file: 62 | json_content = utils.json_for(results) 63 | 64 | out_file.write(json_content + "\n") 65 | 66 | if out_file is not sys.stdout: 67 | logging.warning("Wrote results to %s.", out_filename) 68 | 69 | 70 | def to_markdown(results, out_filename): 71 | """Output the provided results in Markdown format to the provided filename.""" 72 | # Generate (yield) all the results before exporting to Markdown 73 | table = [[f" {result[header]}" for header in pshtt.HEADERS] for result in results] 74 | 75 | utils.debug("Printing Markdown...", divider=True) 76 | with smart_open(out_filename) as out_file: 77 | writer = pytablewriter.MarkdownTableWriter() 78 | 79 | writer.header_list = pshtt.HEADERS 80 | writer.value_matrix = table 81 | writer.stream = out_file 82 | 83 | writer.write_table() 84 | 85 | 86 | def main(): 87 | """Provide a command line interface to the pshtt library.""" 88 | args = docopt.docopt(__doc__, version=__version__) 89 | utils.configure_logging(args["--debug"]) 90 | 91 | out_filename = args["--output"] 92 | 93 | # Read from a .csv, or allow domains on the command line. 94 | domains = [] 95 | if args["INPUT"][0].endswith(".csv"): 96 | domains = utils.load_domains(args["INPUT"][0]) 97 | else: 98 | domains = args["INPUT"] 99 | 100 | domains = utils.format_domains(domains) 101 | 102 | # If the user wants to sort them, sort them in place. 103 | if args["--sorted"]: 104 | domains.sort() 105 | 106 | options = { 107 | "user_agent": args["--user-agent"], 108 | "timeout": args["--timeout"], 109 | "cache-third-parties": args["--cache-third-parties"], 110 | "ca_file": args["--ca-file"], 111 | "pt_int_ca_file": args["--pt-int-ca-file"], 112 | } 113 | 114 | # Do the domain inspections 115 | results = pshtt.inspect_domains(domains, options) 116 | 117 | # JSON can go to STDOUT, or to a file. 
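# (Note: to_json and to_markdown write through utils.smart_open, which
# falls back to stdout when --output was not supplied, i.e. when
# out_filename is None.)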
118 | if args["--json"]: 119 | to_json(results, out_filename) 120 | 121 | # Markdown can go to STDOUT, or to a file 122 | elif args["--markdown"]: 123 | to_markdown(results, out_filename) 124 | 125 | # CSV always goes to a file. 126 | else: 127 | if out_filename is None: 128 | out_filename = "results.csv" 129 | 130 | to_csv(results, out_filename) 131 | -------------------------------------------------------------------------------- /src/pshtt/models.py: -------------------------------------------------------------------------------- 1 | """Define the models used in this library.""" 2 | 3 | 4 | class Domain: 5 | """Define the domain model.""" 6 | 7 | def __init__(self, domain): 8 | """Initialize the model.""" 9 | self.domain = domain 10 | 11 | # 4 endpoints for each domain. 12 | self.http = None 13 | self.httpwww = None 14 | self.https = None 15 | self.httpswww = None 16 | self.unknown_error = False 17 | 18 | # Filled in after analyzing each endpoint. 19 | self.canonical = None 20 | 21 | def to_object(self): 22 | """Convert the model to a dictionary.""" 23 | return { 24 | "https": self.https.to_object(), 25 | "httpswww": self.httpswww.to_object(), 26 | "http": self.http.to_object(), 27 | "httpwww": self.httpwww.to_object(), 28 | } 29 | 30 | 31 | class Endpoint: 32 | """Define the endpoint model.""" 33 | 34 | def __init__(self, protocol, host, base_domain): 35 | """Initialize the model.""" 36 | # Basic endpoint description 37 | self.protocol = protocol 38 | self.host = host # "www" or "root" 39 | self.base_domain = base_domain 40 | self.url = self.url_for() 41 | 42 | # all HTTP/HTTPS endpoints have these 43 | self.headers = ( 44 | {} 45 | ) # will be replaced with a requests.structures.CaseInsensitiveDict 46 | self.status = None 47 | self.live = None 48 | self.ip = None 49 | self.redirect = None 50 | self.server_header = None 51 | self.server_version = None 52 | self.unknown_error = False 53 | self.notes = "" 54 | 55 | # If an endpoint redirects, characterize the redirect behavior 56 | self.redirect_immediately_to = None 57 | self.redirect_immediately_to_www = None 58 | self.redirect_immediately_to_https = None 59 | self.redirect_immediately_to_http = None 60 | self.redirect_immediately_to_external = None 61 | self.redirect_immediately_to_subdomain = None 62 | self.redirect_eventually_to = None 63 | self.redirect_eventually_to_https = None 64 | self.redirect_eventually_to_http = None 65 | self.redirect_eventually_to_external = None 66 | self.redirect_eventually_to_subdomain = None 67 | 68 | # Only HTTPS endpoints have these. 69 | # Initialize all of them to None, so that it's 70 | # discernible if they don't get explicitly set. 71 | self.https_full_connection = None 72 | self.https_client_auth_required = False 73 | self.https_valid = None 74 | self.https_public_trusted = None 75 | self.https_custom_trusted = None 76 | self.https_bad_chain = None 77 | self.https_bad_hostname = None 78 | self.https_expired_cert = None 79 | self.https_self_signed_cert = None 80 | self.https_cert_chain_len = None 81 | self.https_missing_intermediate_cert = None 82 | self.hsts = None 83 | self.hsts_header = None 84 | self.hsts_max_age = None 85 | self.hsts_all_subdomains = None 86 | self.hsts_preload = None 87 | self.hsts_preloaded = None 88 | 89 | def url_for(self): 90 | """Return an appropriately formatted URL for the base domain.""" 91 | if self.host == "root": 92 | prefix = "" 93 | elif self.host == "www": 94 | prefix = "www." 
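        else:
            # Defensive guard, added for illustration: the library only ever
            # constructs "root" and "www" hosts, but without this branch
            # `prefix` would be left unbound for any other value.
            raise ValueError(f"Unexpected host: {self.host}")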
95 | 96 | return f"{self.protocol}://{prefix}{self.base_domain}" 97 | 98 | # The fields we want to serialize to JSON. 99 | def to_object(self): 100 | """Convert the model to a dictionary.""" 101 | obj = { 102 | "url": self.url, 103 | "headers": dict(self.headers), 104 | "status": self.status, 105 | "ip": self.ip, 106 | "live": self.live, 107 | "redirect": self.redirect, 108 | "redirect_eventually_to": self.redirect_eventually_to, 109 | "redirect_immediately_to": self.redirect_immediately_to, 110 | "redirect_immediately_to_www": self.redirect_immediately_to_www, 111 | "redirect_immediately_to_https": self.redirect_immediately_to_https, 112 | "redirect_immediately_to_http": self.redirect_immediately_to_http, 113 | "redirect_immediately_to_external": self.redirect_immediately_to_external, 114 | "redirect_immediately_to_subdomain": self.redirect_immediately_to_subdomain, 115 | "redirect_eventually_to_https": self.redirect_eventually_to_https, 116 | "redirect_eventually_to_http": self.redirect_eventually_to_http, 117 | "redirect_eventually_to_external": self.redirect_eventually_to_external, 118 | "redirect_eventually_to_subdomain": self.redirect_eventually_to_subdomain, 119 | "server_header": self.server_header, 120 | "server_version": self.server_version, 121 | "notes": self.notes, 122 | "unknown_error": self.unknown_error, 123 | } 124 | 125 | if self.protocol == "https": 126 | obj["https_full_connection"] = self.https_full_connection 127 | obj["https_client_auth_required"] = self.https_client_auth_required 128 | obj["https_valid"] = self.https_valid 129 | obj["https_public_trusted"] = self.https_public_trusted 130 | obj["https_custom_trusted"] = self.https_custom_trusted 131 | obj["https_bad_chain"] = self.https_bad_chain 132 | obj["https_bad_hostname"] = self.https_bad_hostname 133 | obj["https_expired_cert"] = self.https_expired_cert 134 | obj["https_self_signed_cert"] = self.https_self_signed_cert 135 | obj["https_cert_chain_len"] = self.https_cert_chain_len 136 | obj["https_missing_intermediate_cert"] = ( 137 | self.https_missing_intermediate_cert 138 | ) 139 | obj["hsts"] = self.hsts 140 | obj["hsts_header"] = self.hsts_header 141 | obj["hsts_max_age"] = self.hsts_max_age 142 | obj["hsts_all_subdomains"] = self.hsts_all_subdomains 143 | obj["hsts_preload"] = self.hsts_preload 144 | 145 | return obj 146 | -------------------------------------------------------------------------------- /src/pshtt/utils.py: -------------------------------------------------------------------------------- 1 | """Define utility functions for the pshtt library.""" 2 | 3 | # Standard Python Libraries 4 | import contextlib 5 | import csv 6 | import datetime 7 | import errno 8 | import json 9 | import logging 10 | import os 11 | import re 12 | import sys 13 | import traceback 14 | 15 | 16 | # Display exception without re-throwing it. 
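# A hypothetical call site (not from this codebase) illustrating the
# intended pattern:
#
#     try:
#         check_endpoint(endpoint)  # any operation that may raise
#     except Exception:
#         logging.debug(format_last_exception())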
17 | def format_last_exception(): 18 | """Pretty format the last raised exception.""" 19 | exc_type, exc_value, exc_traceback = sys.exc_info() 20 | return "\n".join(traceback.format_exception(exc_type, exc_value, exc_traceback)) 21 | 22 | 23 | # mkdir -p in python, from: 24 | # http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python 25 | def mkdir_p(path): 26 | """Make a directory and any missing directories in the path.""" 27 | try: 28 | os.makedirs(path) 29 | except OSError as exc: # Python >2.5 30 | if exc.errno == errno.EEXIST: 31 | pass 32 | else: 33 | raise 34 | 35 | 36 | def json_for(data): 37 | """Pretty format the given object to JSON.""" 38 | return json.dumps(data, sort_keys=True, indent=2, default=format_datetime) 39 | 40 | 41 | def write(content, destination, binary=False): 42 | """Write contents to a destination after making any missing directories.""" 43 | parent = os.path.dirname(destination) 44 | if parent != "": 45 | mkdir_p(parent) 46 | 47 | with ( 48 | open(destination, "bw") if binary else open(destination, "w", encoding="utf-8") 49 | ) as f: 50 | f.write(content) 51 | 52 | 53 | def format_datetime(obj): 54 | """Provide a formatted datetime.""" 55 | if isinstance(obj, datetime.date): 56 | return obj.isoformat() 57 | if isinstance(obj, str): 58 | return obj 59 | return None 60 | 61 | 62 | # Load domains from a CSV, skip a header row 63 | def load_domains(domain_csv): 64 | """Load a list of domains from a CSV file.""" 65 | domains = [] 66 | with open(domain_csv, encoding="utf-8") as csvfile: 67 | for row in csv.reader(csvfile): 68 | # Skip empty rows. 69 | if not row or not row[0].strip(): 70 | continue 71 | 72 | row[0] = row[0].lower() 73 | # Skip any header row. 74 | if not domains and row[0].startswith("domain"): 75 | continue 76 | 77 | domains.append(row[0]) 78 | return domains 79 | 80 | 81 | # Configure logging level, so logging.debug can hinge on --debug. 82 | def configure_logging(debug_logging=False): 83 | """Configure the logging library.""" 84 | log_level = logging.DEBUG if debug_logging else logging.WARNING 85 | logging.basicConfig(format="%(message)s", level=log_level) 86 | 87 | 88 | def format_domains(domains): 89 | """Format a given list of domains.""" 90 | formatted_domains = [] 91 | 92 | for domain in domains: 93 | # Replace a single instance of http://, https://, and www. if present. 94 | formatted_domains.append(re.sub(r"^(https?://)?(www\.)?", "", domain)) 95 | 96 | return formatted_domains 97 | 98 | 99 | def debug(*args, divider=False): 100 | """Output a debugging message.""" 101 | if divider: 102 | logging.debug("\n-------------------------\n") 103 | 104 | if args: 105 | logging.debug(*args) 106 | 107 | 108 | @contextlib.contextmanager 109 | def smart_open(filename=None): 110 | """Context manager that can handle writing to a file or stdout. 
111 | 112 | Adapted from: https://stackoverflow.com/a/17603000 113 | """ 114 | handle = sys.stdout if filename is None else open(filename, "w", encoding="utf-8") 115 | 116 | try: 117 | yield handle 118 | finally: 119 | if handle is not sys.stdout: 120 | handle.close() 121 | -------------------------------------------------------------------------------- /tag.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | set -o pipefail 6 | 7 | version=$(./bump-version show) 8 | 9 | git tag "v$version" && git push --tags 10 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """pytest plugin configuration. 2 | 3 | https://docs.pytest.org/en/latest/writing_plugins.html#conftest-py-plugins 4 | """ 5 | 6 | # Third-Party Libraries 7 | import pytest 8 | 9 | 10 | def pytest_addoption(parser): 11 | """Add new commandline options to pytest.""" 12 | parser.addoption( 13 | "--runslow", action="store_true", default=False, help="run slow tests" 14 | ) 15 | 16 | 17 | def pytest_configure(config): 18 | """Register new markers.""" 19 | config.addinivalue_line("markers", "slow: mark test as slow") 20 | 21 | 22 | def pytest_collection_modifyitems(config, items): 23 | """Modify collected tests based on custom marks and commandline options.""" 24 | if config.getoption("--runslow"): 25 | # --runslow given in cli: do not skip slow tests 26 | return 27 | skip_slow = pytest.mark.skip(reason="need --runslow option to run") 28 | for item in items: 29 | if "slow" in item.keywords: 30 | item.add_marker(skip_slow) 31 | -------------------------------------------------------------------------------- /tests/test_badssl.py: -------------------------------------------------------------------------------- 1 | """Test bad SSL results from a domain.""" 2 | 3 | # Standard Python Libraries 4 | import unittest 5 | 6 | # cisagov Libraries 7 | from pshtt.models import Domain, Endpoint 8 | from pshtt.pshtt import basic_check, hsts_check 9 | 10 | 11 | def inspect(base_domain): 12 | """Populate a domain model with the provided domain.""" 13 | domain = Domain(base_domain) 14 | domain.http = Endpoint("http", "root", base_domain) 15 | domain.httpwww = Endpoint("http", "www", base_domain) 16 | domain.https = Endpoint("https", "root", base_domain) 17 | domain.httpswww = Endpoint("https", "www", base_domain) 18 | 19 | return domain 20 | 21 | # Analyze HTTP endpoint responsiveness and behavior. 22 | basic_check(domain.http) 23 | basic_check(domain.httpwww) 24 | basic_check(domain.https) 25 | basic_check(domain.httpswww) 26 | 27 | # Analyze HSTS header, if present, on each HTTPS endpoint.
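    # NOTE: the early "return domain" above makes the basic_check() and
    # hsts_check() calls here unreachable; each test case instead invokes
    # the check it needs directly on the endpoint under test.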
28 | hsts_check(domain.https) 29 | hsts_check(domain.httpswww) 30 | 31 | return domain 32 | 33 | 34 | @unittest.skip("Disable live tests against badssl for now") 35 | class TestCertificate(unittest.TestCase): 36 | """Test different bad certificate results.""" 37 | 38 | def test_https_expired(self): 39 | """Test when the certificate has expired.""" 40 | domain = inspect("expired.badssl.com") 41 | basic_check(domain.https) 42 | 43 | self.assertTrue(domain.https.https_expired_cert) 44 | 45 | def test_https_bad_hostname(self): 46 | """Test when the certificate has a bad hostname.""" 47 | domain = inspect("wrong.host.badssl.com") 48 | basic_check(domain.https) 49 | 50 | self.assertTrue(domain.https.https_bad_hostname) 51 | 52 | def test_https_bad_chain(self): 53 | """Test when there is a bad chain of trust for a certificate.""" 54 | domain = inspect("untrusted-root.badssl.com") 55 | basic_check(domain.https) 56 | 57 | self.assertTrue(domain.https.https_bad_chain) 58 | 59 | def test_https_self_signed_cert(self): 60 | """Test when a certificate is self-signed.""" 61 | domain = inspect("self-signed.badssl.com") 62 | basic_check(domain.https) 63 | 64 | self.assertTrue(domain.https.https_self_signed_cert) 65 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | """Test the command line interface functionality of the library.""" 2 | 3 | # Standard Python Libraries 4 | import os 5 | import sys 6 | import tempfile 7 | import unittest 8 | 9 | # cisagov Libraries 10 | from pshtt import pshtt as _pshtt 11 | from pshtt.cli import to_csv 12 | from pshtt.models import Domain, Endpoint 13 | 14 | 15 | class FakeSuffixList: 16 | """Test against a fake suffix list.""" 17 | 18 | def get_public_suffix(self, hostname, *args, **kwargs): 19 | """Return the public suffix of a hostname.""" 20 | return hostname 21 | 22 | 23 | # Artificially set up the preload and suffix lists. 24 | # This should be irrelevant after #126 is decided upon / merged 25 | _pshtt.SUFFIX_LIST = FakeSuffixList() 26 | _pshtt.PRELOAD_LIST = [] 27 | _pshtt.PRELOAD_PENDING = [] 28 | 29 | 30 | class TestToCSV(unittest.TestCase): 31 | """Test the CSV output of the command line interface.""" 32 | 33 | @classmethod 34 | def setUpClass(cls): 35 | """Perform initial setup.""" 36 | base_domain = "example.com" 37 | 38 | domain = Domain(base_domain) 39 | domain.http = Endpoint("http", "root", base_domain) 40 | domain.httpwww = Endpoint("http", "www", base_domain) 41 | domain.https = Endpoint("https", "root", base_domain) 42 | domain.httpswww = Endpoint("https", "www", base_domain) 43 | 44 | cls.results = _pshtt.result_for(domain) 45 | cls.temp_filename = os.path.join(tempfile.gettempdir(), "results.csv") 46 | 47 | @unittest.skipIf(sys.version_info[0] < 3, "Python 3 test only") 48 | def test_no_results(self): 49 | """Test when there are no results.""" 50 | to_csv([], self.temp_filename) 51 | 52 | with open(self.temp_filename) as fh: 53 | content = fh.read() 54 | 55 | expected = ",".join(_pshtt.HEADERS) + "\n" 56 | 57 | self.assertEqual(content, expected) 58 | 59 | @unittest.skipIf(sys.version_info[0] < 3, "Python 3 test only") 60 | def test_single_result(self): 61 | """Test a single domain result.""" 62 | to_csv([self.results], self.temp_filename) 63 | 64 | with open(self.temp_filename) as fh: 65 | content = fh.read() 66 | 67 | domain_data = [ 68 | ("Domain", "example.com"), 69 | ("Base Domain", "example.com"), 70 |
("Canonical URL", "http://example.com"), 71 | ("Live", "False"), 72 | ("HTTPS Live", "False"), 73 | ("HTTPS Full Connection", "False"), 74 | ("HTTPS Client Auth Required", "False"), 75 | ("Redirect", "False"), 76 | ("Redirect To", ""), 77 | ("Valid HTTPS", "False"), 78 | ("HTTPS Publicly Trusted", "False"), 79 | ("HTTPS Custom Truststore Trusted", "False"), 80 | ("Defaults to HTTPS", "False"), 81 | ("Downgrades HTTPS", "False"), 82 | ("Strictly Forces HTTPS", "False"), 83 | ("HTTPS Bad Chain", "False"), 84 | ("HTTPS Bad Hostname", "False"), 85 | ("HTTPS Expired Cert", "False"), 86 | ("HTTPS Self Signed Cert", "False"), 87 | ("HSTS", ""), 88 | ("HSTS Header", ""), 89 | ("HSTS Max Age", ""), 90 | ("HSTS Entire Domain", ""), 91 | ("HSTS Preload Ready", "False"), 92 | ("HSTS Preload Pending", "False"), 93 | ("HSTS Preloaded", "False"), 94 | ("Base Domain HSTS Preloaded", "False"), 95 | ("Domain Supports HTTPS", "False"), 96 | ("Domain Enforces HTTPS", "False"), 97 | ("Domain Uses Strong HSTS", ""), 98 | ("IP", ""), 99 | ("Server Header", ""), 100 | ("Server Version", ""), 101 | ("HTTPS Cert Chain Length", ""), 102 | ("HTTPS Probably Missing Intermediate Cert", "False"), 103 | ("Notes", ""), 104 | ("Unknown Error", "False"), 105 | ] 106 | 107 | self.maxDiff = None 108 | 109 | header = ",".join(t[0] for t in domain_data) 110 | values = ",".join(t[1] for t in domain_data) 111 | expected = header + "\n" + values + "\n" 112 | self.assertEqual(content, expected) 113 | 114 | # Sanity check that this hard coded data has the same headers as defined 115 | # in the package. This should never fail, as the above assert should 116 | # catch any changes in the header columns. 117 | self.assertEqual(header, ",".join(_pshtt.HEADERS)) 118 | -------------------------------------------------------------------------------- /tests/test_definitions.py: -------------------------------------------------------------------------------- 1 | """Test the library's models.""" 2 | 3 | # Standard Python Libraries 4 | import unittest 5 | 6 | # cisagov Libraries 7 | from pshtt import pshtt as api 8 | from pshtt.models import Domain, Endpoint 9 | 10 | 11 | class TestUsesHTTPS(unittest.TestCase): 12 | """Test for a domain using HTTPS.""" 13 | 14 | def setUp(self): 15 | """Perform initial setup.""" 16 | base_domain = "example.com" 17 | self.domain = Domain(base_domain) 18 | 19 | self.domain.http = Endpoint("http", "root", base_domain) 20 | self.domain.httpwww = Endpoint("http", "www", base_domain) 21 | self.domain.https = Endpoint("https", "root", base_domain) 22 | self.domain.httpswww = Endpoint("https", "www", base_domain) 23 | 24 | @unittest.skip("Still working on definition") 25 | def test_definition(self): 26 | """Test the definition of a domain using HTTPS.""" 27 | self.domain.https.live = True 28 | self.domain.https.https_valid = True 29 | self.domain.https.https_valid = True 30 | 31 | self.assertTrue(api.is_domain_supports_https(self.domain)) 32 | 33 | 34 | class TestBadChain(unittest.TestCase): 35 | """Test for a bad certificate chain.""" 36 | 37 | def setUp(self): 38 | """Perform initial setup.""" 39 | base_domain = "example.com" 40 | self.domain = Domain(base_domain) 41 | 42 | self.domain.http = Endpoint("http", "root", base_domain) 43 | self.domain.httpwww = Endpoint("http", "www", base_domain) 44 | self.domain.https = Endpoint("https", "root", base_domain) 45 | self.domain.httpswww = Endpoint("https", "www", base_domain) 46 | 47 | def test_bad_chain_root(self): 48 | """Test the root domain name.""" 49 | 
self.domain.https.https_bad_chain = True 50 | self.domain.canonical = self.domain.https 51 | 52 | self.assertTrue(api.is_bad_chain(self.domain)) 53 | 54 | def test_bad_chain_www(self): 55 | """Test the www prefixed domain name.""" 56 | self.domain.httpswww.https_bad_chain = True 57 | self.domain.canonical = self.domain.httpswww 58 | 59 | self.assertTrue(api.is_bad_chain(self.domain)) 60 | 61 | def test_bad_chain_both(self): 62 | """Test both the root and www prefixed domain name.""" 63 | self.domain.https.https_bad_chain = True 64 | self.domain.httpswww.https_bad_chain = True 65 | 66 | self.domain.canonical = self.domain.https 67 | self.assertTrue(api.is_bad_chain(self.domain)) 68 | 69 | self.domain.canonical = self.domain.httpswww 70 | self.assertTrue(api.is_bad_chain(self.domain)) 71 | 72 | 73 | class TestBadHostname(unittest.TestCase): 74 | """Verify the bad hostname check.""" 75 | 76 | def setUp(self): 77 | """Perform initial setup.""" 78 | base_domain = "example.com" 79 | self.domain = Domain(base_domain) 80 | 81 | self.domain.http = Endpoint("http", "root", base_domain) 82 | self.domain.httpwww = Endpoint("http", "www", base_domain) 83 | self.domain.https = Endpoint("https", "root", base_domain) 84 | self.domain.httpswww = Endpoint("https", "www", base_domain) 85 | 86 | def test_bad_hostname_root(self): 87 | """Test using the base domain name.""" 88 | self.domain.https.https_bad_hostname = True 89 | self.domain.canonical = self.domain.https 90 | 91 | self.assertTrue(api.is_bad_hostname(self.domain)) 92 | 93 | def test_bad_hostname_www(self): 94 | """Test using the www prefixed domain name.""" 95 | self.domain.httpswww.https_bad_hostname = True 96 | self.domain.canonical = self.domain.httpswww 97 | 98 | self.assertTrue(api.is_bad_hostname(self.domain)) 99 | 100 | def test_bad_hostname_both(self): 101 | """Test both the root and www prefixed domain name.""" 102 | self.domain.https.https_bad_hostname = True 103 | self.domain.httpswww.https_bad_hostname = True 104 | 105 | self.domain.canonical = self.domain.https 106 | self.assertTrue(api.is_bad_hostname(self.domain)) 107 | 108 | self.domain.canonical = self.domain.httpswww 109 | self.assertTrue(api.is_bad_hostname(self.domain)) 110 | -------------------------------------------------------------------------------- /tests/test_pshtt.py: -------------------------------------------------------------------------------- 1 | """Test the core functionality of the library.""" 2 | 3 | # Standard Python Libraries 4 | import unittest 5 | 6 | # cisagov Libraries 7 | from pshtt.models import Domain, Endpoint 8 | from pshtt.pshtt import is_live 9 | 10 | 11 | class TestLiveliness(unittest.TestCase): 12 | """Test the liveliness of a domain.""" 13 | 14 | def setUp(self): 15 | """Perform initial setup.""" 16 | base_domain = "example.com" 17 | self.domain = Domain(base_domain) 18 | 19 | self.domain.http = Endpoint("http", "root", base_domain) 20 | self.domain.httpwww = Endpoint("http", "www", base_domain) 21 | self.domain.https = Endpoint("https", "root", base_domain) 22 | self.domain.httpswww = Endpoint("https", "www", base_domain) 23 | 24 | def test_none(self): 25 | """Test in an unchecked state.""" 26 | self.assertFalse(is_live(self.domain)) 27 | 28 | def test_http_only(self): 29 | """Test when only HTTP access is live on the base domain name.""" 30 | self.domain.http.live = True 31 | 32 | self.assertTrue(is_live(self.domain)) 33 | 34 | def test_https_only(self): 35 | """Test when only HTTPS access is live on the base domain name.""" 36 | 
self.domain.https.live = True 37 | 38 | self.assertTrue(is_live(self.domain)) 39 | 40 | def test_httpwww_only(self): 41 | """Test when only HTTP access is live on the www prefixed domain name.""" 42 | self.domain.httpwww.live = True 43 | 44 | self.assertTrue(is_live(self.domain)) 45 | 46 | def test_httpswww_only(self): 47 | """Test when only HTTPS access is live on the www prefixed domain name.""" 48 | self.domain.httpswww.live = True 49 | 50 | self.assertTrue(is_live(self.domain)) 51 | 52 | def test_http_both(self): 53 | """Test when only HTTP access is live on both domain names.""" 54 | self.domain.http.live = True 55 | self.domain.httpwww.live = True 56 | 57 | self.assertTrue(is_live(self.domain)) 58 | 59 | def test_https_both(self): 60 | """Test when only HTTPS access is live on both domain names.""" 61 | self.domain.https.live = True 62 | self.domain.httpswww.live = True 63 | 64 | self.assertTrue(is_live(self.domain)) 65 | 66 | def test_www_neither(self): 67 | """Test when both HTTP and HTTPS are live on only the base domain.""" 68 | self.domain.http.live = True 69 | self.domain.https.live = True 70 | 71 | self.assertTrue(is_live(self.domain)) 72 | 73 | def test_www_both(self): 74 | """Test when both HTTP and HTTPS are live on the www prefixed domain name.""" 75 | self.domain.httpwww.live = True 76 | self.domain.httpswww.live = True 77 | 78 | self.assertTrue(is_live(self.domain)) 79 | 80 | def test_all(self): 81 | """Test when both HTTP and HTTPS are live on both domain names.""" 82 | self.domain.http.live = True 83 | self.domain.https.live = True 84 | self.domain.httpwww.live = True 85 | self.domain.httpswww.live = True 86 | 87 | self.assertTrue(is_live(self.domain)) 88 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | """Test the utility functions for the pshtt library.""" 2 | 3 | # Standard Python Libraries 4 | import os 5 | import sys 6 | import tempfile 7 | import unittest 8 | 9 | # cisagov Libraries 10 | from pshtt.utils import smart_open 11 | 12 | 13 | class TestSmartOpen(unittest.TestCase): 14 | """Test the functionality of the smart_open function.""" 15 | 16 | def test_without_filename(self): 17 | """Test that standard out is used if no filename is provided.""" 18 | with smart_open() as fh: 19 | self.assertIs(fh, sys.stdout) 20 | 21 | def test_with_empty_filename(self): 22 | """Test when an empty string is provided as a filename. 23 | 24 | Should raise a `FileNotFoundError` 25 | """ 26 | with self.assertRaises(FileNotFoundError): # noqa 27 | with smart_open(""): 28 | pass 29 | 30 | def test_with_real_filename(self): 31 | """Test when a valid string is provided as a filename.""" 32 | test_data = "This is the test data" 33 | 34 | with tempfile.TemporaryDirectory() as tmp_dirname: 35 | # Make a temporary file to use 36 | filename = os.path.join(tmp_dirname, "foo") 37 | 38 | with smart_open(filename) as fh: 39 | fh.write(test_data) 40 | 41 | # Read the file back in a context manager so the handle is closed. 42 | with open(filename, encoding="utf-8") as fh: 43 | self.assertEqual(test_data, fh.read()) 44 | --------------------------------------------------------------------------------