├── .github
├── CODEOWNERS
└── workflows
│ ├── check.yml
│ ├── publish.yml
│ ├── publish_to_testing.yml
│ └── update.yml
├── .gitignore
├── .vscode
├── c_cpp_properties.json
└── settings.json
├── Dockerfile
├── LICENSE
├── README.md
├── docs
└── images
│ ├── Human-Output-Example.png
│ ├── SARIF-Viewer-Example.png
│ ├── STACS-Logo-RGB.png
│ └── STACS-Logo-RGB.small.png
├── pyproject.toml
├── setup.py
├── stacs
├── __init__.py
├── native
│ └── archive
│ │ └── src
│ │ ├── archive.cpp
│ │ ├── archiveentry.cpp
│ │ ├── archiveentry.hpp
│ │ ├── archivereader.cpp
│ │ └── archivereader.hpp
└── scan
│ ├── __about__.py
│ ├── __init__.py
│ ├── constants.py
│ ├── entrypoint
│ ├── __init__.py
│ └── cli.py
│ ├── exceptions.py
│ ├── filter
│ ├── __init__.py
│ └── ignore_list.py
│ ├── helper.py
│ ├── loader
│ ├── __init__.py
│ ├── archive.py
│ ├── filepath.py
│ ├── format
│ │ ├── __init__.py
│ │ ├── dmg.py
│ │ └── xar.py
│ └── manifest.py
│ ├── model
│ ├── __init__.py
│ ├── finding.py
│ ├── ignore_list.py
│ ├── manifest.py
│ └── pack.py
│ ├── output
│ ├── __init__.py
│ ├── markdown.py
│ ├── pretty.py
│ └── sarif.py
│ └── scanner
│ ├── __init__.py
│ └── rules.py
├── tests
├── __init__.py
├── fixtures
│ ├── .gitignore
│ ├── findings
│ │ ├── 001.txt
│ │ ├── 002.txt
│ │ ├── 003.txt
│ │ └── 004.txt
│ ├── ignore_list
│ │ ├── 001-simple.valid.json
│ │ ├── 002-framework.valid.json
│ │ ├── 002-project.valid.json
│ │ └── 002-system.valid.json
│ └── pack
│ │ ├── 001-simple.valid.json
│ │ ├── 002-cloud.valid.json
│ │ ├── 002-parent.valid.json
│ │ ├── 002-pki-dsa.valid.json
│ │ ├── 002-pki-rsa.valid.json
│ │ └── 002-pki.valid.json
├── test_filter_ignore_list.py
├── test_loader_filepath.py
├── test_model_ignore_list.py
├── test_model_pack.py
├── test_output_sarif.py
└── test_scanner_rule.py
└── wrapper
└── stacs-scan
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # Own everything by default. This can be changed later and as needed.
2 | * @darkarnium
3 |
--------------------------------------------------------------------------------
/.github/workflows/check.yml:
--------------------------------------------------------------------------------
1 | name: Check
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - main
7 |
8 | jobs:
9 | check:
10 | strategy:
11 | matrix:
12 | python: ['3.9', '3.10', '3.11']
13 |
14 | runs-on: ubuntu-latest
15 | steps:
16 | - uses: actions/checkout@v2
17 |
18 | - name: Install Dependencies
19 | run: |
20 | sudo apt update
21 | sudo apt install -y libarchive13 libarchive-dev
22 |
23 | - name: Configure Python (${{ matrix.python }})
24 | uses: actions/setup-python@v2
25 | with:
26 | python-version: ${{ matrix.python }}
27 |
28 | - name: Install Tox
29 | run: |
30 | python -m pip install --upgrade pip wheel setuptools
31 | pip install tox
32 |
33 | - name: Run Linters (${{ matrix.python }})
34 | run: |
35 | tox -e linters
36 |
37 | - name: Run Tests (${{ matrix.python }})
38 | run: |
39 | tox -e py3
40 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | jobs:
8 | build_wheels:
9 | name: Build wheels on ${{ matrix.os }}
10 | runs-on: ${{ matrix.os }}
11 | strategy:
12 | matrix:
13 | os: [ubuntu-22.04, ubuntu-20.04, macos-11, macos-12]
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 | with:
18 | ref: ${{ github.event.inputs.release }}
19 |
20 | - name: Configure Python
21 | uses: actions/setup-python@v3
22 |
23 | # TODO: This may result in macOS compiling against a newer version of libarchive
24 | # than Linux.
25 | - name: Install dependencies (macOS)
26 | if: startsWith(matrix.os, 'macos-')
27 | run: brew install libarchive
28 |
29 | - name: Install cibuildwheel
30 | run: python -m pip install cibuildwheel==2.12.3
31 |
32 | - name: Build wheels (macOS)
33 | run: python -m cibuildwheel --output-dir wheelhouse
34 | if: startsWith(matrix.os, 'macos-')
35 |
36 | - name: Build wheels (Ubuntu)
37 | run: python -m cibuildwheel --output-dir wheelhouse
38 | if: startsWith(matrix.os, 'ubuntu-')
39 | env:
40 | CIBW_BEFORE_ALL_LINUX: >
41 | curl -sOL https://github.com/libarchive/libarchive/releases/download/v3.6.1/libarchive-3.6.1.tar.gz &&
42 | tar -zxvf libarchive-3.6.1.tar.gz &&
43 | cd libarchive-3.6.1/ &&
44 | ./configure --without-lzo2 --without-nettle --without-xml2 --without-openssl --with-expat &&
45 | make &&
46 | make install
47 |
48 | - uses: actions/upload-artifact@v3
49 | with:
50 | path: ./wheelhouse/*.whl
51 |
52 | build_sdist:
53 | runs-on: ubuntu-latest
54 | steps:
55 | - uses: actions/checkout@v2
56 |
57 | - name: Configure Python
58 | uses: actions/setup-python@v2
59 | with:
60 | python-version: 3.11
61 |
62 | - name: Build Python sdist
63 | run: |
64 | python -m pip install --upgrade pip wheel setuptools
65 | pip install build
66 | python -m build --sdist --outdir dist/ .
67 |
68 | - uses: actions/upload-artifact@v3
69 | with:
70 | path: dist/*.tar.gz
71 |
72 | publish:
73 | needs: [build_wheels, build_sdist]
74 | runs-on: ubuntu-latest
75 | steps:
76 | - uses: actions/checkout@v2
77 | with:
78 | ref: ${{ github.event.inputs.release }}
79 |
80 | - uses: actions/download-artifact@v3
81 | with:
82 | name: artifact
83 | path: dist
84 |
85 | - name: Extract version
86 | run: |
87 | git clone https://www.github.com/stacscan/stacs-rules.git /tmp/stacs-rules
88 | pushd /tmp/stacs-rules
89 | RULES_VERSION="$(git rev-parse --short HEAD)"
90 | popd
91 | STACS_VERSION="$(python -c 'exec(open("stacs/scan/__about__.py").read()) ; print(__version__, end="")')"
92 | echo "IMAGE_VERSION=${STACS_VERSION}-r${RULES_VERSION}" >> "${GITHUB_ENV}"
93 | echo "STACS_VERSION=${STACS_VERSION}" >> "${GITHUB_ENV}"
94 |
95 | - name: Publish Python package
96 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
97 | with:
98 | user: __token__
99 | password: ${{ secrets.PYPI_TOKEN }}
100 |
101 | - name: Wait a minute for PyPi to catch up
102 | run: sleep 60s
103 | shell: bash
104 |
105 | - name: Login to DockerHub
106 | uses: docker/login-action@v1
107 | with:
108 | username: ${{ secrets.DOCKERHUB_USERNAME }}
109 | password: ${{ secrets.DOCKERHUB_TOKEN }}
110 |
111 | - name: Build and push Docker image
112 | id: docker_build
113 | uses: docker/build-push-action@v2
114 | with:
115 | context: .
116 | push: true
117 | tags: stacscan/stacs:latest,stacscan/stacs:${{ env.IMAGE_VERSION }}
118 | build-args: |
119 | VERSION=${{ env.IMAGE_VERSION }}
120 | STACS_BUILD=${{ env.STACS_VERSION }}
121 |
--------------------------------------------------------------------------------
/.github/workflows/publish_to_testing.yml:
--------------------------------------------------------------------------------
1 | name: Publish to Testing
2 |
3 | on:
4 | workflow_dispatch:
5 | inputs:
6 | commit:
7 | description: The commit ref to build and release to PyPI testing.
8 | required: true
9 |
10 | jobs:
11 | build_wheels:
12 | name: Build wheels on ${{ matrix.os }}
13 | runs-on: ${{ matrix.os }}
14 | strategy:
15 | matrix:
16 | os: [ubuntu-22.04, ubuntu-20.04, macos-11, macos-12]
17 |
18 | steps:
19 | - uses: actions/checkout@v2
20 | with:
21 | ref: ${{ github.event.inputs.commit }}
22 |
23 | - name: Configure Python
24 | uses: actions/setup-python@v3
25 |
26 | # TODO: This may result in macOS compiling against a newer version of libarchive
27 | # than Linux.
28 | - name: Install dependencies (macOS)
29 | if: startsWith(matrix.os, 'macos-')
30 | run: brew install libarchive
31 |
32 | # This is rather unpleasant and the package versioning should be adjusted to
33 | # allow snapshot build numbers to be injected via setuptools, etc.
34 | - name: Set development version suffix
35 | run: |
36 | sed -E -i -r 's/^__version__(.*)?"$/__version__\1-dev.${{ github.run_number }}"/' \
37 | stacs/scan/__about__.py
38 |
39 | - name: Install cibuildwheel
40 | run: python -m pip install cibuildwheel==2.12.3
41 |
42 | - name: Build wheels (macOS)
43 | run: python -m cibuildwheel --output-dir wheelhouse
44 | if: startsWith(matrix.os, 'macos-')
45 | env:
46 | CIBW_BEFORE_BUILD: pip install pybind11
47 | CIBW_ENVIRONMENT: >
48 | CPPFLAGS="-std=c++11 -I$(find `brew --cellar libarchive` -name include -type d)" \
49 | LDFLAGS="-L$(find `brew --cellar libarchive` -name include -type d)" \
50 | PKG_CONFIG="$(find `brew --cellar libarchive` -name pkgconfig -type d)"
51 |
52 | - name: Build wheels (Ubuntu)
53 | run: python -m cibuildwheel --output-dir wheelhouse
54 | if: startsWith(matrix.os, 'ubuntu-')
55 | env:
56 | CIBW_BEFORE_BUILD: pip install pybind11
57 | CIBW_BEFORE_ALL_LINUX: >
58 | curl -sOL https://github.com/libarchive/libarchive/releases/download/v3.6.1/libarchive-3.6.1.tar.gz &&
59 | tar -zxvf libarchive-3.6.1.tar.gz &&
60 | cd libarchive-3.6.1/ &&
61 | ./configure --without-lzo2 --without-nettle --without-xml2 --without-openssl --with-expat &&
62 | make &&
63 | make install
64 |
65 | - uses: actions/upload-artifact@v3
66 | with:
67 | path: ./wheelhouse/*.whl
68 |
69 | build_sdist:
70 | runs-on: ubuntu-latest
71 | steps:
72 | - uses: actions/checkout@v2
73 |
74 | - name: Configure Python
75 | uses: actions/setup-python@v2
76 | with:
77 | python-version: 3.11
78 |
79 | # This is rather unpleasant and the package versioning should be adjusted to
80 | # allow snapshot build numbers to be injected via setuptools, etc.
81 | - name: Set development version suffix
82 | run: |
83 | sed -E -i -r 's/^__version__(.*)?"$/__version__\1-dev.${{ github.run_number }}"/' \
84 | stacs/scan/__about__.py
85 |
86 | - name: Build Python sdist
87 | run: |
88 | python -m pip install --upgrade pip wheel setuptools
89 | pip install build
90 | python -m build --sdist --outdir dist/ .
91 |
92 | - uses: actions/upload-artifact@v3
93 | with:
94 | path: dist/*.tar.gz
95 |
96 | publish:
97 | needs: [build_wheels, build_sdist]
98 | runs-on: ubuntu-latest
99 | steps:
100 | - uses: actions/download-artifact@v3
101 | with:
102 | name: artifact
103 | path: dist
104 |
105 | - name: Publish Python package
106 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
107 | with:
108 | user: __token__
109 | password: ${{ secrets.PYPI_TESTING_TOKEN }}
110 | repository_url: "https://test.pypi.org/legacy/"
111 |
--------------------------------------------------------------------------------
/.github/workflows/update.yml:
--------------------------------------------------------------------------------
1 | name: Update
2 |
3 | on:
4 | workflow_dispatch:
5 | inputs:
6 | release:
7 | description: The tagged release version to rebuild with the latest rules.
8 | default: 0.0.0
9 | required: true
10 |
11 | jobs:
12 | update:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: actions/checkout@v2
16 |
17 | - name: Configure Python
18 | uses: actions/setup-python@v2
19 | with:
20 | python-version: 3.11
21 |
22 | - name: Extract version
23 | run: |
24 | git clone https://www.github.com/stacscan/stacs-rules.git /tmp/stacs-rules
25 | pushd /tmp/stacs-rules
26 | RULES_VERSION="$(git rev-parse --short HEAD)"
27 | popd
28 | STACS_VERSION="$(python -c 'exec(open("stacs/scan/__about__.py").read()) ; print(__version__, end="")')"
29 | echo "IMAGE_VERSION=${STACS_VERSION}-r${RULES_VERSION}" >> "${GITHUB_ENV}"
30 | echo "STACS_VERSION=${STACS_VERSION}" >> "${GITHUB_ENV}"
31 |
32 | - name: Login to DockerHub
33 | uses: docker/login-action@v1
34 | with:
35 | username: ${{ secrets.DOCKERHUB_USERNAME }}
36 | password: ${{ secrets.DOCKERHUB_TOKEN }}
37 |
38 | - name: Build and push Docker image
39 | id: docker_build
40 | uses: docker/build-push-action@v2
41 | with:
42 | context: .
43 | push: true
44 | tags: stacscan/stacs:latest,stacscan/stacs:${{ env.IMAGE_VERSION }}
45 | build-args: |
46 | VERSION=${{ env.IMAGE_VERSION }}
47 | STACS_BUILD=${{ env.STACS_VERSION }}
48 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98 | __pypackages__/
99 |
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 |
104 | # SageMath parsed files
105 | *.sage.py
106 |
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 |
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 |
120 | # Rope project settings
121 | .ropeproject
122 |
123 | # mkdocs documentation
124 | /site
125 |
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 |
131 | # Pyre type checker
132 | .pyre/
133 |
134 | # pytype static type analyzer
135 | .pytype/
136 |
137 | # Cython debug symbols
138 | cython_debug/
139 |
140 | # Tracker
141 | TODO.md
142 |
143 | # Profiling information.
144 | *.prof
145 | result.json
146 | stacs-rules/
147 |
148 | # macOS files.
149 | .DS_Store
150 |
151 | # Wheel build.
152 | wheelhouse/
153 |
154 | # Ignore compiled shared objects.
155 | *.so
156 |
--------------------------------------------------------------------------------
/.vscode/c_cpp_properties.json:
--------------------------------------------------------------------------------
1 | {
2 | "configurations": [
3 | {
4 | "name": "Linux",
5 | "includePath": [
6 | "${workspaceFolder}/**",
7 | "/usr/include/python3.9"
8 | ],
9 | "defines": [],
10 | "compilerPath": "/usr/bin/gcc",
11 | "cStandard": "gnu17",
12 | "cppStandard": "gnu++14",
13 | "intelliSenseMode": "linux-gcc-x64"
14 | }
15 | ],
16 | "version": 4
17 | }
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.linting.pylintEnabled": false,
3 | "python.linting.flake8Enabled": true,
4 | "python.linting.enabled": true,
5 | "python.formatting.provider": "black",
6 | "editor.formatOnSave": true,
7 | "editor.codeActionsOnSave": {
8 | "source.organizeImports": true
9 | },
10 | "editor.rulers": [
11 | 79,
12 | 88
13 | ],
14 | "C_Cpp.clang_format_fallbackStyle": "{ BasedOnStyle: Google, IndentWidth: 4, ColumnLimit: 0}"
15 | }
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.9-alpine
2 |
3 | # Allow build-time specification of version.
4 | ARG VERSION
5 | ARG STACS_BUILD
6 |
7 | # Allow runtime tuning.
8 | ENV STACS_SKIP_UNPROCESSABLE=0
9 | ENV STACS_THREADS=10
10 | ENV STACS_DEBUG=0
11 | ENV STACS_OUTPUT_PRETTY=0
12 |
13 | # Keep things friendly.
14 | LABEL org.opencontainers.image.title="STACS"
15 | LABEL org.opencontainers.image.description="Static Token And Credential Scanner"
16 | LABEL org.opencontainers.image.url="https://www.github.com/stacscan/stacs"
17 | LABEL org.opencontainers.image.version=$VERSION
18 |
19 | # Install STACS into the container.
20 | WORKDIR /opt/stacs
21 | COPY wrapper/stacs-scan /usr/bin
22 |
23 | RUN apk add --no-cache git gcc musl-dev zstd && \
24 | pip install --no-cache-dir stacs==$STACS_BUILD
25 |
26 | # Clone the latest STACS rules into the rules directory to enable out of the box use.
27 | # This can be mounted over using a volume mount to allow more specific rules to be
28 | # loaded. The same is true for "ignore-lists". Finally, there is a "cache" directory
29 | # configured as a mount to allow scans which need a lot of disk space to mount a scratch
30 | # volume so that Docker doesn't run out of disk :)
31 | RUN mkdir -p /mnt/stacs/input /mnt/stacs/rules /mnt/stacs/ignore /mnt/stacs/cache && \
32 | git clone https://www.github.com/stacscan/stacs-rules /mnt/stacs/rules
33 |
34 | # Define volumes to allow mounting local directories for input, rules, ignore lists, and cache.
35 | VOLUME /mnt/stacs/input
36 | VOLUME /mnt/stacs/rules
37 | VOLUME /mnt/stacs/ignore
38 | VOLUME /mnt/stacs/cache
39 |
40 | # Clean up.
41 | RUN apk del --purge git musl-dev gcc
42 |
43 | # Default to running stacs with the volume mounts.
44 | ENTRYPOINT ["stacs-scan"]
45 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2021, Peter Adkins
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://github.com/stacscan/stacs/actions?workflow=Check)
2 | [](https://github.com/stacscan/stacs/actions?workflow=Publish)
3 | [](https://hub.docker.com/r/stacscan/stacs)
4 | [](https://hub.docker.com/r/stacscan/stacs/tags?page=1&ordering=last_updated)
5 | [](https://twitter.com/stacscan)
6 |
7 |
8 |
9 |
10 |
11 |
12 | Static Token And Credential Scanner
13 |
14 |
15 |
16 | ### What is it?
17 |
18 | STACS is a [YARA](https://virustotal.github.io/yara/) powered static credential scanner
19 | which supports binary file formats, analysis of nested archives, composable rulesets
20 | and ignore lists, and SARIF reporting.
21 |
22 | ### What does STACS support?
23 |
24 | Currently, STACS supports recursive unpacking of:
25 |
26 | * 7z, ar, bz2, cab, cpio, gz, iso, rar, rpm, tar, xar, xz, zip, dmg
27 |
28 | As STACS works on detected file types, proprietary file formats and other file types
29 | which build on these formats are automatically supported. This includes Docker
30 | images, Android APKs, Java JAR files, RPMs, Debian packages (`.deb`), macOS packages
31 | (`.pkg`), and more!
32 |
33 | ### Who should use STACS?
34 |
35 | STACS is designed for use by any teams who release binary artifacts. STACS provides
36 | developers the ability to automatically check for accidental inclusion of static
37 | credentials and key material in their releases.
38 |
39 | However, this doesn't mean STACS can't help with SaaS applications, enterprise
40 | software, or even source code!
41 |
42 | As an example, STACS can be used to find static credentials in Docker images uploaded
43 | to public and private container registries. It can also be used to find credentials
44 | accidentally compiled in to executables, packages for mobile devices, and "enterprise
45 | archives" - such as those used by Java application servers.
46 |
47 | ### How does it work?
48 |
49 | STACS detects static credentials using "rule packs" provided to STACS when run. These
50 | rule packs define a set of YARA rules to run against files provided to STACS. When a
51 | match against a rule is found, a "finding" is generated. These findings represent
52 | potential credentials inside of a file, and are reported on for a developer to remediate
53 | or "ignore".
54 |
55 | If the finding is found to be a false positive - that is, a match on something other
56 | than a real credential - the developer can generate a set of "ignore lists" to ensure
57 | that these matches don't appear in future reports.
58 |
59 | The real power from STACS comes from the automatic detection and unpacking of nested
60 | archives, and composable ignore lists and rule packs.
61 |
62 | #### Ignore lists?
63 |
64 | In order to allow flexible and collaborative usage, STACS supports composable ignore
65 | lists. This allows an ignore list to include other ignore lists, enabling the
66 | composition of a "tree of ignores" based on organisational guidelines. These ignore
67 | lists are especially useful in organisations where many of the same frameworks or
68 | products are used. If a team has already marked a finding as a false positive, other
69 | teams get the benefit of not having to triage the same finding.
70 |
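As a purely illustrative sketch of what an ignore list can look like (the authoritative schema is defined in `stacs/scan/model/ignore_list.py`, which is not reproduced here; the `path`, `pattern`, and `reason` fields below are taken from the filter implementation in `stacs/scan/filter/ignore_list.py`, while the `include` key used for composition is an assumption):

```
{
    "include": [
        "organisation-baseline.ignore.json"
    ],
    "ignore": [
        {
            "path": "src/test/fixtures/dummy-private-key.pem",
            "reason": "Dummy key used only by the test suite"
        },
        {
            "pattern": "docs/examples/.*",
            "reason": "Documentation examples do not contain real credentials"
        }
    ]
}
```
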
71 | #### Rule packs?
72 |
73 | In the same manner as ignore lists, rule packs are also composable. This enables an
74 | organisation to define a baseline set of rules for use by all teams, while still
75 | allowing teams to maintain rulesets specific to their products.
76 |
77 | ### How do I use it?
78 |
79 | The easiest way to use STACS is using the Docker images published to Docker Hub.
80 | However, STACS can also be installed directly from Python's PyPI, or by cloning this
81 | repository. See the relevant sections below to get started!
82 |
83 | A cloud based service is coming soon which allows integration directly in build
84 | and release pipelines to enable detection of static credentials before release!
85 |
86 | #### Docker
87 |
88 | Using the published images, STACS can be used to scan artifacts right away! The STACS
89 | Docker image provides a number of volume mounts so that files to be scanned can be
90 | mounted directly into the scan container.
91 |
92 | As an example, to scan everything in the current folder, the following command can be
93 | run (Docker must be installed).
94 |
95 | ```
96 | docker run \
97 | --rm \
98 | -v "$(pwd):/mnt/stacs/input:ro" \
99 | stacscan/stacs:latest
100 | ```
101 |
102 | If you would like to receive "pretty" readable output, the following command should be
103 | used:
104 |
105 | ```
106 | docker run \
107 | --rm \
108 | -e STACS_OUTPUT_PRETTY=1 \
109 | -v "$(pwd):/mnt/stacs/input:ro" \
110 | stacscan/stacs:latest
111 | ```
112 |
113 | By default, STACS will output any findings in SARIF format directly to STDOUT. In
114 | order to keep things orderly, all log messages are sent to STDERR. For more advanced
115 | use cases, a number of other volume mounts are provided. These allow the user to control
116 | the rule packs, ignore lists, and cache directory to use.
117 |
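For example, the following sketch scans the current directory using a locally cloned copy of the [community rules](https://github.com/stacscan/stacs-rules) mounted over the bundled rules, writes the SARIF report to a file, and keeps log output on the console. The `/mnt/stacs/*` mount points are defined in the project `Dockerfile`; exactly how the `stacs-scan` wrapper consumes the rules and ignore mounts is not shown here, so adjust paths as needed.

```
docker run \
    --rm \
    -v "$(pwd):/mnt/stacs/input:ro" \
    -v "$(pwd)/stacs-rules:/mnt/stacs/rules:ro" \
    -v "/tmp/stacs-cache:/mnt/stacs/cache" \
    stacscan/stacs:latest > results.sarif
```
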
118 | #### PyPI
119 |
120 | STACS can also be installed directly from Python's PyPI. This provides a `stacs` command
121 | which can then be used by developers to scan projects directly in their local
122 | development environments.
123 |
124 | STACS can be installed directly from PyPI using:
125 |
126 | ```
127 | pip install stacs
128 | ```
129 |
130 | **Please Note:** The PyPI release of STACS does not come with any rules. These will also
131 | need to be cloned from the [community rules repository](https://github.com/stacscan/stacs-rules)
132 | for STACS to work!
133 |
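As an illustrative example (assuming the community rules have been cloned alongside the project and that the rules repository provides a top-level `pack.json`; adjust the `--rule-pack` path to match the actual layout):

```
git clone https://github.com/stacscan/stacs-rules.git
pip install stacs

# Scan the current directory, rendering results as a human readable tree.
stacs --pretty --rule-pack stacs-rules/pack.json .

# Or emit SARIF (the default) to a file for use with a SARIF viewer.
stacs --rule-pack stacs-rules/pack.json . > results.sarif
```

When run with `--pretty`, STACS exits with a non-zero status (`100`) if unsuppressed findings remain, which can be used as a simple gate in CI pipelines.
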
134 | ### FAQ
135 |
136 | #### Is there a hosted version of STACS?
137 |
138 | Not yet. However, there are plans for a hosted version of STACS which can be easily
139 | integrated into existing build systems, and which contains additional prebuilt rule
140 | packs and ignore lists.
141 |
142 | #### What do I do about false positives?
143 |
144 | Unfortunately, false positives are an inevitable side effect of the detection of
145 | static credentials. If rules are too granular then rule maintenance becomes a burden
146 | and STACS may miss credentials. If rules are too coarse then STACS may generate too
147 | many false positives!
148 |
149 | In order to assist, STACS provides a number of tools for reducing the number of
150 | false positives which make it into final reports.
151 |
152 | Primarily, STACS provides a mechanism for users to define composable ignore
153 | lists which allow a set of findings to be "ignored". These rules can be as coarse as
154 | ignoring all files based on a pattern, or as granular as a specific finding on a
155 | particular line of a file.
156 |
157 | This information is automatically propagated into reports, so "ignored" findings
158 | will be marked as "suppressed" in SARIF output while also including the reason for the
159 | ignore in the output for tracking.
160 |
161 | #### How do I view the results?
162 |
163 | If using "pretty" output (`--pretty` / `STACS_OUTPUT_PRETTY`), results will be printed
164 | in a human readable format to the console.
165 |
166 | 
167 |
168 | If using SARIF, there are a number of viewers available which make this data easier to
169 | read, such as [this great web based viewer from Microsoft](https://microsoft.github.io/sarif-web-component/).
170 | An example of the findings from a Docker container image has been included
171 | below:
172 |
173 | 
174 |
175 | #### The performance is really, really bad when running in Docker on macOS!
176 |
177 | Unfortunately, this appears to be due to a limitation of Docker Desktop for Mac. I/O
178 | for bind mounts [is really, really slow](https://github.com/docker/for-mac/issues/3677).
179 |
--------------------------------------------------------------------------------
/docs/images/Human-Output-Example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/docs/images/Human-Output-Example.png
--------------------------------------------------------------------------------
/docs/images/SARIF-Viewer-Example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/docs/images/SARIF-Viewer-Example.png
--------------------------------------------------------------------------------
/docs/images/STACS-Logo-RGB.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/docs/images/STACS-Logo-RGB.png
--------------------------------------------------------------------------------
/docs/images/STACS-Logo-RGB.small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/docs/images/STACS-Logo-RGB.small.png
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "setuptools-scm", "pybind11"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "stacs"
7 | readme = "README.md"
8 | description = "Static Token And Credential Scanner."
9 | requires-python = ">=3.8"
10 | dynamic = ["version"]
11 | authors = [{name = "Peter Adkins"}]
12 | license = {text = "BSD-3-Clause"}
13 | classifiers = [
14 | "Programming Language :: Python :: 3.8",
15 | "Natural Language :: English",
16 | ]
17 | dependencies = [
18 | "click>=8.1.0,<9.0",
19 | "yara-python==4.2.3",
20 | "pydantic>=1.10.0,<2.0",
21 | "colorama>=0.4.0,<1.0",
22 | "zstandard>=0.18.0,<1.0",
23 | ]
24 |
25 | [project.optional-dependencies]
26 | tests = [
27 | "black",
28 | "coverage",
29 | "ruff",
30 | "types-setuptools",
31 | "mypy",
32 | "pip-tools",
33 | "mock",
34 | "pytest",
35 | "pytest-cov",
36 | "responses",
37 | "tox",
38 | "ipython",
39 | ]
40 |
41 | [tool.setuptools.dynamic]
42 | version = {attr = "stacs.scan.__about__.__version__"}
43 |
44 | [tool.setuptools.packages.find]
45 | where = ["."]
46 | include = ["stacs.*"]
47 |
48 | [project.scripts]
49 | stacs = "stacs.scan.entrypoint.cli:main"
50 |
51 | [tool.ruff]
52 | line-length = 88
53 | extend-select = [
54 | "B", # flake8-bugbear
55 | "I", # isort
56 | ]
57 | ignore = [
58 | "B904",
59 | "I001",
60 | "B005",
61 | ]
62 |
63 | [tool.mypy]
64 | files = [
65 | "./stacs/**/*.py",
66 | "./tests/**/*.py"
67 | ]
68 | allow_redefinition = false
69 | check_untyped_defs = true
70 | disallow_any_generics = true
71 | disallow_untyped_calls = false
72 | ignore_errors = false
73 | ignore_missing_imports = true
74 | implicit_reexport = false
75 | local_partial_types = true
76 | strict_optional = true
77 | strict_equality = true
78 | no_implicit_optional = true
79 | warn_no_return = true
80 | warn_unused_ignores = true
81 | warn_redundant_casts = true
82 | warn_unused_configs = true
83 | warn_unreachable = true
84 |
85 | [tool.pytest.ini_options]
86 | junit_family = "xunit2"
87 | norecursedirs = ".*"
88 | self-contained-html = true
89 | testpaths = [
90 | "tests"
91 | ]
92 | addopts = """
93 | --strict
94 | --tb=auto
95 | --cov=stacs
96 | --cov-report=term-missing:skip-covered
97 | --cov-branch
98 | -p no:doctest
99 | -p no:warnings
100 | -s
101 | """
102 |
103 | [tool.tox]
104 | legacy_tox_ini = """
105 | [tox]
106 | envlist = linters,py3
107 |
108 | [testenv]
109 | pip_version = pip
110 | extras = tests
111 | commands = pytest -c pyproject.toml
112 | srcs = stacs
113 |
114 | [testenv:linters]
115 | basepython = python3
116 | usedevelop = true
117 | commands =
118 | {[testenv:ruff]commands}
119 |
120 | [testenv:ruff]
121 | basepython = python3
122 | skip_install = true
123 | commands =
124 | ruff check {[testenv]srcs}
125 |
126 | [testenv:mypy]
127 | basepython = python3
128 | skip_install = true
129 | commands =
130 | - mypy --config-file pyproject.toml {[testenv]srcs}
131 | """
132 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """Setup required for pybind11 built native code only."""
2 |
3 | import os
4 | import platform
5 | import subprocess
6 | from typing import List
7 |
8 | from pybind11.setup_helpers import Pybind11Extension
9 | from setuptools import setup
10 |
11 | ext_modules = [
12 | Pybind11Extension(
13 | "stacs.native.archive",
14 | ["stacs/native/archive/src/archive.cpp"],
15 | libraries=["archive"],
16 | ),
17 | ]
18 |
19 |
20 | def run(command: List[str]):
21 | """Run a command, returning the output as a string or an exception on failure."""
22 | result = subprocess.run(command, capture_output=True, check=True)
23 | return str(result.stdout, "utf-8").strip()
24 |
25 |
26 | # macOS requires a bit of special handling to ensure that the - likely - brew installed
27 | # libarchive is discoverable. The macOS built-in libarchive is no good, as it's too
28 | # old.
29 | if platform.system() == "Darwin":
30 | libarchive = run(["brew", "--cellar", "libarchive"])
31 | libarchive_headers = run(["find", libarchive, "-name", "include", "-type", "d"])
32 | libarchive_pkgconfig = run(["find", libarchive, "-name", "pkgconfig", "-type", "d"])
33 |
34 | # Setup the environment for the build.
35 | os.environ["LDFLAGS"] = f"-L{libarchive_headers}"
36 | os.environ["PKG_CONFIG"] = libarchive_pkgconfig
37 | os.environ["CPPFLAGS"] = " ".join(
38 | [
39 | os.environ.get("CPPFLAGS", ""),
40 | "-std=c++11",
41 | f"-I{libarchive_headers}",
42 | ]
43 | )
44 |
45 | setup(ext_modules=ext_modules, packages=[])
46 |
--------------------------------------------------------------------------------
/stacs/__init__.py:
--------------------------------------------------------------------------------
1 | """STACS - Static Token And Credential Scanner.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | __import__("pkg_resources").declare_namespace(__name__)
7 |
--------------------------------------------------------------------------------
/stacs/native/archive/src/archive.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file archive.cpp
3 | * @author Peter Adkins
4 | * @date 2022-07-02
5 | */
6 |
7 | #include <pybind11/pybind11.h>
8 |
9 | #include "archiveentry.cpp"
10 | #include "archivereader.cpp"
11 |
12 | namespace py = pybind11;
13 |
14 | PYBIND11_MODULE(archive, module) {
15 | module.doc() = "STACS Native Extensions for Archives";
16 | module.attr("__name__") = "stacs.native.archive";
17 |
18 | py::class_<ArchiveReader>(module, "ArchiveReader")
19 | .def(py::init<const std::string &>())
20 | .def_property_readonly("filename", &ArchiveReader::getFilename)
21 | .def("__enter__", &ArchiveReader::enter)
22 | .def("__exit__", &ArchiveReader::exit)
23 | .def("__iter__", &ArchiveReader::iter)
24 | .def("__next__", &ArchiveReader::next)
25 | .def("read", &ArchiveReader::read)
26 | .doc() = "An interface to read archive contents (via libarchive)";
27 |
28 | py::class_<ArchiveEntry>(module, "ArchiveEntry")
29 | .def_property_readonly("filename", &ArchiveEntry::getFilename)
30 | .def_property_readonly("isdir", &ArchiveEntry::isDirectory)
31 | .def_property_readonly("size", &ArchiveEntry::getSize)
32 | .doc() = "Represents a member of an Archive";
33 |
34 | py::register_exception<ArchiveError>(module, "ArchiveError");
35 | }
36 |
--------------------------------------------------------------------------------
/stacs/native/archive/src/archiveentry.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file archiveentry.cpp
3 | * @author Peter Adkins
4 | * @date 2022-07-02
5 | */
6 |
7 | #include "archiveentry.hpp"
8 |
9 | #include <sys/stat.h>
10 |
11 | #include <string>
12 |
13 | ArchiveEntry::ArchiveEntry(struct archive_entry *entry) {
14 | this->entry = entry;
15 | }
16 |
17 | ArchiveEntry::~ArchiveEntry() {
18 | }
19 |
20 | /**
21 | * Gets the filename of the archive member.
22 | *
23 | * @return std::string
24 | */
25 | std::string ArchiveEntry::getFilename() {
26 | return archive_entry_pathname_utf8(this->entry);
27 | }
28 |
29 | /**
30 | * Gets the file size of the archive member.
31 | *
32 | * @return int64_t
33 | */
34 | int64_t ArchiveEntry::getSize() {
35 | return archive_entry_size(this->entry);
36 | }
37 |
38 | /**
39 | * Checks whether the current archive member is a directory.
40 | *
41 | * @return bool
42 | */
43 | bool ArchiveEntry::isDirectory() {
44 | if (S_ISDIR(archive_entry_mode(this->entry)) != 0) {
45 | return true;
46 | } else {
47 | return false;
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/stacs/native/archive/src/archiveentry.hpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file archiveentry.hpp
3 | * @author Peter Adkins
4 | * @date 2022-07-02
5 | */
6 |
7 | #pragma once
8 |
9 | extern "C" {
10 | #include <archive.h>
11 | #include <archive_entry.h>
12 | }
13 |
14 | #include <string>
15 |
16 | class ArchiveEntry {
17 | public:
18 | ArchiveEntry(struct archive_entry *entry);
19 | ~ArchiveEntry();
20 |
21 | std::string getFilename();
22 | int64_t getSize();
23 | bool isDirectory();
24 |
25 | private:
26 | struct archive_entry *entry;
27 | };
28 |
--------------------------------------------------------------------------------
/stacs/native/archive/src/archivereader.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file archivereader.cpp
3 | * @author Peter Adkins
4 | * @date 2022-07-02
5 | */
6 |
7 | #include "archivereader.hpp"
8 |
9 | #include "archiveentry.hpp"
10 |
11 | extern "C" {
12 | #include <archive.h>
13 | #include <archive_entry.h>
14 | }
15 |
16 | const char *ArchiveError::what() const noexcept {
17 | return "Unable to open archive for reading\n";
18 | }
19 |
20 | ArchiveReader::ArchiveReader(const std::string &filename) : filename(filename) {
21 | }
22 |
23 | ArchiveReader::~ArchiveReader() {
24 | }
25 |
26 | ArchiveReader *ArchiveReader::iter() {
27 | return this;
28 | }
29 |
30 | /**
31 | * Gets the filename of the currently open file.
32 | *
33 | * @return std::string
34 | */
35 | std::string ArchiveReader::getFilename() {
36 | return this->filename;
37 | }
38 |
39 | /**
40 | * Reads a chunk of data from the currently selected archive member, returning it
41 | * as bytes. An empty bytes object is returned when no more data is available.
42 | *
43 | * @return pybind11::bytes
44 | */
45 | pybind11::bytes ArchiveReader::read() {
46 | std::vector<char> chunk;
47 | chunk.resize(CHUNK_SIZE);
48 |
49 | int result = archive_read_data(this->archive,
50 | chunk.data(),
51 | chunk.size());
52 |
53 | if (result < 0) {
54 | throw ArchiveError();
55 | }
56 |
57 | return pybind11::bytes(chunk.data(), result);
58 | }
59 |
60 | /**
61 | * Find and return the next member in the archive.
62 | *
63 | * @return ArchiveEntry
64 | */
65 | ArchiveEntry ArchiveReader::next() {
66 | int result = archive_read_next_header(this->archive, &this->entry);
67 |
68 | if (result == ARCHIVE_OK) {
69 | return ArchiveEntry(this->entry);
70 | }
71 | if (result == ARCHIVE_EOF) {
72 | throw pybind11::stop_iteration();
73 | }
74 |
75 | throw ArchiveError();
76 | }
77 |
78 | /**
79 | * Loads an archive on Python Context Manager enter.
80 | *
81 | * @return ArchiveReader*
82 | */
83 | ArchiveReader *ArchiveReader::enter() {
84 | this->archive = archive_read_new();
85 |
86 | // Enable all libarchive supported filters and formats.
87 | archive_read_support_filter_all(this->archive);
88 | archive_read_support_format_all(this->archive);
89 |
90 | // Attempt to open the archive.
91 | int result = archive_read_open_filename(this->archive,
92 | this->filename.c_str(),
93 | 10240);
94 |
95 | if (result != ARCHIVE_OK) {
96 | throw ArchiveError();
97 | }
98 |
99 | return this;
100 | }
101 |
102 | /**
103 | * Cleans up the open archive on Python Context Manager exit.
104 | *
105 | * @return true
106 | */
107 | bool ArchiveReader::exit(pybind11::object exc_type,
108 | pybind11::object exc_value,
109 | pybind11::object exc_traceback) {
110 | int result = archive_read_free(this->archive);
111 |
112 | if (result == ARCHIVE_OK) {
113 | return true;
114 | }
115 |
116 | return false;
117 | }
118 |
--------------------------------------------------------------------------------
/stacs/native/archive/src/archivereader.hpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file archivereader.hpp
3 | * @author Peter Adkins
4 | * @date 2022-07-02
5 | */
6 |
7 | #pragma once
8 | #include <pybind11/pybind11.h>
9 |
10 | #include <string>
11 | #include <vector>
12 |
13 | const int CHUNK_SIZE = 10240;
14 |
15 | class ArchiveEntry;
16 |
17 | class ArchiveReader {
18 | public:
19 | ArchiveReader(const std::string &filename);
20 | ~ArchiveReader();
21 |
22 | ArchiveReader *enter();
23 | bool exit(pybind11::object exc_type,
24 | pybind11::object exc_value,
25 | pybind11::object exc_traceback);
26 |
27 | pybind11::bytes read();
28 | ArchiveEntry next();
29 | ArchiveReader *iter();
30 | std::string getFilename();
31 |
32 | private:
33 | std::vector<char> chunk;
34 | std::string filename;
35 | struct archive *archive;
36 | struct archive_entry *entry;
37 | };
38 |
39 | struct ArchiveError : std::exception {
40 | const char *what() const noexcept;
41 | };
42 |
--------------------------------------------------------------------------------
/stacs/scan/__about__.py:
--------------------------------------------------------------------------------
1 | """STACS - Static Token And Credential Scanner.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | __title__ = "stacs"
7 | __summary__ = "Static Token And Credential Scanner."
8 | __version__ = "0.5.1"
9 | __author__ = "Peter Adkins"
10 | __uri__ = "https://www.github.com/stacscan/stacs/"
11 | __license__ = "BSD-3-Clause"
12 |
--------------------------------------------------------------------------------
/stacs/scan/__init__.py:
--------------------------------------------------------------------------------
1 | """STACS - Static Token And Credential Scanner.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | from stacs.scan import __about__ # noqa:F401
7 | from stacs.scan import constants # noqa:F401
8 | from stacs.scan import filter # noqa:F401
9 | from stacs.scan import helper # noqa:F401
10 | from stacs.scan import loader # noqa:F401
11 | from stacs.scan import model # noqa:F401
12 | from stacs.scan import output # noqa:F401
13 | from stacs.scan import scanner # noqa:F401
14 |
--------------------------------------------------------------------------------
/stacs/scan/constants.py:
--------------------------------------------------------------------------------
1 | """Define constants commonly used throughout STACS.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | # The size of chunks to use when reading files.
7 | CHUNK_SIZE = 65536
8 |
9 | # The size, in bytes, of the sample window.
10 | WINDOW_SIZE = 20
11 |
12 | # Define the default cache directory, used to unpack archives into.
13 | CACHE_DIRECTORY = "/tmp"
14 |
15 | # Define the character to use when constructing paths to findings which are inside of
16 | # archives.
17 | ARCHIVE_FILE_SEPARATOR = "!"
18 |
19 | # Define an exit code to use when there are unsuppressed findings.
20 | EXIT_CODE_UNSUPPRESSED = 100
21 |
22 | # External licenses to be displayed alongside the STACS banner.
23 | EXTERNAL_LICENSES = {
24 | "libarchive": [
25 | "https://github.com/libarchive/libarchive/blob/master/COPYING",
26 | ],
27 | "yara": [
28 | "https://github.com/VirusTotal/yara-python/blob/master/LICENSE",
29 | ],
30 | }
31 |
--------------------------------------------------------------------------------
/stacs/scan/entrypoint/__init__.py:
--------------------------------------------------------------------------------
1 | """Defines STACS entrypoints.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | from stacs.scan.entrypoint import cli # noqa:F401
7 |
--------------------------------------------------------------------------------
/stacs/scan/entrypoint/cli.py:
--------------------------------------------------------------------------------
1 | """Defines the primary STACS CLI entrypoint.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | import logging
7 | import os
8 | import shutil
9 | import sys
10 | import time
11 | from types import TracebackType
12 | from typing import Callable, List
13 |
14 | import click
15 | import stacs
16 |
17 |
18 | def unlink_error(function: Callable, path: str, exc_info: TracebackType):
19 | """Provides a mechanism to better handle failures to delete files after a run.
20 |
21 | Currently, this just logs out. In future we should look to fix the permissions on
22 | the path / parent and call func(path) to attempt the deletion again. However, we'll
23 | need to ensure that path is actually part of the cache directory. So for now, we
24 | log.
25 | """
26 | logger = logging.getLogger("stacs")
27 | logger.warning(f"Unable to remove {path}")
28 |
29 |
30 | @click.command()
31 | @click.version_option()
32 | @click.option(
33 | "--debug",
34 | is_flag=True,
35 | help="Increase verbosity of logs for debugging",
36 | )
37 | @click.option(
38 | "--pretty",
39 | help="Display outputs in a human-readable tree, rather than SARIF.",
40 | is_flag=True,
41 | )
42 | @click.option(
43 | "--threads",
44 | help="The number of threads to use when processing files",
45 | default=10,
46 | )
47 | @click.option(
48 | "--rule-pack",
49 | help="The path to the rule pack to load.",
50 | default="~/.stacs/pack.json",
51 | )
52 | @click.option(
53 | "--ignore-list",
54 | help="The path to the ignore list to load (if required).",
55 | )
56 | @click.option(
57 | "--skip-unprocessable",
58 | help="Skip unprocessable / corrupt archives with a warning.",
59 | is_flag=True,
60 | )
61 | @click.option(
62 | "--cache-directory",
63 | help="The path to use as a cache - used when unpacking archives.",
64 | default=stacs.scan.constants.CACHE_DIRECTORY,
65 | )
66 | @click.argument("paths", nargs=-1, required=True)
67 | def main(
68 | debug: bool,
69 | pretty: bool,
70 | threads: int,
71 | rule_pack: str,
72 | ignore_list: str,
73 | skip_unprocessable: bool,
74 | cache_directory: str,
75 | paths: List[str],
76 | ) -> None:
77 | """STACS - Static Token And Credential Scanner."""
78 | logging.basicConfig(
79 | level=logging.DEBUG if debug else logging.INFO,
80 | format="%(asctime)s - %(process)d - [%(levelname)s] %(message)s",
81 | )
82 | logger = logging.getLogger("stacs")
83 | logger.info(f"STACS running with {threads} threads")
84 |
85 | # Licenses.
86 | for project, urls in stacs.scan.constants.EXTERNAL_LICENSES.items():
87 | logger.info(f"STACS uses {project} (licenses may be found at {' '.join(urls)})")
88 |
89 | # Load the rule pack.
90 | logger.info(f"Attempting to load rule pack from {rule_pack}")
91 | try:
92 | pack = stacs.scan.model.pack.from_file(rule_pack)
93 | except stacs.scan.exceptions.STACSException as err:
94 | logger.error(f"Unable to load rule pack: {err}")
95 | sys.exit(-1)
96 |
97 | # Load the ignore list.
98 | ignored = []
99 | if ignore_list:
100 | logger.info(f"Attempting to load ignore list from {ignore_list}")
101 | try:
102 | ignored = stacs.scan.model.ignore_list.from_file(ignore_list)
103 | logger.debug(f"Loaded {len(ignored.ignore)} suppressions from ignore list.")
104 | except stacs.scan.exceptions.STACSException as err:
105 | logger.error(f"Unable to load ignore list: {err}")
106 | sys.exit(-1)
107 |
108 | # Append a timestamp to the cache directory to reduce the chance of collisions.
109 | cache_directory = os.path.join(cache_directory, str(int(time.time_ns() / 1000)))
110 | try:
111 | os.mkdir(cache_directory)
112 | logger.info(f"Using cache directory at {cache_directory}")
113 | except OSError as err:
114 | logger.error(f"Unable to create cache directory at {cache_directory}: {err}")
115 | sys.exit(-2)
116 |
117 | # Generate a list of candidate files to scan.
118 | targets = []
119 |
120 | for path in paths:
121 | path = os.path.abspath(os.path.expanduser(path))
122 | logger.info(f"Attempting to get a list of files to scan from {path}")
123 | try:
124 | targets.extend(
125 | stacs.scan.loader.filepath.finder(
126 | path,
127 | cache_directory,
128 | skip_on_corrupt=skip_unprocessable,
129 | workers=threads,
130 | )
131 | )
132 | except stacs.scan.exceptions.STACSException as err:
133 | logger.error(f"Unable to generate file list: {err}")
134 | sys.exit(-2)
135 |
136 | # Submit files for analysis.
137 | logger.info(f"Found {len(targets)} files for analysis")
138 |
139 | findings = []
140 | for scanner in stacs.scan.scanner.__all__:
141 | try:
142 | findings.extend(
143 | getattr(stacs.scan.scanner, scanner).run(targets, pack, workers=threads)
144 | )
145 | except stacs.scan.exceptions.InvalidFormatException as err:
146 | logger.error(f"Unable to load a rule in scanner {scanner}: {err}")
147 | continue
148 |
149 | # Filter findings by allow list.
150 | if ignored:
151 | findings = stacs.scan.filter.ignore_list.process(findings, ignored)
152 |
153 | # Clean-up cache directory.
154 | shutil.rmtree(cache_directory, onerror=unlink_error)
155 |
156 | # Determine the correct exit status based on whether there were unsuppressed
157 | # findings.
158 | exit_code = 0
159 |
160 | for finding in findings:
161 | if not finding.ignore:
162 | exit_code = stacs.scan.constants.EXIT_CODE_UNSUPPRESSED
163 |
164 | # Pretty print, if requested.
165 | if pretty:
166 | logger.info("Generating 'pretty' output from findings")
167 | stacs.scan.output.pretty.render(findings, pack)
168 | sys.exit(exit_code)
169 |
170 | # Default to SARIF output to STDOUT.
171 | logger.info("Generating SARIF from findings")
172 | try:
173 | sarif = stacs.scan.output.sarif.render(path, findings, pack)
174 | except stacs.scan.exceptions.STACSException as err:
175 | logger.error(f"Unable to generate SARIF: {err}")
176 | sys.exit(-3)
177 |
178 | # TODO: Add file output as an option.
179 | logger.info(f"Found {len(findings)} findings")
180 | print(sarif)
181 |
--------------------------------------------------------------------------------
/stacs/scan/exceptions.py:
--------------------------------------------------------------------------------
1 | """STACS Exceptions.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 |
7 | class STACSException(Exception):
8 | """The most generic form of exception raised by STACS."""
9 |
10 |
11 | class FileAccessException(STACSException):
12 | """Indicates an error occurred while attempting to access a file."""
13 |
14 |
15 | class InvalidFileException(STACSException):
16 | """Indicates the format of a file did not match what was expected."""
17 |
18 |
19 | class InvalidFormatException(STACSException):
20 | """Indicates that the format of a rule did not match what was expected."""
21 |
22 |
23 | class IgnoreListException(STACSException):
24 | """Indicates an invalid ignore list was provided."""
25 |
26 |
27 | class NotImplementedException(STACSException):
28 | """Indicates that the requested method has not been implemented."""
29 |
30 |
31 | class NoParentException(STACSException):
32 | """Indicates that a finding does not have a parent."""
33 |
--------------------------------------------------------------------------------
/stacs/scan/filter/__init__.py:
--------------------------------------------------------------------------------
1 | """Defines filters supported by STACS.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | from stacs.scan.filter import ignore_list # noqa: F401
7 |
--------------------------------------------------------------------------------
/stacs/scan/filter/ignore_list.py:
--------------------------------------------------------------------------------
1 | """Defines a filter which sets the ignore flag on entries present in an ignore list.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | import re
7 | from typing import List
8 |
9 | from stacs.scan.exceptions import IgnoreListException
10 | from stacs.scan.model import finding, ignore_list
11 |
12 |
13 | def by_pattern(finding: finding.Entry, ignore: ignore_list.Entry) -> bool:
14 | """Process a regex ignore list entry."""
15 | # Short circuit if no pattern is set.
16 | if not ignore.pattern:
17 | return False
18 |
19 | # If there's a match on the path, check whether the ignore is for the same module.
20 | if re.search(ignore.pattern, finding.path):
21 | if ignore.module != finding.source.module:
22 | return False
23 |
24 | # Then check whether the ignore is for the particular reference.
25 | if ignore.references:
26 | if finding.source.reference in ignore.references:
27 | return True
28 |
29 | return False
30 |
31 | # Or check whether the ignore is for the same offset.
32 | if ignore.offset is not None:
33 | if finding.location.offset == ignore.offset:
34 | return True
35 | return False
36 |
37 | # In this case this is a fairly permissive ignore.
38 | return True
39 |
40 | return False
41 |
42 |
43 | def by_path(finding: finding.Entry, ignore: ignore_list.Entry) -> bool:
44 | """Process a path based ignore list entry."""
45 | # Short circuit if no path is set.
46 | if not ignore.path:
47 | return False
48 |
49 | # If there's a match on the hash, check whether the ignore is for the same module.
50 | if ignore.path == finding.path:
51 | if finding.source.module != ignore.module:
52 | return False
53 |
54 | # Then check whether the ignore is for the particular reference.
55 | if ignore.references:
56 | if finding.source.reference in ignore.references:
57 | return True
58 | return False
59 |
60 | # Or check whether the ignore is for the same offset.
61 | if ignore.offset is not None:
62 | if finding.location.offset == ignore.offset:
63 | return True
64 | return False
65 |
66 | # In this case this is a fairly permissive ignore.
67 | return True
68 |
69 | return False
70 |
71 |
72 | def by_hash(finding: finding.Entry, ignore: ignore_list.Entry) -> bool:
73 | """Process a hash based ignore list entry."""
74 | # Short circuit if no hash is set.
75 | if not ignore.md5:
76 | return False
77 |
78 | # If there's a match on the hash, check whether the ignore is for the same module.
79 | if ignore.md5 == finding.md5:
80 | if finding.source.module != ignore.module:
81 | return False
82 |
83 | # Then check whether the ignore is for the particular reference.
84 | if ignore.references:
85 | if finding.source.reference in ignore.references:
86 | return True
87 | return False
88 |
89 | # Or check whether the ignore is for the same offset.
90 | if ignore.offset is not None:
91 | if finding.location.offset == ignore.offset:
92 | return True
93 | return False
94 |
95 | # In this case this is a fairly permissive ignore.
96 | return True
97 |
98 | return False
99 |
100 |
101 | def process(
102 | findings: List[finding.Entry],
103 | ignore_list: ignore_list.Format,
104 | ) -> List[finding.Entry]:
105 | """Processes an ignore list and marks the relevant findings as ignored."""
106 | filtered_findings = []
107 |
108 | for entry in findings:
109 | for ignore in ignore_list.ignore:
110 | try:
111 | if by_path(entry, ignore):
112 | ignore = finding.Ignore(
113 | ignored=True,
114 | reason=ignore.reason,
115 | )
116 | entry.ignore = ignore
117 | break
118 |
119 | if by_pattern(entry, ignore):
120 | ignore = finding.Ignore(
121 | ignored=True,
122 | reason=ignore.reason,
123 | )
124 | entry.ignore = ignore
125 | break
126 |
127 | if by_hash(entry, ignore):
128 | ignore = finding.Ignore(
129 | ignored=True,
130 | reason=ignore.reason,
131 | )
132 | entry.ignore = ignore
133 | break
134 | except re.error as err:
135 | raise IgnoreListException(
136 | f"Error in ignore list entry '{ignore.reason}': {err}"
137 | )
138 |
139 | # Add the finding to our results, whether updated or not.
140 | filtered_findings.append(entry)
141 |
142 | return filtered_findings
143 |
--------------------------------------------------------------------------------
/stacs/scan/helper.py:
--------------------------------------------------------------------------------
1 | """Define helpers commonly used throughout STACS.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 | from typing import List
6 |
7 | import colorama
8 | from stacs.scan.constants import ARCHIVE_FILE_SEPARATOR
9 | from stacs.scan.exceptions import NoParentException
10 |
11 |
12 | def generate_virtual_path(
13 | finding: "Finding", # noqa: F821
14 | artifacts: "List[Artifact]", # noqa: F821
15 | ):
16 | """Generate a virtual path for an input file."""
17 | virtual_path = finding.filepath
18 |
19 | try:
20 | parent = artifacts[finding.artifact].parent
21 |
22 | while True:
23 | name = artifacts[parent].filepath
24 | virtual_path = f"{name}{ARCHIVE_FILE_SEPARATOR}{virtual_path}"
25 |
26 | parent = artifacts[parent].parent
27 | except NoParentException:
28 | return virtual_path
29 |
30 |
31 | def printi(string, indent: int = 4, prefix: str = None):
32 | """Super janky wrapper to print something indented."""
33 | for line in string.splitlines():
34 | if prefix:
35 | print(f"{prefix}", end="")
36 |
37 | print(f"{' ' * indent}" + line)
38 |
39 |
40 | def banner(version: str) -> str:
41 | """Returns a STACS console banner."""
42 | banner = colorama.Fore.BLUE
43 | banner += rf"""
44 | ______________ ___________
45 | / ___/_ __/ | / ____/ ___/
46 | \__ \ / / / /| |/ / \__ \
47 | ___/ // / / ___ / /___ ___/ /
48 | /____//_/ /_/ |_\____//____/
49 |
50 | STACS version {version}
51 | """
52 | return banner
53 |
--------------------------------------------------------------------------------
/stacs/scan/loader/__init__.py:
--------------------------------------------------------------------------------
1 | """Defines loaders used by STACS.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | from stacs.scan.loader import archive # noqa:F401
7 | from stacs.scan.loader import filepath # noqa:F401
8 | from stacs.scan.loader import manifest # noqa:F401
9 |
--------------------------------------------------------------------------------
/stacs/scan/loader/archive.py:
--------------------------------------------------------------------------------
1 | """Defines handlers for unpacking of archives.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | import bz2
7 | import gzip
8 | import hashlib
9 | import logging
10 | import lzma
11 | import os
12 | import shutil
13 | import tarfile
14 | import zipfile
15 | import zlib
16 | from typing import Tuple
17 |
18 | import zstandard
19 | from stacs.native import archive
20 | from stacs.scan.constants import CHUNK_SIZE
21 | from stacs.scan.exceptions import FileAccessException, InvalidFileException
22 | from stacs.scan.loader.format import dmg, xar
23 |
24 |
25 | def path_hash(filepath: str) -> str:
26 | """Returns a hash of the filepath, for use with unique directory creation."""
27 | return hashlib.md5(bytes(filepath, "utf-8")).hexdigest()
28 |
29 |
30 | def zip_handler(filepath: str, directory: str) -> None:
31 | """Attempts to extract the provided zip archive."""
32 | log = logging.getLogger(__name__)
33 |
34 | try:
35 | os.mkdir(directory, mode=0o700)
36 | except OSError as err:
37 | raise FileAccessException(
38 | f"Unable to create unpack directory at {directory}: {err}"
39 | )
40 |
41 | # Attempt to unpack the zipfile to the new unpack directory.
42 | try:
43 | with zipfile.ZipFile(filepath, "r") as reader:
44 | try:
45 | reader.extractall(directory)
46 | except RuntimeError as err:
47 | # Encrypted zips (why is this not a custom exception?!)
48 | if "encrypted" in str(err):
49 |                     log.warning(
50 | f"Cannot process file in archive at {filepath}, skipping: {err}"
51 | )
52 | except NotADirectoryError as err:
53 | # Broken filepaths inside of ZIP.
54 |                 log.warning(
55 | f"Cannot process file in archive at {filepath}, skipping: {err}"
56 | )
57 | except (OSError, IndexError) as err:
58 | # Several conditions, but usually a corrupt / bad input zip.
59 |                 log.warning(
60 | f"Cannot process file in archive at {filepath}, skipping: {err}"
61 | )
62 | except (zipfile.BadZipFile, OSError) as err:
63 | raise InvalidFileException(
64 | f"Unable to extract archive {filepath} to {directory}: {err}"
65 | )
66 |
67 |
68 | def tar_handler(filepath: str, directory: str) -> None:
69 | """Attempts to extract the provided tarball."""
70 | try:
71 | os.mkdir(directory, mode=0o700)
72 | except OSError as err:
73 | raise FileAccessException(
74 | f"Unable to create unpack directory at {directory}: {err}"
75 | )
76 |
77 | # Attempt to unpack the tarball to the new unpack directory.
78 | try:
79 | with tarfile.open(filepath, "r") as reader:
80 | reader.extractall(directory)
81 | except (PermissionError, tarfile.TarError) as err:
82 | raise InvalidFileException(
83 | f"Unable to extract archive {filepath} to {directory}: {err}"
84 | )
85 |
86 |
87 | def gzip_handler(filepath: str, directory: str) -> None:
88 | """Attempts to extract the provided gzip archive."""
89 | output = ".".join(os.path.basename(filepath).split(".")[:-1])
90 |
91 | # No dots? Just use the name as is.
92 | if len(output) < 1:
93 | output = os.path.basename(filepath)
94 |
95 | # Although gzip files cannot contain more than one file, we'll still spool into
96 | # a subdirectory under the cache for consistency.
97 | try:
98 | os.mkdir(directory, mode=0o700)
99 | except OSError as err:
100 | raise FileAccessException(
101 | f"Unable to create unpack directory at {directory}: {err}"
102 | )
103 |
104 | # TODO: This can likely be optimized for tgz files, as currently the file will be
105 | # first processed and gunzipped, and then reprocessed to be extracted.
106 | try:
107 | with gzip.open(filepath, "rb") as fin:
108 | with open(os.path.join(directory, output), "wb") as fout:
109 | shutil.copyfileobj(fin, fout, CHUNK_SIZE)
110 | except gzip.BadGzipFile as err:
111 | raise InvalidFileException(
112 | f"Unable to extract archive {filepath} to {output}: {err}"
113 | )
114 |
115 |
116 | def bzip2_handler(filepath: str, directory: str) -> None:
117 | """Attempts to extract the provided bzip2 archive."""
118 | output = ".".join(os.path.basename(filepath).split(".")[:-1])
119 |
120 | # No dots? Just use the name as is.
121 | if len(output) < 1:
122 | output = os.path.basename(filepath)
123 |
124 | # Like gzip, bzip2 cannot support more than a single file. Again, we'll spool into
125 | # a subdirectory for consistency.
126 | try:
127 | os.mkdir(directory, mode=0o700)
128 | except OSError as err:
129 | raise FileAccessException(
130 | f"Unable to create unpack directory at {directory}: {err}"
131 | )
132 |
133 | # TODO: This can likely be optimized for tbz files, as currently the file will be
134 |     # first decompressed, and then reprocessed to be extracted.
135 | try:
136 | with bz2.open(filepath, "rb") as fin:
137 | with open(os.path.join(directory, output), "wb") as fout:
138 | shutil.copyfileobj(fin, fout, CHUNK_SIZE)
139 | except (OSError, ValueError) as err:
140 | raise InvalidFileException(
141 | f"Unable to extract archive {filepath} to {output}: {err}"
142 | )
143 |
144 |
145 | def zstd_handler(filepath: str, directory: str) -> None:
146 | """Attempts to extract the provided zstd archive."""
147 | output = ".".join(os.path.basename(filepath).split(".")[:-1])
148 |
149 | # No dots? Just use the name as is.
150 | if len(output) < 1:
151 | output = os.path.basename(filepath)
152 |
153 |     # zstd does not provide a native mechanism to compress multiple files; the zstd
154 |     # documentation recommends combining zstd with tar.
155 | try:
156 | os.mkdir(directory, mode=0o700)
157 | except OSError as err:
158 | raise FileAccessException(
159 | f"Unable to create unpack directory at {directory}: {err}"
160 | )
161 |
162 | try:
163 | decompressor = zstandard.ZstdDecompressor()
164 |
165 | with open(filepath, "rb") as fin:
166 | with open(os.path.join(directory, output), "wb") as fout:
167 | decompressor.copy_stream(fin, fout, read_size=CHUNK_SIZE)
168 | except (OSError, ValueError, zstandard.ZstdError) as err:
169 | raise InvalidFileException(
170 | f"Unable to extract archive {filepath} to {output}: {err}"
171 | )
172 |
173 |
174 | def lzma_handler(filepath: str, directory: str) -> None:
175 | """Attempts to extract the provided xz / lzma archive."""
176 | output = ".".join(os.path.basename(filepath).split(".")[:-1])
177 |
178 | # No dots? Just use the name as is.
179 | if len(output) < 1:
180 | output = os.path.basename(filepath)
181 |
182 | # Although xz files cannot contain more than one file, we'll still spool into
183 | # a subdirectory under the cache for consistency.
184 | try:
185 | os.mkdir(directory, mode=0o700)
186 | except OSError as err:
187 | raise FileAccessException(
188 | f"Unable to create unpack directory at {directory}: {err}"
189 | )
190 |
191 | try:
192 | with lzma.open(filepath, "rb") as fin:
193 | with open(os.path.join(directory, output), "wb") as fout:
194 | shutil.copyfileobj(fin, fout, CHUNK_SIZE)
195 | except lzma.LZMAError as err:
196 | raise InvalidFileException(
197 | f"Unable to extract archive {filepath} to {output}: {err}"
198 | )
199 |
200 |
201 | def zlib_handler(filepath: str, directory: str) -> None:
202 | """Attempts to extract the provided zlib archive."""
203 | output = ".".join(os.path.basename(filepath).split(".")[:-1])
204 |
205 | # No dots? Just use the name as is.
206 | if len(output) < 1:
207 | output = os.path.basename(filepath)
208 |
209 | try:
210 | os.mkdir(directory, mode=0o700)
211 | except OSError as err:
212 | raise FileAccessException(
213 | f"Unable to create unpack directory at {directory}: {err}"
214 | )
215 |
216 | try:
217 | decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS)
218 |
219 | with open(filepath, "rb") as fin:
220 | with open(os.path.join(directory, output), "wb") as fout:
221 | while compressed := fin.read(CHUNK_SIZE):
222 | fout.write(decompressor.decompress(compressed))
223 | except zlib.error as err:
224 | raise InvalidFileException(
225 | f"Unable to extract archive {filepath} to {output}: {err}"
226 | )
227 |
228 |
229 | def xar_handler(filepath: str, directory: str) -> None:
230 | """Attempts to extract the provided XAR archive."""
231 | try:
232 | os.mkdir(directory, mode=0o700)
233 | except OSError as err:
234 | raise FileAccessException(
235 | f"Unable to create unpack directory at {directory}: {err}"
236 | )
237 |
238 | # Attempt to unpack the archive.
239 | try:
240 | archive = xar.XAR(filepath)
241 | archive.extract(directory)
242 | except FileAccessException as err:
243 | raise FileAccessException(
244 | f"Unable to extract archive {filepath} to {directory}: {err}"
245 | )
246 | except InvalidFileException as err:
247 | raise InvalidFileException(
248 | f"Unable to extract archive {filepath} to {directory}: {err}"
249 | )
250 |
251 |
252 | def dmg_handler(filepath: str, directory: str) -> None:
253 | """Attempts to extract the provided DMG archive."""
254 | try:
255 | os.mkdir(directory, mode=0o700)
256 | except OSError as err:
257 | raise FileAccessException(
258 | f"Unable to create unpack directory at {directory}: {err}"
259 | )
260 |
261 | # Attempt to unpack the archive.
262 | try:
263 | archive = dmg.DMG(filepath)
264 | archive.extract(directory)
265 | except FileAccessException as err:
266 | raise FileAccessException(
267 | f"Unable to extract archive {filepath} to {directory}: {err}"
268 | )
269 | except InvalidFileException as err:
270 | raise InvalidFileException(
271 | f"Unable to extract archive {filepath} to {directory}: {err}"
272 | )
273 |
274 |
275 | def libarchive_handler(filepath: str, directory: str) -> None:
276 | """Attempts to extract the provided archive with libarchive."""
277 | try:
278 | os.mkdir(directory, mode=0o700)
279 | except OSError as err:
280 | raise FileAccessException(
281 | f"Unable to create unpack directory at {directory}: {err}"
282 | )
283 |
284 | # Attempt to unpack the archive to the new unpack directory.
285 | try:
286 | with archive.ArchiveReader(filepath) as reader:
287 | for entry in reader:
288 | member = entry.filename
289 | member = member.lstrip("../")
290 | member = member.lstrip("./")
291 |
292 | if entry.filename == ".":
293 | continue
294 |
295 | destination = os.path.join(directory, member)
296 | parent = os.path.dirname(destination)
297 |
298 | # Handle odd cases where a file was created where a directory needs to
299 | # be.
300 | if os.path.exists(parent) and os.path.isfile(parent):
301 | os.unlink(parent)
302 |
303 | if os.path.isdir(destination):
304 | continue
305 |
306 | # Create parent directories, as required.
307 | if not os.path.isdir(parent):
308 | os.makedirs(parent)
309 |
310 | # If the entry is a directory, create it and move on.
311 | if entry.isdir:
312 | os.makedirs(destination, exist_ok=True)
313 | continue
314 |
315 | with open(destination, "wb") as fout:
316 | while True:
317 | chunk = reader.read()
318 | if len(chunk) > 0:
319 | fout.write(chunk)
320 | continue
321 | break
322 | except archive.ArchiveError as err:
323 | raise InvalidFileException(
324 | f"Unable to extract archive {filepath} to {directory}: {err}"
325 | )
326 |
327 |
328 | def get_mimetype(chunk: bytes, start: bool) -> Tuple[int, str]:
329 | """Attempts to locate the appropriate handler for a given file.
330 |
331 | This may fail if the required "magic" is at an offset greater than the CHUNK_SIZE.
332 | However, currently this is not an issue, but may need to be revisited later as more
333 | archive types are supported.
334 |
335 | The start flag is used to indicate whether the current chunk is from the start of
336 | the file, or the end of the file. Today we only support checking the first and last
337 | chunk.
338 |
339 |     Returns the weight and MIME type of the match as a tuple, or (0, None) when no
340 |     match is found. The weight is specified by handlers and allows "container"
341 |     formats, which may contain multiple files of other matching types, to "win".
342 | """
343 | for name, options in MIME_TYPE_HANDLERS.items():
344 | offset = options["offset"]
345 | magic = options["magic"]
346 |
347 | # If looking at the last chunk, only use negative offsets. This is to prevent
348 | # false positives as position 0 in the last chunk is actually N bytes into the
349 | # file. This is especially problematic for formats with short magic numbers,
350 | # such as zlib.
351 | if not start and offset >= 0:
352 | continue
353 |
354 |         # TODO: How to handle multiple matches in the same chunk? Is this likely?
355 | for format in magic:
356 | if chunk[offset : (offset + len(format))] == format: # noqa: E203
357 | return (options["weight"], name)
358 |
359 | return (0, None)
360 |
361 |
362 | # Define all supported archives and their handlers. As we currently only support a small
363 | # list of types we can just define file magic directly here, rather than use an external
364 | # library. This removes the need for dependencies which may have other system
365 | # dependencies - such as libmagic. It should also provide a small speed up during
366 | # unpacking, as we're only looking for a small number of types.
367 | MIME_TYPE_HANDLERS = {
368 | "application/x-tar": {
369 | "weight": 1,
370 | "offset": 257,
371 | "magic": [
372 | bytearray([0x75, 0x73, 0x74, 0x61, 0x72]),
373 | ],
374 | "handler": tar_handler,
375 | },
376 | "application/gzip": {
377 | "weight": 1,
378 | "offset": 0,
379 | "magic": [
380 | bytearray([0x1F, 0x8B]),
381 | ],
382 | "handler": gzip_handler,
383 | },
384 | "application/x-bzip2": {
385 | "weight": 1,
386 | "offset": 0,
387 | "magic": [
388 | bytearray([0x42, 0x5A, 0x68]),
389 | ],
390 | "handler": bzip2_handler,
391 | },
392 | "application/zip": {
393 | "weight": 1,
394 | "offset": 0,
395 | "magic": [
396 | bytearray([0x50, 0x4B, 0x03, 0x04]),
397 | bytearray([0x50, 0x4B, 0x05, 0x06]),
398 | bytearray([0x50, 0x4B, 0x07, 0x08]),
399 | ],
400 | "handler": zip_handler,
401 | },
402 | "application/zlib": {
403 | "weight": 1,
404 | "offset": 0,
405 | "magic": [
406 | bytearray([0x78, 0x01]),
407 | bytearray([0x78, 0x5E]),
408 | bytearray([0x78, 0x9C]),
409 | bytearray([0x78, 0xDA]),
410 | ],
411 | "handler": zlib_handler,
412 | },
413 | "application/x-xz": {
414 | "weight": 1,
415 | "offset": 0,
416 | "magic": [
417 | bytearray([0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00]),
418 | ],
419 | "handler": lzma_handler,
420 | },
421 | "application/x-rpm": {
422 | "weight": 1,
423 | "offset": 0,
424 | "magic": [
425 | bytearray([0xED, 0xAB, 0xEE, 0xDB]),
426 | ],
427 | "handler": libarchive_handler,
428 | },
429 | "application/x-iso9660-image": {
430 | "weight": 1,
431 | "offset": 0x8001,
432 | "magic": [
433 | bytearray([0x43, 0x44, 0x30, 0x30, 0x31]),
434 | ],
435 | "handler": libarchive_handler,
436 | },
437 | "application/x-7z-compressed": {
438 | "weight": 1,
439 | "offset": 0,
440 | "magic": [
441 | bytearray([0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C]),
442 | ],
443 | "handler": libarchive_handler,
444 | },
445 | "application/x-cpio": {
446 | "weight": 1,
447 | "offset": 0,
448 | "magic": [
449 | bytearray([0xC7, 0x71]), # 070707 in octal (Little Endian).
450 | bytearray([0x71, 0xC7]), # 070707 in octal (Big Endian).
451 | bytearray([0x30, 0x37, 0x30, 0x37, 0x30, 0x31]), # "070701"
452 | bytearray([0x30, 0x37, 0x30, 0x37, 0x30, 0x32]), # "070702"
453 | bytearray([0x30, 0x37, 0x30, 0x37, 0x30, 0x37]), # "070707"
454 | ],
455 | "handler": libarchive_handler,
456 | },
457 | "application/x-xar": {
458 | "weight": 1,
459 | "offset": 0,
460 | "magic": [
461 | bytearray([0x78, 0x61, 0x72, 0x21]),
462 | ],
463 | "handler": xar_handler,
464 | },
465 | "application/vnd.ms-cab-compressed": {
466 | "weight": 1,
467 | "offset": 0,
468 | "magic": [
469 | bytearray([0x4D, 0x53, 0x43, 0x46]),
470 | ],
471 | "handler": libarchive_handler,
472 | },
473 | "application/x-archive": {
474 | "weight": 1,
475 | "offset": 0,
476 | "magic": [
477 | bytearray([0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E]),
478 | ],
479 | "handler": libarchive_handler,
480 | },
481 | "application/vnd.rar": {
482 | "weight": 1,
483 | "offset": 0,
484 | "magic": [
485 | bytearray([0x52, 0x61, 0x72, 0x21, 0x1A, 0x07]),
486 | ],
487 | "handler": libarchive_handler,
488 | },
489 | "application/zstd": {
490 | "weight": 1,
491 | "offset": 0,
492 | "magic": [
493 | bytearray([0x28, 0xB5, 0x2F, 0xFD]),
494 | ],
495 | "handler": zstd_handler,
496 | },
497 | "application/x-apple-diskimage": {
498 | "weight": 2, # "container" formats are weighted higher.
499 | "offset": -512,
500 | "magic": [
501 | bytearray([0x6B, 0x6F, 0x6C, 0x79]),
502 | ],
503 | "handler": dmg_handler,
504 | },
505 | }
506 |
--------------------------------------------------------------------------------
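
As a rough illustration of the detection table above, the sketch below reads the first chunk of a file and resolves a matching unpack handler; handler_for is a made-up helper, and weights only matter when the first and last chunks match different types:

    from stacs.scan.constants import CHUNK_SIZE
    from stacs.scan.loader import archive

    def handler_for(filepath: str):
        """Illustrative only: return the unpack handler for a file, or None."""
        with open(filepath, "rb") as fin:
            first = fin.read(CHUNK_SIZE)

        # get_mimetype returns a (weight, mime) tuple; mime is None when no magic
        # number matched the chunk.
        _, mime = archive.get_mimetype(first, start=True)
        if mime is None:
            return None

        return archive.MIME_TYPE_HANDLERS[mime]["handler"]
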
/stacs/scan/loader/filepath.py:
--------------------------------------------------------------------------------
1 | """Defines a file path loader for STACS.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | import hashlib
7 | import logging
8 | import os
9 | import re
10 | import shutil
11 | from concurrent.futures import ThreadPoolExecutor, as_completed
12 | from typing import List
13 |
14 | from stacs.scan.constants import ARCHIVE_FILE_SEPARATOR, CHUNK_SIZE
15 | from stacs.scan.exceptions import FileAccessException, InvalidFileException
16 | from stacs.scan.loader import archive
17 | from stacs.scan.model.manifest import Entry
18 |
19 | logger = logging.getLogger(__name__)
20 |
21 |
22 | def metadata(filepath: str, overlay: str = None, parent: str = None) -> Entry:
23 | """Generates a hash and determines the mimetype of the input file."""
24 | md5 = hashlib.md5()
25 | mime = None
26 | winner = 0
27 |
28 | # Read the file in chunks.
29 | try:
30 | stat = os.stat(filepath)
31 |
32 | with open(filepath, "rb") as fin:
33 | while chunk := fin.read(CHUNK_SIZE):
34 | md5.update(chunk)
35 |
36 | # Attempt to determine the mime-type using the first and last chunk.
37 | # Note: This may need to change further in future.
38 | if (not mime and fin.tell() <= CHUNK_SIZE) or len(chunk) < CHUNK_SIZE:
39 | start = False if len(chunk) < CHUNK_SIZE else True
40 | (score, candidate) = archive.get_mimetype(chunk, start)
41 |
42 | # Swap the winner if the score is higher.
43 | if score > winner:
44 | mime = candidate
45 | winner = score
46 | except OSError as err:
47 | raise FileAccessException(f"Unable to open file at {filepath}: {err}")
48 |
49 | return Entry(
50 | path=filepath,
51 | md5=md5.hexdigest(),
52 | mime=mime,
53 | overlay=overlay,
54 | parent=parent,
55 | size=stat.st_size,
56 | )
57 |
58 |
59 | def walker(path: str, skip_on_eacces: bool) -> List[str]:
60 | """Recursively walk a file path, returning a list of all files."""
61 | entries = []
62 |
63 | # TODO: Would moving walker to a generator yield a performance increase, or lead to
64 | # higher disk contention due to the hasher running at the same time?
65 | try:
66 | with os.scandir(path) as scan:
67 | for handle in scan:
68 | try:
69 | # Recurse on directories, but not symlinks.
70 | if handle.is_dir() and not handle.is_symlink():
71 | entries.extend(walker(handle.path, skip_on_eacces))
72 |
73 | # Track files, but not symlinks.
74 | if handle.is_file() and not handle.is_symlink():
75 | entries.append(handle.path)
76 | except PermissionError:
77 | if not skip_on_eacces:
78 | raise
79 | except OSError:
80 | # This is usually due to too many levels of symlinks. However, other
81 | # cases are likely with a large enough input.
82 | continue
83 | except NotADirectoryError:
84 | entries.append(path)
85 |
86 | return list(set(entries))
87 |
88 |
89 | def qualify(path: str) -> str:
90 | """Add the scheme to a file path, if required."""
91 | if path.startswith("/"):
92 | return f"file://{path}"
93 | else:
94 | return path
95 |
96 |
97 | def finder(
98 | path: str,
99 | cache: str,
100 | workers: int = 10,
101 | skip_on_eacces: bool = True,
102 | skip_on_corrupt: bool = False,
103 | ) -> List[Entry]:
104 | """Processes the input path, returning a list of all files and their hashes."""
105 | entries = []
106 | futures = dict()
107 |
108 | # Run the metadata enumerator in a thread pool as we're likely to be I/O bound.
109 | with ThreadPoolExecutor(max_workers=workers) as pool:
110 | futures = {
111 | pool.submit(metadata, file): file for file in walker(path, skip_on_eacces)
112 | }
113 |
114 |         # A loop and counter are used here to ensure that additional work which may be
115 | # submitted during the 'final loop' isn't accidentally ignored.
116 | while True:
117 | complete = 0
118 | for future in as_completed(futures):
119 | complete += 1
120 |
121 |                 try:
122 |                     result = future.result()
123 |                 except FileAccessException:
124 |                     # Drop the failed future so it is not revisited, then skip or raise.
125 |                     del futures[future]
126 |                     if not skip_on_eacces:
127 |                         raise
128 |                     continue
129 | entries.append(result)
130 | del futures[future]
131 |
132 |                 # Check if the file was found to be an archive, and if so, unpack it.
133 | handler = archive.MIME_TYPE_HANDLERS.get(result.mime, {}).get("handler")
134 | if not handler:
135 | continue
136 |
137 | # Remove any existing previously unpacked files, then unpack the archive
138 | # and submit extracted files back into the queue. This is to allow for
139 | # easy recursive unpacking of nested archives.
140 | destination = os.path.join(cache, archive.path_hash(result.path))
141 | shutil.rmtree(destination, ignore_errors=True)
142 |
143 | try:
144 | handler(result.path, destination)
145 | except InvalidFileException as err:
146 | # Only skip with a warning if explicitly configured to do so.
147 | if skip_on_corrupt:
148 | logger.warning(
149 | f"Skipping file at {result.path} due to error when "
150 | f"processing: {err}"
151 | )
152 | else:
153 | raise
154 |
155 | for file in walker(destination, skip_on_eacces):
156 | # The overlay path is a 'virtual' path that is constructed based on
157 | # the archive the file appears inside of, and the path of the file
158 | # inside of the archive. However, as archives may be nested, we need
159 | # to check whether we already have an overlay and, if set, use that
160 | # value instead.
161 | if result.overlay:
162 | parent = result.overlay
163 | else:
164 | parent = result.path
165 |
166 | logger.debug(f"Processing {file}, extracted from archive {parent}")
167 | overlay = (
168 | f"{parent}"
169 | f"{ARCHIVE_FILE_SEPARATOR}"
170 | f"{re.sub(rf'^{destination}/?', '', file)}"
171 | )
172 |
173 | # Submit back to the pool for processing.
174 | submission = pool.submit(
175 | metadata, file, overlay=overlay, parent=result.md5
176 | )
177 | futures[submission] = file
178 |
179 | if complete == 0:
180 | break
181 |
182 | return entries
183 |
--------------------------------------------------------------------------------
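
A sketch of driving the loader above; the scan path is a placeholder and a temporary directory stands in for the unpack cache:

    import tempfile

    from stacs.scan.loader import filepath

    with tempfile.TemporaryDirectory() as cache:
        # Hash every file under the target, recursively unpacking supported
        # archives into the cache; extracted files are fed back into the pool.
        entries = filepath.finder("/path/to/scan", cache, workers=4)

        for entry in entries:
            # 'overlay' is only set for files extracted from an archive, and
            # encodes the virtual path of the file inside its parent.
            print(entry.path, entry.md5, entry.mime, entry.overlay)
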
/stacs/scan/loader/format/__init__.py:
--------------------------------------------------------------------------------
1 | """Defines file format handlers used by STACS.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | from stacs.scan.loader.format import dmg, xar # noqa: F401
7 |
--------------------------------------------------------------------------------
/stacs/scan/loader/format/dmg.py:
--------------------------------------------------------------------------------
1 | """Provides an Apple Disk Image (DMG) parser and extractor.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | import bz2
7 | import lzma
8 | import os
9 | import plistlib
10 | import struct
11 | import zlib
12 | from collections import namedtuple
13 | from typing import List
14 |
15 | from pydantic import BaseModel, Extra, Field
16 | from stacs.scan.exceptions import FileAccessException, InvalidFileException
17 |
18 | # Structure names and geometry are via "Demystifying the DMG File Format"
19 | # by Jonathan Levin (http://newosxbook.com/).
20 | DMG_HEADER_MAGIC = b"koly"
21 | DMG_HEADER = ">4sIIIQQQQQII16sII128sQQ120sII128sIQIII"
22 | DMG_HEADER_MAGIC_SZ = len(DMG_HEADER_MAGIC)
23 | DMG_HEADER_SZ = struct.calcsize(DMG_HEADER)
24 |
25 | DMG_BLOCK_TABLE_MAGIC = b"mish"
26 | DMG_BLOCK_TABLE = ">4sIQQQIIIIIIIIII128sI"
27 | DMG_BLOCK_TABLE_MAGIC_SZ = len(DMG_BLOCK_TABLE_MAGIC)
28 | DMG_BLOCK_TABLE_SZ = struct.calcsize(DMG_BLOCK_TABLE)
29 |
30 | DMG_BLOCK_CHUNK = ">I4sQQQQ"
31 | DMG_BLOCK_CHUNK_SZ = struct.calcsize(DMG_BLOCK_CHUNK)
32 |
33 | DMGHeader = namedtuple(
34 | "DMGHeader",
35 | [
36 | "signature",
37 | "version",
38 | "header_size",
39 | "flags",
40 | "running_data_fork_offset",
41 | "data_fork_offset",
42 | "data_fork_length",
43 | "rsrc_fork_offset",
44 | "rsrc_fork_length",
45 | "segment_number",
46 | "segment_count",
47 | "segment_id",
48 | "data_checksum_type",
49 | "data_checksum_size",
50 | "data_checksum",
51 | "xml_offset",
52 | "xml_length",
53 | "reserved_1",
54 |         "checksum_type",
55 |         "checksum_size",
56 | "checksum",
57 | "image_variant",
58 | "sector_count",
59 | "reserved_2",
60 | "reserved_3",
61 | "reserved_4",
62 | ],
63 | )
64 | DMGBlockTable = namedtuple(
65 | "DMGBlockTable",
66 | [
67 | "signature",
68 | "version",
69 | "sector_number",
70 | "sector_count",
71 | "data_offset",
72 | "buffers_needed",
73 | "block_descriptors",
74 | "reserved_1",
75 | "reserved_2",
76 | "reserved_3",
77 | "reserved_4",
78 | "reserved_5",
79 | "reserved_6",
80 |         "checksum_type",
81 |         "checksum_size",
82 | "checksum",
83 | "chunk_count",
84 | ],
85 | )
86 | DMGBlockChunk = namedtuple(
87 | "DMGBlockChunk",
88 | [
89 | "type",
90 | "comment",
91 | "sector_number",
92 | "sector_count",
93 | "compressed_offset",
94 | "compressed_length",
95 | ],
96 | )
97 |
98 |
99 | class DMGBlock(BaseModel, extra=Extra.forbid):
100 | """Expresses a DMG block entry and its chunks."""
101 |
102 | name: str
103 | chunks: List[DMGBlockChunk] = Field([])
104 |
105 |
106 | class DMG:
107 | """Provides an Apple Disk Image (DMG) parser and extractor."""
108 |
109 | def __init__(self, filepath: str):
110 | self.archive = filepath
111 |
112 | try:
113 | with open(self.archive, "rb") as fin:
114 | # DMG metadata is at the end of the file.
115 | fin.seek(-DMG_HEADER_SZ, 2)
116 |
117 | # Ensure the provided file is actually a DMG.
118 | if fin.read(DMG_HEADER_MAGIC_SZ) != DMG_HEADER_MAGIC:
119 | raise InvalidFileException("File does not appear to be a DMG")
120 |
121 | # Rewind and attempt to read in header.
122 | fin.seek(-DMG_HEADER_MAGIC_SZ, 1)
123 | self._header = DMGHeader._make(
124 | struct.unpack(DMG_HEADER, fin.read(DMG_HEADER_SZ))
125 | )
126 |
127 | # Read the XML property list.
128 | fin.seek(self._header.xml_offset, 0)
129 | self._plist = plistlib.loads(fin.read(self._header.xml_length))
130 | except OSError as err:
131 | raise FileAccessException(f"Unable to read archive: {err}")
132 |
133 | def _parse_blocks(self) -> List[DMGBlock]:
134 |         """Parse all blocks and their associated chunks from the property list."""
135 | candidates = []
136 |
137 | # Read the BLKX entries from the resource-fork section of the plist.
138 | for entry in self._plist.get("resource-fork", {}).get("blkx", []):
139 | data = entry.get("Data")
140 | name = entry.get("Name")
141 |
142 | block = DMGBlock(name=name)
143 | table = DMGBlockTable._make(
144 | struct.unpack(DMG_BLOCK_TABLE, data[0:DMG_BLOCK_TABLE_SZ])
145 | )
146 |
147 | # Extract all blocks and their associated chunks from the encoded "Data"
148 | # inside of the extracted plist.
149 | start = DMG_BLOCK_TABLE_SZ
150 |
151 | for _ in range(0, table.chunk_count):
152 | end = start + DMG_BLOCK_CHUNK_SZ
153 | block.chunks.append(
154 | DMGBlockChunk._make(struct.unpack(DMG_BLOCK_CHUNK, data[start:end]))
155 | )
156 | start = end
157 |
158 | candidates.append(block)
159 |
160 | return candidates
161 |
162 | def extract(self, destination):
163 |         """Extract all blocks from the DMG to the destination directory."""
164 | parent = os.path.basename(self.archive)
165 |
166 | try:
167 | os.makedirs(destination, exist_ok=True)
168 | except OSError as err:
169 | raise FileAccessException(
170 | f"Unable to create directory during extraction: {err}"
171 | )
172 |
173 | # Process each chunk inside of each block. A DMG has multiple blocks, and a
174 | # block has N chunks.
175 | for idx, block in enumerate(self._parse_blocks()):
176 | output = os.path.join(destination, f"{parent}.{idx}.blob")
177 |
178 | for chunk in block.chunks:
179 | # Skip Ignored, Comment, and Last blocks (respectively).
180 | if chunk.type in [0x00000002, 0x7FFFFFFE, 0xFFFFFFFF]:
181 | continue
182 |
183 | try:
184 | with open(self.archive, "rb") as fin, open(output, "ab") as fout:
185 | fin.seek(chunk.compressed_offset)
186 |
187 | # 0x80000005 - Zlib.
188 | if chunk.type == 0x80000005:
189 | fout.write(
190 | zlib.decompress(fin.read(chunk.compressed_length))
191 | )
192 |
193 |                         # 0x80000006 - BZ2.
194 | if chunk.type == 0x80000006:
195 | fout.write(
196 | bz2.decompress(fin.read(chunk.compressed_length))
197 | )
198 |
199 |                         # 0x80000008 - LZMA.
200 | if chunk.type == 0x80000008:
201 | fout.write(
202 | lzma.decompress(fin.read(chunk.compressed_length))
203 | )
204 |
205 | # 0x00000000 - Zero Fill.
206 | if chunk.type == 0x00000000:
207 | fout.write(b"\x00" * chunk.compressed_length)
208 | continue
209 | except (OSError, lzma.LZMAError, ValueError) as err:
210 | raise InvalidFileException(err)
211 |
--------------------------------------------------------------------------------
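
A minimal sketch of using the DMG parser above, with placeholder paths:

    from stacs.scan.loader.format import dmg

    # Each BLKX block is reassembled into an '<archive name>.<index>.blob' file
    # under the destination; zlib, bzip2, LZMA, and zero-fill chunks are handled.
    image = dmg.DMG("/tmp/sample.dmg")
    image.extract("/tmp/unpacked-dmg")
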
/stacs/scan/loader/format/xar.py:
--------------------------------------------------------------------------------
1 | """Provides an eXtensible ARchive parser and extractor.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | import os
7 | import struct
8 | import xml.etree.ElementTree as ET
9 | import zlib
10 | from collections import namedtuple
11 | from typing import List
12 |
13 | from stacs.scan.constants import CHUNK_SIZE
14 | from stacs.scan.exceptions import FileAccessException, InvalidFileException
15 |
16 | XAR_MAGIC = b"xar!"
17 | XAR_HEADER = ">4sHHQQI"
18 | XAR_HEADER_SZ = struct.calcsize(XAR_HEADER)
19 |
20 | # via xar/include/xar.h.in
21 | XARHeader = namedtuple(
22 | "XARHeader",
23 | [
24 | "magic",
25 | "size",
26 | "version",
27 | "toc_length_compressed",
28 | "toc_length_uncompressed",
29 | "cksum_alg",
30 | ],
31 | )
32 |
33 | XAREntry = namedtuple(
34 | "XAREntry",
35 | [
36 | "length",
37 | "offset",
38 | "size",
39 | "encoding",
40 | "archived_cksum_kind",
41 | "archived_cksum",
42 | "path",
43 | "name",
44 | "kind",
45 | ],
46 | )
47 |
48 |
49 | class XAR:
50 |     """Provides an eXtensible ARchive Format parser and extractor."""
51 |
52 | def __init__(self, filepath: str):
53 | self.archive = filepath
54 |
55 | try:
56 | with open(self.archive, "rb") as fin:
57 | # Ensure the provided file is actually a XAR.
58 | if fin.read(4) != XAR_MAGIC:
59 | raise InvalidFileException("File does not appear to be a XAR")
60 |
61 | # Rewind and attempt to read in header.
62 | fin.seek(0)
63 | self._header = XARHeader._make(
64 | struct.unpack(XAR_HEADER, fin.read(XAR_HEADER_SZ))
65 | )
66 |
67 | # Read and decompress the table-of-contents.
68 | fin.seek(self._header.size)
69 |
70 | self._toc = ET.fromstring(
71 | str(
72 | zlib.decompress(fin.read(self._header.toc_length_uncompressed)),
73 | "utf-8",
74 | )
75 | )
76 | except zlib.error as err:
77 | raise InvalidFileException(f"Unable to read table-of-contents: {err}")
78 | except OSError as err:
79 | raise FileAccessException(f"Unable to read archive: {err}")
80 |
81 | def _parse_entries(self, root, directory="") -> List[XAREntry]:
82 | """Recursively parse entries from the table-of-contents."""
83 | candidates = []
84 |
85 | # Strip any slashes, only using the last path component.
86 | kind = root.find(".type").text
87 | name = root.find(".name").text.split("/")[-1]
88 | path = os.path.join(directory, name)
89 |
90 | # Recurse for directories
91 | if kind == "directory":
92 | for element in root.findall(".//file"):
93 | candidates.extend(self._parse_entries(element, directory=path))
94 |
95 | if kind == "file":
96 | size = int(root.find(".//data/size").text)
97 | length = int(root.find(".//data/length").text)
98 | offset = int(root.find(".//data/offset").text)
99 | encoding = root.find(".//data/encoding").get("style")
100 | archived_cksum = root.find(".//data/archived-checksum").text
101 | archived_cksum_kind = root.find(".//data/archived-checksum").get("style")
102 |
103 | candidates.append(
104 | XAREntry(
105 | length,
106 | offset,
107 | size,
108 | encoding,
109 |                     archived_cksum_kind,
110 |                     archived_cksum,
111 | path,
112 | name,
113 | kind,
114 | )
115 | )
116 |
117 | return candidates
118 |
119 | def entries(self) -> List[XAREntry]:
120 | """Return a list of entries in this XAR."""
121 | candidates = []
122 |
123 | for entry in self._toc.findall("./toc/file"):
124 | candidates.extend(self._parse_entries(entry))
125 |
126 | return candidates
127 |
128 | def extract(self, destination):
129 |         """Extract all entries from the XAR to the destination directory."""
130 |         # Offsets must be adjusted by the size of the header and the compressed ToC,
131 |         # as each entry offset is relative to the first byte AFTER the compressed ToC.
132 | header_size = self._header.size + self._header.toc_length_compressed
133 |
134 | for entry in self.entries():
135 | parent = os.path.dirname(os.path.join(destination, entry.path))
136 |
137 | try:
138 | os.makedirs(parent, exist_ok=True)
139 | except OSError as err:
140 | raise FileAccessException(
141 | f"Unable to create directory during extraction: {err}"
142 | )
143 |
144 | # Check whether a decompressor should be used.
145 | decompressor = None
146 |
147 | if entry.encoding == "application/x-gzip":
148 | decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 32).decompress
149 |
150 | # Perform extraction.
151 | # TODO: No decompression or integrity checking is performed today, nor are
152 | # ownership and modes followed.
153 | remaining = entry.length
154 |
155 | try:
156 | with open(self.archive, "rb") as fin:
157 | with open(os.path.join(destination, entry.path), "wb") as fout:
158 | fin.seek(header_size + entry.offset)
159 |
160 | # Read all data in chunks to not balloon memory when processing
161 | # large files.
162 | while remaining > 0:
163 | delta = remaining - CHUNK_SIZE
164 | if delta < 0:
165 | read_length = remaining
166 | else:
167 | read_length = CHUNK_SIZE
168 |
169 | # Use a decompressor, if required.
170 | if decompressor:
171 | fout.write(decompressor(fin.read(read_length)))
172 | else:
173 | fout.write(fin.read(read_length))
174 |
175 | remaining -= read_length
176 | except (OSError, zlib.error) as err:
177 | raise InvalidFileException(err)
178 |
--------------------------------------------------------------------------------
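
And similarly for the XAR parser above, again with placeholder paths:

    from stacs.scan.loader.format import xar

    package = xar.XAR("/tmp/sample.pkg")
    for entry in package.entries():
        print(entry.path, entry.size, entry.encoding)

    # Extraction writes each file entry beneath the destination, creating parent
    # directories as required.
    package.extract("/tmp/unpacked-pkg")
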
/stacs/scan/loader/manifest.py:
--------------------------------------------------------------------------------
1 | """Defines a manifest loader for STACS.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | #
7 | # TODO: Implement the manifest loader. This should take the contents of a manifest
8 | # which matches the stacs.scan.model.manifest.Format schema. It should also check
9 | # whether all requested files exist, and generate MD5 sums for them if not
10 | # specified in the manifest 'Entry' (stacs.scan.model.manifest.Entry).
11 | #
12 |
--------------------------------------------------------------------------------
/stacs/scan/model/__init__.py:
--------------------------------------------------------------------------------
1 | """Defines models used by STACS.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | from stacs.scan.model import finding # noqa: F401
7 | from stacs.scan.model import ignore_list # noqa: F401
8 | from stacs.scan.model import manifest # noqa: F401
9 | from stacs.scan.model import pack # noqa: F401
10 |
--------------------------------------------------------------------------------
/stacs/scan/model/finding.py:
--------------------------------------------------------------------------------
1 | """Defines types to assist with reporting findings.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | from typing import List
7 |
8 | from pydantic import BaseModel, Extra, Field
9 |
10 |
11 | class Location(BaseModel, extra=Extra.forbid):
12 | """Defines data associated with a location of a finding."""
13 |
14 | line: int = Field(
15 | None,
16 | title="The line number which contains the finding.",
17 | )
18 | offset: int = Field(
19 | None,
20 | title="The offset from the start of the file of the finding (in bytes).",
21 | )
22 |
23 |
24 | class Source(BaseModel, extra=Extra.forbid):
25 | """Defines data associated with the source of a finding."""
26 |
27 | module: str = Field(
28 | title="The STACS module which generated the finding.",
29 | )
30 | description: str = Field(
31 | None,
32 | title="A description of the finding",
33 | )
34 | reference: str = Field(
35 | title="A reference to the element which generated the finding.",
36 | )
37 | tags: List[str] = Field(
38 | [],
39 | title="A list of tags associated with the finding.",
40 | )
41 | version: str = Field(
42 | None,
43 | title="The version of the element which generated the finding.",
44 | )
45 |
46 |
47 | class Sample(BaseModel, extra=Extra.forbid):
48 | """The content and context of a finding."""
49 |
50 | window: int = Field(
51 | title="The number of bytes before and after a finding included in the sample.",
52 | )
53 | before: str = Field(
54 | title="The contents of N bytes before the finding.",
55 | )
56 | after: str = Field(
57 | title="The contents of N bytes after the finding.",
58 | )
59 | finding: str = Field(
60 | title="The contents of the finding.",
61 | )
62 | binary: bool = Field(
63 | title="Indicates that the finding was binary and is base64 encoded."
64 | )
65 |
66 |
67 | class Ignore(BaseModel, extra=Extra.forbid):
68 | """Defines the ignore schema of a finding."""
69 |
70 | ignored: bool = Field(
71 | False,
72 | title="Whether the finding should be ignored due to allow list.",
73 | )
74 | reason: str = Field(
75 | title="The reason to ignore the finding.",
76 | )
77 |
78 |
79 | class Entry(BaseModel, extra=Extra.forbid):
80 | """Defines the schema of a finding."""
81 |
82 | path: str = Field(
83 | title="The path to the file.",
84 | )
85 | md5: str = Field(
86 | title="The MD5 sum of the file.",
87 | )
88 | confidence: float = Field(
89 | None,
90 | title="The confidence of the finding.",
91 | )
92 | location: Location = Field(
93 | None,
94 | title="The location of the finding in the input file.",
95 | )
96 | sample: Sample = Field(
97 | None,
98 | title="Information relating to the content of the finding.",
99 | )
100 | source: Source = Field(
101 | None,
102 | title="Information about the source of the finding.",
103 | )
104 | ignore: Ignore = Field(
105 | None,
106 | title="Information about whether the entry should be ignored.",
107 | )
108 |
--------------------------------------------------------------------------------
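
For illustration, a finding for a plain-text match could be constructed roughly as below; every value shown is made up, and in practice entries are produced by the rules scanner rather than by hand:

    from stacs.scan.model import finding

    entry = finding.Entry(
        path="src/app/settings.py",
        md5="0cc175b9c0f1b6a831c399e269772661",
        confidence=90,
        location=finding.Location(line=12, offset=311),
        source=finding.Source(
            module="stacs.scan.scanner.rules",
            reference="CredentialGeneric",
            description="Potential hard-coded credential",
        ),
        sample=finding.Sample(
            window=20,
            before="password = '",
            finding="hunter2",
            after="'",
            binary=False,
        ),
    )
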
/stacs/scan/model/ignore_list.py:
--------------------------------------------------------------------------------
1 | """Defines types to assist with loading and processing of ignore lists.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | import json
7 | import os
8 | from typing import List
9 |
10 | from pydantic import BaseModel, Extra, Field, validator
11 | from stacs.scan.exceptions import IgnoreListException, STACSException
12 |
13 |
14 | class Entry(BaseModel, extra=Extra.forbid):
15 | """Defines the schema of an ignore."""
16 |
17 | path: str = Field(
18 | None,
19 | title="The path of a file to ignore.",
20 | )
21 | pattern: str = Field(
22 | None,
23 | title="A pattern of the file path to ignore.",
24 | )
25 | reason: str = Field(
26 | title="The reason for ignoring the finding.",
27 | )
28 | md5: str = Field(
29 | None,
30 | title="The MD5 sum of the file to ignore.",
31 | )
32 | module: str = Field(
33 | "stacs.scan.scanner.rules",
34 | title="Which module to ignore findings from.",
35 | )
36 | references: List[str] = Field(
37 | [],
38 | title=(
39 | "A list of references to ignore findings from, defaults to all if not set."
40 | ),
41 | )
42 | offset: int = Field(
43 | None,
44 | title="The offset of the specific finding to ignore.",
45 | )
46 |
47 | @validator("path", always=True)
48 | def exclusive_path_or_pattern(cls, value, values):
49 | """Ensure that either path or pattern is provided, not both."""
50 | if values.get("pattern") and value:
51 | raise IgnoreListException(
52 | "Either path OR pattern must be specified, not both."
53 | )
54 |
55 | if values.get("pattern") and not value and not values.get("md5"):
56 | raise IgnoreListException("One of pattern, path, or md5 must be set.")
57 |
58 | return value
59 |
60 | @validator("offset", always=True)
61 |     def offset_and_references_both_set(cls, value, values):
62 | if value and len(values.get("references")) > 0:
63 | raise IgnoreListException(
64 | "An offset cannot be combined with a list of references."
65 | )
66 |
67 | return value
68 |
69 |
70 | class Format(BaseModel, extra=Extra.forbid):
71 | """Defines the schema of the ignore list."""
72 |
73 | include: List[str] = Field(
74 | [],
75 | title="Define a list of additional ignore lists to include.",
76 | )
77 | ignore: List[Entry] = Field(
78 | [],
79 | title="Define a list of ignore list entries.",
80 | )
81 |
82 |
83 | def from_file(filename: str) -> Format:
84 | """Load an ignore list from file, returning a rendered down and complete list."""
85 | parent_file = os.path.abspath(os.path.expanduser(filename))
86 | parent_path = os.path.dirname(parent_file)
87 |
88 | # Load the parent ignore list, and then recurse as needed to handle includes.
89 | try:
90 | with open(parent_file, "r") as fin:
91 | parent_list = Format(**json.load(fin))
92 |
93 |         # Roll over the include list and replace all entries with a fully qualified
94 |         # path, if not already set.
95 | for index, path in enumerate(parent_list.include):
96 | parent_list.include[index] = os.path.expanduser(path)
97 | if not path.startswith("/"):
98 | parent_list.include[index] = os.path.join(parent_path, path)
99 | except (OSError, json.JSONDecodeError) as err:
100 | raise STACSException(err)
101 |
102 | # Recursively load included ignore lists.
103 | for file in parent_list.include:
104 | child_pack = from_file(file)
105 | parent_list.ignore.extend(child_pack.ignore)
106 |
107 | # Finally strip the included ignore lists from the entry, as these have been
108 | # resolved, returning the loaded ignore lists to the caller.
109 | parent_list.include.clear()
110 | return parent_list
111 |
--------------------------------------------------------------------------------
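
The same schema can be exercised directly when authoring a new list; the path, pattern, and reasons below are purely illustrative, and the equivalent structure serialised as JSON is what from_file() loads and merges:

    from stacs.scan.model import ignore_list

    candidate = ignore_list.Format(
        ignore=[
            ignore_list.Entry(
                path="src/fixtures/test_rsa_key.pem",
                reason="Test fixture, not a live credential",
            ),
            ignore_list.Entry(
                pattern=r".*/vendor/.*",
                reason="Third-party code is reviewed upstream",
            ),
        ],
    )
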
/stacs/scan/model/manifest.py:
--------------------------------------------------------------------------------
1 | """Defines types to assist with loading and processing of manifests.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | from typing import List
7 |
8 | from pydantic import BaseModel, Extra, Field
9 |
10 |
11 | class Entry(BaseModel, extra=Extra.forbid):
12 | """Defines the schema of a file to process."""
13 |
14 | path: str = Field(
15 | None,
16 | title="The path to the file on disk.",
17 | )
18 | overlay: str = Field(
19 | None,
20 | title=(
21 | "The overlay path of a file. This is used to generate virtual paths which "
22 |             "provide the path to files inside of archives."
23 | ),
24 | )
25 | md5: str = Field(
26 | None,
27 | title="The MD5 sum of the file.",
28 | )
29 | parent: str = Field(
30 | None,
31 | title="The MD5 sum of the file's parent.",
32 | )
33 | mime: str = Field(
34 | None,
35 | title="The mimetype of the file.",
36 | )
37 | size: int = Field(
38 | None,
39 | title="The size of the file.",
40 | )
41 |
42 |
43 | class Format(BaseModel, extra=Extra.forbid):
44 | """Defines the schema of a manifest file."""
45 |
46 | files: List[Entry] = Field(
47 | [],
48 | title="A list of files to scan.",
49 | )
50 |
--------------------------------------------------------------------------------
/stacs/scan/model/pack.py:
--------------------------------------------------------------------------------
1 | """Defines types to assist with loading and processing of rule packs.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | import json
7 | import os
8 | from typing import List
9 |
10 | from pydantic import BaseModel, Extra, Field
11 | from stacs.scan.exceptions import STACSException
12 |
13 |
14 | class Entry(BaseModel, extra=Extra.forbid):
15 |     """Defines the schema of a rule pack entry."""
16 |
17 | module: str = Field(
18 | "rules",
19 | title="Which module the rules are for.",
20 | )
21 | path: str = Field(
22 | None,
23 |         title="The path of the module's rules to load.",
24 | )
25 |
26 |
27 | class Format(BaseModel, extra=Extra.forbid):
28 | """Defines the schema of the rule pack."""
29 |
30 | include: List[str] = Field(
31 | [],
32 | title="Define a list of additional packs to include.",
33 | )
34 | pack: List[Entry] = Field(
35 | [],
36 | title="A list of pack entries.",
37 | )
38 |
39 |
40 | def from_file(filename: str) -> Format:
41 | """Load a pack from file, returning a rendered down and complete pack."""
42 | parent_file = os.path.abspath(os.path.expanduser(filename))
43 | parent_path = os.path.dirname(parent_file)
44 |
45 | # Load the parent pack, and then recurse as needed to handle includes.
46 | try:
47 | with open(parent_file, "r") as fin:
48 | parent_pack = Format(**json.load(fin))
49 |
50 | # Roll over the pack and ensure any entries are fully qualified.
51 | for entry in parent_pack.pack:
52 | entry.path = os.path.expanduser(entry.path)
53 | if not entry.path.startswith("/"):
54 | # Resolve and update the path.
55 | entry.path = os.path.join(parent_path, entry.path)
56 | # Roll over the include list and replace all entries with a fully qualified
57 | # path, if not already set.
58 | for index, path in enumerate(parent_pack.include):
59 | if not path.startswith("/"):
60 | parent_pack.include[index] = os.path.join(parent_path, path)
61 | except (OSError, json.JSONDecodeError) as err:
62 | raise STACSException(err)
63 |
64 | # Recursively load included packs, adding results to the loaded pack.
65 | for file in parent_pack.include:
66 | child_pack = from_file(file)
67 | parent_pack.pack.extend(child_pack.pack)
68 |
69 | # Finally strip the included packs from the entry, as these have been resolved,
70 | # returning the loaded pack to the caller.
71 | parent_pack.include.clear()
72 | return parent_pack
73 |
--------------------------------------------------------------------------------
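
Loading a rule pack then looks like the sketch below; the pack location is hypothetical, and relative 'path' entries are resolved against the directory containing the pack file:

    from stacs.scan.model import pack

    rules = pack.from_file("~/.stacs/pack.json")

    for entry in rules.pack:
        print(entry.module, entry.path)
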
/stacs/scan/output/__init__.py:
--------------------------------------------------------------------------------
1 | """Defines outputs supported by STACS.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | from stacs.scan.output import markdown # noqa: F401
7 | from stacs.scan.output import pretty # noqa: F401
8 | from stacs.scan.output import sarif # noqa: F401
9 |
--------------------------------------------------------------------------------
/stacs/scan/output/markdown.py:
--------------------------------------------------------------------------------
1 | """Defines a markdown output handler for STACS.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | from typing import List
7 |
8 | from stacs.scan import model
9 | from stacs.scan.exceptions import NotImplementedException
10 |
11 |
12 | def render(findings: List[model.finding.Entry], pack: model.pack.Format) -> str:
13 | raise NotImplementedException("Markdown output not yet implemented, sorry!")
14 |
--------------------------------------------------------------------------------
/stacs/scan/output/pretty.py:
--------------------------------------------------------------------------------
1 | import base64
2 | from typing import List
3 |
4 | from colorama import Fore, init
5 | from stacs.scan import helper, model
6 | from stacs.scan.__about__ import __version__
7 | from stacs.scan.constants import ARCHIVE_FILE_SEPARATOR
8 | from stacs.scan.model.finding import Sample
9 |
10 |
11 | def generate_file_tree(virtual_path: str) -> str:
12 | """Returns a tree layout to the virtual path."""
13 | tree = str()
14 | parts = virtual_path.split(ARCHIVE_FILE_SEPARATOR)
15 |
16 | for index, part in enumerate(parts):
17 | # Add some style. Print a package / box before each archive, and a document
18 | # before the file.
19 | if (index + 1) == len(parts):
20 | emoji = "📄"
21 | else:
22 | emoji = "📦"
23 |
24 | tree += f"{' ' * (index * 4)}`-- {emoji} {part}\n"
25 |
26 | return tree.rstrip()
27 |
28 |
29 | def generate_sample(sample: Sample):
30 |     """Return the sample as a single plain-text or base64 encoded string."""
31 | # Ensure the sample is nicely base64 encoded if binary, rather than slapping three
32 | # already base64'd strings together.
33 | raw = bytearray()
34 | if sample.binary:
35 | raw.extend(bytearray(base64.b64decode(sample.before)))
36 | raw.extend(bytearray(base64.b64decode(sample.finding)))
37 | raw.extend(bytearray(base64.b64decode(sample.after)))
38 |
39 | return str(base64.b64encode(raw), "utf-8")
40 |
41 | return "".join([sample.before, sample.finding, sample.after])
42 |
43 |
44 | def render(findings: List[model.finding.Entry], pack: model.pack.Format) -> str:
45 | """Render a 'pretty' output to the console for human consumption."""
46 | init()
47 |
48 | # Find all unsuppressed findings, and track them separately.
49 | results = {}
50 | unsuppressed = 0
51 |
52 | for finding in findings:
53 | # Check for suppressions.
54 | if finding.ignore is not None and finding.ignore.ignored:
55 | continue
56 |
57 | # Track it.
58 | unsuppressed += 1
59 |
60 | if results.get(finding.path) is None:
61 | results[finding.path] = []
62 |
63 | # Extract location appropriately.
64 | location = None
65 | if finding.location.line:
66 | location = f"line {finding.location.line}"
67 | else:
68 | location = f"{finding.location.offset}-bytes"
69 |
70 |         # Generate all strings for presentation right away.
71 | results[finding.path].append(
72 | {
73 | "tree": generate_file_tree(finding.path),
74 | "path": finding.path.split(ARCHIVE_FILE_SEPARATOR)[-1],
75 | "rule": finding.source.reference,
76 | "text": finding.source.description,
77 | "location": location,
78 | "sample": generate_sample(finding.sample),
79 | }
80 | )
81 |
82 | # Provide a summary.
83 | print(helper.banner(version=__version__))
84 |
85 |     if unsuppressed == 0:
86 | print("✨ " + Fore.GREEN + "No unsuppressed findings! Great work! ✨\n")
87 | return
88 |
89 | # Render out the findings.
90 | print(
91 | f"{Fore.RED}🔥 There were {unsuppressed} unsuppressed findings in "
92 | f"{len(results)} files 🔥\n"
93 | )
94 |
95 | for candidate in results:
96 | filepath = candidate.split(ARCHIVE_FILE_SEPARATOR)[0]
97 | count = len(results[candidate])
98 |
99 | if ARCHIVE_FILE_SEPARATOR in candidate:
100 | print(f"{Fore.RED}❌ {count} finding(s) inside of file {filepath} (Nested)")
101 | else:
102 | print(f"{Fore.RED}❌ {count} finding(s) inside of file {filepath}")
103 |
104 | for finding in results[candidate]:
105 | print()
106 | helper.printi(f"{Fore.YELLOW}Reason : {finding['text']}")
107 | helper.printi(f"{Fore.YELLOW}Rule Id : {finding['rule']}")
108 | helper.printi(f"{Fore.YELLOW}Location : {finding['location']}\n\n")
109 | helper.printi(f"{Fore.YELLOW}Filetree:\n\n")
110 | helper.printi(
111 | finding["tree"],
112 | prefix=f" {Fore.RESET}|{Fore.BLUE}",
113 | )
114 | print()
115 | helper.printi(f"{Fore.YELLOW}Sample:\n\n")
116 | helper.printi(
117 | f"... {finding['sample']} ...",
118 | prefix=f" {Fore.RESET}|{Fore.BLUE}",
119 | )
120 | print()
121 |
122 | print(f"\n{Fore.RESET}{'-' * 78}\n")
123 |
--------------------------------------------------------------------------------
/stacs/scan/output/sarif.py:
--------------------------------------------------------------------------------
1 | """Defines a SARIF output handler for STACS.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | import base64
7 | import json
8 | import re
9 | from typing import Any, Dict, List, Optional, Tuple
10 |
11 | from stacs.scan import __about__, model
12 | from stacs.scan.constants import ARCHIVE_FILE_SEPARATOR
13 |
14 | # Only one SARIF version will be supported at a time.
15 | SARIF_VERSION = "2.1.0"
16 | SARIF_SCHEMA_URI = "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0.json"
17 | SARIF_URI_BASE_ID = "STACSROOT"
18 |
19 |
20 | def confidence_to_level(confidence: int) -> str:
21 | """Maps the confidence of a finding to a SARIF level."""
22 | if confidence < 70:
23 | return "warning"
24 | else:
25 | return "error"
26 |
27 |
28 | def render_artifact(path: str, parent: Optional[int] = None) -> Dict[str, Any]:
29 | """Create a new artifact entry."""
30 | artifact = {
31 | "location": {
32 | "uri": path,
33 | "uriBaseId": SARIF_URI_BASE_ID,
34 | },
35 | }
36 |
37 | if parent is not None:
38 | artifact["parentIndex"] = parent
39 |
40 | return artifact
41 |
42 |
43 | def path_in_artifacts(path: str, artifacts: List[Dict[str, Any]], parent) -> int:
44 | """Checks if a path exists in the artifacts list."""
45 | for index, artifact in enumerate(artifacts):
46 | if path == artifact["location"]["uri"]:
47 |             # Short circuit if neither we nor the artifact have a parent.
48 | if artifact.get("parentIndex", None) is None and parent is None:
49 | return index
50 |
51 | # Check common ancestry.
52 | try:
53 | their_parent = artifact.get("parentIndex", None)
54 | our_parent = parent
55 |
56 | while True:
57 | if our_parent == their_parent:
58 | their_parent = artifacts[their_parent]["parentIndex"]
59 | our_parent = artifacts[our_parent]["parentIndex"]
60 | else:
61 | break
62 | except KeyError:
63 | # We're good all the way back to the root.
64 | return index
65 |
66 | return None
67 |
68 |
69 | def add_artifact(
70 | root: str,
71 | finding: model.finding.Entry,
72 | artifacts: List[Dict[str, Any]],
73 | ) -> Tuple[int, List[Dict[str, Any]]]:
74 |     """Generates SARIF artifact entries for findings (SARIF v2.1.0 Section 3.24)."""
75 | parent = None
76 |
77 | for real_path in finding.path.split(ARCHIVE_FILE_SEPARATOR):
78 | # Strip the scan directory root from the path for Base URIs to work properly.
79 | path = re.sub(rf"^{root}", "", real_path).lstrip("/")
80 |
81 | # Check if the path already exists.
82 | new_parent = path_in_artifacts(path, artifacts, parent)
83 | if new_parent is not None:
84 | parent = new_parent
85 | continue
86 |
87 | artifacts.append(render_artifact(path, parent))
88 | parent = len(artifacts) - 1
89 |
90 | # Add metadata to this entry, if missing.
91 | artifacts[parent]["hashes"] = {
92 | "md5": finding.md5,
93 | }
94 | return (parent, artifacts)
95 |
96 |
97 | def render(
98 | root: str, findings: List[model.finding.Entry], pack: model.pack.Format
99 | ) -> str:
100 | """Renders down a SARIF document for STACS findings."""
101 | rules = []
102 | results = []
103 | artifacts = []
104 |
105 | # Generate a result (SARIF v2.1.0 Section 3.27) for each finding.
106 | for finding in findings:
107 | # Suppressions (SARIF v2.1.0 Section 3.27.23) are used to track findings where
108 | # there is an "ignore" set - via ignore list.
109 | suppressions = []
110 |
111 | # Create an artifactContent (SARIF v2.1.0 Section 3.3) entry to track the sample
112 | # of the finding.
113 | context_content = {}
114 | artifact_content = {}
115 |
116 | if finding.sample.binary:
117 | artifact_content["binary"] = finding.sample.finding
118 |             # Decode and then re-encode the sample into a single base64 string to provide
119 | # context.
120 | context_content["binary"] = str(
121 | base64.b64encode(
122 | base64.b64decode(finding.sample.before)
123 | + base64.b64decode(finding.sample.finding)
124 | + base64.b64decode(finding.sample.after)
125 | ),
126 | "utf-8",
127 | )
128 | else:
129 | artifact_content["text"] = finding.sample.finding
130 | context_content["text"] = (
131 | finding.sample.before + finding.sample.finding + finding.sample.after
132 | )
133 |
134 | # Create a new contextRegion (SARIF v2.1.0 Section 3.29.5) to provide contextual
135 | # information about the finding, but do not include the byte or line number
136 | # offset.
137 | context = {"snippet": context_content}
138 |
139 | # Create a new region (SARIF v2.1.0 Section 3.30) to track the location of the
140 | # finding and the sample.
141 | region = {
142 | "byteOffset": finding.location.offset,
143 | "snippet": artifact_content,
144 | }
145 |
146 | # Line numbers are optional, as the input file may be binary.
147 | if finding.location.line:
148 | region["startLine"] = finding.location.line
149 |
150 | # Add a new artifact for this finding, or retrieve the location of the existing.
151 | index, artifacts = add_artifact(root, finding, artifacts)
152 |
153 |         # Strip the scan directory root from the path, as we're using the reference
154 | # from originalUriBaseIds (SARIF v2.1.0 Section 3.14.14) to allow "portability".
155 | path = finding.path.split(ARCHIVE_FILE_SEPARATOR)[-1]
156 | relative_path = re.sub(rf"^{root}", "", path).lstrip("/")
157 |
158 | # Pin the artifact location back to a physical location (SARIF v2.1.0 Section
159 | # 3.28.3).
160 | physical_location = {
161 | "physicalLocation": {
162 | "region": region,
163 | "contextRegion": context,
164 | "artifactLocation": {
165 | "uri": relative_path,
166 | "index": index,
167 | "uriBaseId": SARIF_URI_BASE_ID,
168 | },
169 | },
170 | }
171 |
172 | # Generate a new Rule entry, if required (SARIF v2.1.0 Section 3.49).
173 | rule = None
174 |
175 | for candidate in rules:
176 | if finding.source.reference == candidate.get("id"):
177 | rule = candidate
178 | break
179 |
180 | if not rule:
181 | # Add the description from the original rule pack entry into the Rule for
182 | # easy tracking.
183 | rule = {
184 | "id": finding.source.reference,
185 | "shortDescription": {
186 | "text": finding.source.description,
187 | },
188 | }
189 | rules.append(rule)
190 |
191 | # Add a Suppression entry if this finding was marked as "Ignored", along with
192 | # the reason (justification) from the original ignore list.
193 | if finding.ignore is not None and finding.ignore.ignored:
194 | suppressions.append(
195 | {
196 | "kind": "external",
197 | "status": "accepted",
198 | "justification": finding.ignore.reason,
199 | }
200 | )
201 |
202 | # Track the finding (Result).
203 | results.append(
204 | {
205 | "message": rule.get("shortDescription"),
206 | "level": confidence_to_level(finding.confidence),
207 | "ruleId": finding.source.reference,
208 | "locations": [
209 | physical_location,
210 | ],
211 | "suppressions": suppressions,
212 | }
213 | )
214 |
215 | # Add a toolComponent (SARIF v2.1.0 Section 3.19), and bolt it all together.
216 | tool = {
217 | "driver": {
218 | "name": __about__.__title__.upper(),
219 | "rules": rules,
220 | "version": __about__.__version__,
221 | "downloadUri": __about__.__uri__,
222 | "informationUri": __about__.__uri__,
223 | },
224 | }
225 | run = {
226 | "tool": tool,
227 | "results": results,
228 | "artifacts": artifacts,
229 | "originalUriBaseIds": {
230 | SARIF_URI_BASE_ID: {
231 | "uri": f"file://{root.rstrip('/')}/",
232 | },
233 | },
234 | }
235 | sarif = {
236 | "version": SARIF_VERSION,
237 | "$schema": SARIF_SCHEMA_URI,
238 | "runs": [
239 | run,
240 | ],
241 | }
242 |
243 | # Return a stringified JSON representation of the SARIF document.
244 | return json.dumps(sarif)
245 |
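
A minimal usage sketch for the renderer above (illustrative only, not part of the repository): the finding is built by hand with the same fields the rules scanner populates, and all values, paths, and the rule name are placeholders. The empty rule pack is assumed to validate; rule metadata in the output is taken from the finding itself.

    from stacs.scan import model
    from stacs.scan.output import sarif

    # Hypothetical, hand-built finding; the scanner normally constructs these.
    finding = model.finding.Entry(
        path="/tmp/rootfs/etc/example.conf",
        md5="b39bfc0e26a30024c76e4dcb8a1eae87",
        confidence=80,
        source=model.finding.Source(
            module="stacs.scan.scanner.rules",
            reference="ExampleRule",
            description="An example credential rule.",
        ),
        sample=model.finding.Sample(
            window=20,
            before="password = ",
            finding="hunter2",
            after="\n",
            binary=False,
        ),
        location=model.finding.Location(offset=12, line=1),
    )

    # Assumed to be acceptable as an empty pack; render() takes the scan root,
    # the findings, and the loaded rule pack.
    pack = model.pack.Format(include=[], pack=[])

    with open("stacs.sarif", "w") as fout:
        fout.write(sarif.render("/tmp/rootfs", [finding], pack))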
--------------------------------------------------------------------------------
/stacs/scan/scanner/__init__.py:
--------------------------------------------------------------------------------
1 | """Defines scanners used by STACS.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | from stacs.scan.scanner import rules
7 |
8 | # Export all enabled scanners.
9 | __all__ = [
10 | "rules",
11 | ]
12 |
--------------------------------------------------------------------------------
/stacs/scan/scanner/rules.py:
--------------------------------------------------------------------------------
1 | """Implements a rules based scanner for STACS.
2 |
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | import base64
7 | import hashlib
8 | import os
9 | from concurrent.futures import ThreadPoolExecutor, as_completed
10 | from typing import List
11 |
12 | import yara
13 |
14 | from stacs.scan.constants import CHUNK_SIZE, WINDOW_SIZE
15 | from stacs.scan.exceptions import FileAccessException, InvalidFormatException
16 | from stacs.scan.loader import archive
17 | from stacs.scan.model import finding, manifest, pack
18 |
19 |
20 | def is_binary(target: manifest.Entry) -> bool:
21 | """Attempts to determine if a target file is binary."""
22 | # This is a bit false positive prone, as some "application" mime types are text.
23 | # However, as we only support a finite number of formats, we should be safe to do
24 | # this (for now!)
25 | binary_types = (
26 | "application",
27 | "image",
28 | "audio",
29 | "video",
30 | )
31 | if target.mime and target.mime.startswith(binary_types):
32 | return True
33 |
34 |     # Otherwise, try to read some data as text. This can be fooled if a binary file
35 |     # starts with at least 10 * CHUNK_SIZE bytes of readable text.
36 | try:
37 | with open(target.path, "r") as fin:
38 | for _ in range(0, 10):
39 | fin.read(CHUNK_SIZE)
40 | except UnicodeDecodeError:
41 | return True
42 |
43 |     # Default to text.
44 | return False
45 |
46 |
47 | def generate_sample(target: manifest.Entry, offset: int, size: int) -> finding.Sample:
48 | """Generates a sample for a finding."""
49 | binary = is_binary(target)
50 |
51 | before = bytes()
52 | after = bytes()
53 | entry = bytes()
54 |
55 | try:
56 | # Make sure we don't try and read past the beginning and end of the file.
57 | target_sz = os.stat(target.path).st_size
58 |
59 | if offset - WINDOW_SIZE < 0:
60 | before_sz = offset
61 | before_offset = 0
62 | else:
63 | before_sz = WINDOW_SIZE
64 | before_offset = offset - before_sz
65 |
66 | # Ensure we read N bytes AFTER the entire match, not after the first byte of the
67 | # match.
68 | if offset + size + WINDOW_SIZE > target_sz:
69 | after_sz = target_sz - (offset + size)
70 | after_offset = target_sz - after_sz
71 | else:
72 | after_sz = WINDOW_SIZE
73 | after_offset = offset + size
74 |
75 | with open(target.path, "rb") as fin:
76 | # Seek to and read in the context before.
77 | fin.seek(before_offset)
78 | before = fin.read(before_sz)
79 |
80 | # Read the finding match itself. We have this already from yara, but we're
81 | # already here so we may as well.
82 | fin.seek(offset)
83 | entry = fin.read(size)
84 |
85 | # Seek to and read in the context after the finding.
86 | fin.seek(after_offset)
87 | after = fin.read(after_sz)
88 | except OSError as err:
89 | raise FileAccessException(err)
90 |
91 | if not binary:
92 | try:
93 | return finding.Sample(
94 | window=WINDOW_SIZE,
95 | before=str(before, "utf-8"),
96 | after=str(after, "utf-8"),
97 | finding=str(entry, "utf-8"),
98 | binary=binary,
99 | )
100 | except UnicodeDecodeError:
101 | # Fall through and return a base64 encoded sample.
102 | pass
103 |
104 | return finding.Sample(
105 | window=WINDOW_SIZE,
106 | before=base64.b64encode(before),
107 | after=base64.b64encode(after),
108 | finding=base64.b64encode(entry),
109 | binary=binary,
110 | )
111 |
112 |
113 | def generate_location(target: manifest.Entry, offset: int) -> finding.Location:
114 | """Generates a location for a finding."""
115 |     # If the file is binary we can't generate a meaningful line number, so the byte
116 |     # offset alone is all the data we need.
117 | if is_binary(target):
118 | return finding.Location(offset=offset)
119 |
120 | # Attempt to generate a line number for the finding.
121 | bytes_read = 0
122 | line_number = 1
123 | try:
124 | with open(target.path, "r") as fin:
125 | # Read in chunks, counting the number of newline characters up to the chunk
126 | # which includes the finding.
127 | while bytes_read < offset:
128 | bytes_read += CHUNK_SIZE
129 |
130 | if bytes_read > offset:
131 |                     line_number += fin.read(offset - (bytes_read - CHUNK_SIZE)).count("\n")
132 | else:
133 | line_number += fin.read(CHUNK_SIZE).count("\n")
134 | except UnicodeDecodeError:
135 | # It's possible to get into a state where the detected mime-type of a file is
136 | # incorrect, resulting in unprocessable binary data making it here. In these
137 | # cases we'll just bail early and report the number of bytes into the file of
138 | # the finding. Exactly as we do for known binary files.
139 | return finding.Location(offset=offset)
140 | except OSError as err:
141 | raise FileAccessException(err)
142 |
143 | return finding.Location(offset=offset, line=line_number)
144 |
145 |
146 | def generate_findings(target: manifest.Entry, match: yara.Match) -> List[finding.Entry]:
147 | """Attempts to create findings based on matches inside of the target file."""
148 | findings = []
149 |
150 | # Generate a new finding entry for each matched string. This is in order to ensure
151 | # that multiple findings in the same file are listed separately - as they may be
152 | # different credentials.
153 | for offset, _, entry in match.strings:
154 | location = generate_location(target, offset)
155 | sample = generate_sample(target, offset, len(entry))
156 |
157 | # Add on information about the origin of the finding (that's us!)
158 | source = finding.Source(
159 | module=__name__,
160 | reference=match.rule,
161 | tags=match.tags,
162 | version=match.meta.get("version", "UNKNOWN"),
163 | description=match.meta.get("description"),
164 | )
165 | findings.append(
166 | finding.Entry(
167 | md5=target.md5,
168 | path=target.overlay if target.overlay else target.path,
169 | confidence=match.meta.get("accuracy", 50),
170 | source=source,
171 | sample=sample,
172 | location=location,
173 | )
174 | )
175 |
176 | return findings
177 |
178 |
179 | def matcher(target: manifest.Entry, ruleset: yara.Rules) -> List[finding.Entry]:
180 | findings = []
181 |
182 | for match in ruleset.match(target.path):
183 | findings.extend(generate_findings(target, match))
184 |
185 | return findings
186 |
187 |
188 | def run(
189 | targets: List[manifest.Entry],
190 | pack: pack.Format,
191 | workers: int = 10,
192 | skip_on_eacces: bool = True,
193 | ) -> List[finding.Entry]:
194 | """
195 | Executes the rules based matcher on all input files, returning a list of finding
196 | Entry objects.
197 | """
198 | findings = []
199 |
200 | # Load and compile all YARA rules up front.
201 | namespaces = dict()
202 |
203 | for rule in pack.pack:
204 | namespace = hashlib.md5(bytes(rule.path, "utf-8")).hexdigest()
205 | namespaces[namespace] = rule.path
206 |
207 | try:
208 | ruleset = yara.compile(filepaths=namespaces)
209 | except yara.Error as err:
210 | raise InvalidFormatException(err)
211 |
212 | # Run the matcher in a thread pool as we're likely to be I/O bound.
213 | with ThreadPoolExecutor(max_workers=workers) as pool:
214 | futures = []
215 |
216 |         # Skip any input files which are supported archives, as we should already have
217 |         # unpacked versions of these to process. This allows a finding to be matched to
218 |         # the specific file inside the archive, rather than to the archive itself.
219 | #
220 |         # NOTE: Credentials stuffed into the metadata of supported archive formats
221 |         # (such as Zip's "Extra" field) will not be found.
222 | #
223 | for target in targets:
224 | if target.mime not in archive.MIME_TYPE_HANDLERS:
225 | futures.append(pool.submit(matcher, target, ruleset))
226 |
227 | for future in as_completed(futures):
228 | try:
229 | findings.extend(future.result())
230 | except FileAccessException:
231 | if not skip_on_eacces:
232 | raise
233 |
234 | return findings
235 |
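
A minimal sketch of invoking the rules scanner directly (illustrative only, not part of the repository). It assumes a rule pack JSON whose paths resolve to real YARA files on disk, and a manifest entry for the file to scan; "credential.json" and the target path are placeholders.

    import json

    from stacs.scan import model
    from stacs.scan.scanner import rules

    # Load and validate the rule pack, then describe the file(s) to scan.
    with open("credential.json", "r") as fin:
        rule_pack = model.pack.Format(**json.load(fin))

    targets = [model.manifest.Entry(path="/tmp/scan/config.env")]

    # Run the scanner and print a short summary of each finding.
    for entry in rules.run(targets, rule_pack, workers=4):
        print(entry.path, entry.source.reference, entry.location.offset)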
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/tests/__init__.py
--------------------------------------------------------------------------------
/tests/fixtures/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/tests/fixtures/.gitignore
--------------------------------------------------------------------------------
/tests/fixtures/findings/001.txt:
--------------------------------------------------------------------------------
1 | Credential is at the end of file, with less than the WINDOW_SIZE available for a sample
2 | this should cause the after window to be reduced to only match the bytes remaining after
3 | the finding.
4 |
5 | ghp_YWNjb3VudHMyM3JkZnNkZnNkZnNkZnNkZnNk
6 |
--------------------------------------------------------------------------------
/tests/fixtures/findings/002.txt:
--------------------------------------------------------------------------------
1 | X
2 |
3 | ghp_YWNjb3VudHMyM3JkZnNkZnNkZnNkZnNkZnNk
4 |
5 | There is plenty of data after the finding, but the sample before the finding should not
6 | try and read past the start of the file.
7 |
--------------------------------------------------------------------------------
/tests/fixtures/findings/003.txt:
--------------------------------------------------------------------------------
1 | ghp_YWNjb3VudHMyM3JkZnNkZnNkZnNkZnNkZnNk
--------------------------------------------------------------------------------
/tests/fixtures/findings/004.txt:
--------------------------------------------------------------------------------
1 | There is both plenty of data before and after the finding, so samples should operate
2 | properly and capture WINDOW_SIZE before and after.
3 |
4 | ghp_YWNjb3VudHMyM3JkZnNkZnNkZnNkZnNkZnNk
5 |
6 | Unlike the previous finding fixture (003) where the file ONLY contains the finding with
7 | no additional data.
8 |
--------------------------------------------------------------------------------
/tests/fixtures/ignore_list/001-simple.valid.json:
--------------------------------------------------------------------------------
1 | {
2 | "ignore": [
3 | {
4 | "pattern": "src/crypto/rsa\\.c",
5 | "reason": "PEM format RSA header and trailer constants due to parser."
6 | }
7 | ]
8 | }
--------------------------------------------------------------------------------
/tests/fixtures/ignore_list/002-framework.valid.json:
--------------------------------------------------------------------------------
1 | {
2 | "include": [],
3 | "ignore": [
4 | {
5 | "pattern": ".*/tests/.*",
6 | "reason": "Test fixtures contain example credentials."
7 | }
8 | ]
9 | }
--------------------------------------------------------------------------------
/tests/fixtures/ignore_list/002-project.valid.json:
--------------------------------------------------------------------------------
1 | {
2 | "include": [
3 | "002-framework.valid.json",
4 | "002-system.valid.json"
5 | ],
6 | "ignore": []
7 | }
--------------------------------------------------------------------------------
/tests/fixtures/ignore_list/002-system.valid.json:
--------------------------------------------------------------------------------
1 | {
2 | "include": [],
3 | "ignore": [
4 | {
5 | "pattern": ".*/libexample\\.so$",
6 | "reason": "libexample contains false positives due to a reason."
7 | },
8 | {
9 | "pattern": ".*/libssl.*?\\.so$",
10 | "reason": "Ignore all hash rules inside version X.Y.Z of libssl",
11 | "references": [
12 | "CredentialHashMD5",
13 | "CredentialHashSHA1",
14 | "CredentialHashSHA256",
15 | "CredentialHashSHA512"
16 | ]
17 | },
18 | {
19 | "md5": "e95348ed81f439d0a73a18835bd78eec",
20 | "reason": "Ignore all hash rules inside version X.Y.Z of example file",
21 | "references": [
22 | "CredentialHashMD5",
23 | "CredentialHashSHA1",
24 | "CredentialHashSHA256",
25 | "CredentialHashSHA512"
26 | ]
27 | }
28 | ]
29 | }
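
Taken together, these three fixtures form a small hierarchy: 002-project includes 002-framework and 002-system, each of which contributes its own ignore entries. The model test later in this section parses only the project-level file; a minimal sketch of the same idea follows (illustrative only; whether includes are resolved by the model itself or by a separate loader is not shown here).

    import json

    import stacs.scan

    # Parse the project-level list, which references the other lists via "include".
    with open("tests/fixtures/ignore_list/002-project.valid.json", "r") as fin:
        ignore_list = stacs.scan.model.ignore_list.Format(**json.load(fin))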
--------------------------------------------------------------------------------
/tests/fixtures/pack/001-simple.valid.json:
--------------------------------------------------------------------------------
1 | {
2 | "include": [],
3 | "pack": [
4 | {
5 | "module": "rules",
6 | "path": "all.yar"
7 | }
8 | ]
9 | }
--------------------------------------------------------------------------------
/tests/fixtures/pack/002-cloud.valid.json:
--------------------------------------------------------------------------------
1 | {
2 | "include": [],
3 | "pack": [
4 | {
5 | "module": "rules",
6 | "path": "credential/cloud/aws/access_key.yar"
7 | },
8 | {
9 | "module": "rules",
10 | "path": "credential/cloud/gcp/service_account.yar"
11 | }
12 | ]
13 | }
14 |
--------------------------------------------------------------------------------
/tests/fixtures/pack/002-parent.valid.json:
--------------------------------------------------------------------------------
1 | {
2 | "include": [
3 | "002-cloud.valid.json",
4 | "002-pki.valid.json"
5 | ],
6 | "pack": []
7 | }
8 |
--------------------------------------------------------------------------------
/tests/fixtures/pack/002-pki-dsa.valid.json:
--------------------------------------------------------------------------------
1 | {
2 | "include": [],
3 | "pack": [
4 | {
5 | "module": "rules",
6 | "path": "credential/pki/dsa/der.yar"
7 | },
8 | {
9 | "module": "rules",
10 | "path": "credential/pki/dsa/pem.yar"
11 | }
12 | ]
13 | }
--------------------------------------------------------------------------------
/tests/fixtures/pack/002-pki-rsa.valid.json:
--------------------------------------------------------------------------------
1 | {
2 | "include": [],
3 | "pack": [
4 | {
5 | "module": "rules",
6 | "path": "credential/pki/rsa/der.yar"
7 | },
8 | {
9 | "module": "rules",
10 | "path": "credential/pki/rsa/pem.yar"
11 | }
12 | ]
13 | }
--------------------------------------------------------------------------------
/tests/fixtures/pack/002-pki.valid.json:
--------------------------------------------------------------------------------
1 | {
2 | "include": [
3 | "002-pki-rsa.valid.json",
4 | "002-pki-dsa.valid.json"
5 | ],
6 | "pack": []
7 | }
--------------------------------------------------------------------------------
/tests/test_filter_ignore_list.py:
--------------------------------------------------------------------------------
1 | """Tests the STACS ignore list filter."""
2 |
3 | import os
4 | import unittest
5 |
6 | import stacs.scan
7 |
8 |
9 | class STACSFilterIgnoreListTestCase(unittest.TestCase):
10 | """Tests the STACS ignore list filter."""
11 |
12 | def setUp(self):
13 | """Ensure the application is setup for testing."""
14 | self.fixtures_path = os.path.join(
15 | os.path.dirname(os.path.abspath(__file__)), "fixtures/ignore_list/"
16 | )
17 |
18 | def tearDown(self):
19 | """Ensure everything is torn down between tests."""
20 | pass
21 |
22 | def test_by_path(self):
23 | """Validate whether path filters are working."""
24 | # Use the same fixture for all branches.
25 | finding = stacs.scan.model.finding.Entry(
26 | path="/a/a",
27 | md5="fa19207ef28b6a97828e3a22b11290e9",
28 | location=stacs.scan.model.finding.Location(
29 | offset=300,
30 | ),
31 | source=stacs.scan.model.finding.Source(
32 | module="stacs.scan.scanner.rules",
33 | reference="SomeRule",
34 | ),
35 | )
36 |
37 | # Define ignores which should correctly be ignored.
38 | hits = [
39 | # Path matches, no other constraint.
40 | stacs.scan.model.ignore_list.Entry(path="/a/a", reason="Test"),
41 | # Path matches, reference matches.
42 | stacs.scan.model.ignore_list.Entry(
43 | path="/a/a", reason="Test", references=["SomeRule", "OtherRule"]
44 | ),
45 | # Path matches, offset matches.
46 | stacs.scan.model.ignore_list.Entry(path="/a/a", reason="Test", offset=300),
47 | ]
48 |
49 | # Path differs.
50 | miss = stacs.scan.model.ignore_list.Entry(path="/a/b", reason="Test")
51 | self.assertEqual(stacs.scan.filter.ignore_list.by_path(finding, miss), False)
52 |
53 | # Path matches, reference differs.
54 | miss = stacs.scan.model.ignore_list.Entry(
55 | path="/a/a", reason="Test", references=["OtherRule"]
56 | )
57 | self.assertEqual(stacs.scan.filter.ignore_list.by_path(finding, miss), False)
58 |
59 | # Path matches, offset differs.
60 | miss = stacs.scan.model.ignore_list.Entry(
61 | path="/a/a", reason="Test", offset=1234
62 | )
63 | self.assertEqual(stacs.scan.filter.ignore_list.by_path(finding, miss), False)
64 |
65 | # Ensure all hit entries are matches.
66 | for hit in hits:
67 | self.assertEqual(stacs.scan.filter.ignore_list.by_path(finding, hit), True)
68 |
69 | def test_by_pattern(self):
70 | """Validate whether pattern filters are working."""
71 | # Use the same fixture for all branches.
72 | finding = stacs.scan.model.finding.Entry(
73 | path="/a/tests/a",
74 | md5="fa19207ef28b6a97828e3a22b11290e9",
75 | location=stacs.scan.model.finding.Location(
76 | offset=300,
77 | ),
78 | source=stacs.scan.model.finding.Source(
79 | module="stacs.scan.scanner.rules",
80 | reference="SomeRule",
81 | ),
82 | )
83 |
84 | # Pattern matches, no other constraint.
85 | hit = stacs.scan.model.ignore_list.Entry(pattern=".*/tests/.*", reason="Test")
86 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, hit), True)
87 |
88 | # Pattern matches, reference matches.
89 | hit = stacs.scan.model.ignore_list.Entry(
90 | pattern=".*/tests/.*",
91 | reason="Test",
92 | references=["SomeRule", "OtherRule"],
93 | )
94 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, hit), True)
95 |
96 | # Pattern matches, offset matches.
97 | hit = stacs.scan.model.ignore_list.Entry(
98 | pattern=".*/tests/.*", reason="Test", offset=300
99 | )
100 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, hit), True)
101 |
102 | # Pattern differs.
103 | miss = stacs.scan.model.ignore_list.Entry(pattern=r"\.shasums$", reason="Test")
104 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, miss), False)
105 |
106 | # Pattern matches, reference differs.
107 | miss = stacs.scan.model.ignore_list.Entry(
108 | pattern=".*/tests/.*", reason="Test", references=["OtherRule"]
109 | )
110 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, miss), False)
111 |
112 | # Pattern matches, offset differs.
113 | miss = stacs.scan.model.ignore_list.Entry(
114 | pattern=".*/tests/.*", reason="Test", offset=1234
115 | )
116 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, miss), False)
117 |
118 | def test_by_hash(self):
119 | """Validate whether hash filters are working."""
120 | # Use the same fixture for all branches.
121 | finding = stacs.scan.model.finding.Entry(
122 | path="/a/tests/a",
123 | md5="fa19207ef28b6a97828e3a22b11290e9",
124 | location=stacs.scan.model.finding.Location(
125 | offset=300,
126 | ),
127 | source=stacs.scan.model.finding.Source(
128 | module="stacs.scan.scanner.rules",
129 | reference="SomeRule",
130 | ),
131 | )
132 |
133 | # Hash matches, no other constraint.
134 | hit = stacs.scan.model.ignore_list.Entry(
135 | md5="fa19207ef28b6a97828e3a22b11290e9", reason="Test"
136 | )
137 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, hit), True)
138 |
139 | # Hash matches, reference matches.
140 | hit = stacs.scan.model.ignore_list.Entry(
141 | md5="fa19207ef28b6a97828e3a22b11290e9",
142 | reason="Test",
143 | references=["SomeRule", "OtherRule"],
144 | )
145 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, hit), True)
146 |
147 | # Hash matches, offset matches.
148 | hit = stacs.scan.model.ignore_list.Entry(
149 | md5="fa19207ef28b6a97828e3a22b11290e9", reason="Test", offset=300
150 | )
151 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, hit), True)
152 |
153 | # Hash differs.
154 | miss = stacs.scan.model.ignore_list.Entry(
155 | md5="cf42e6f36da80658591489975bbd845b", reason="Test"
156 | )
157 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, miss), False)
158 |
159 | # Hash matches, reference differs.
160 | miss = stacs.scan.model.ignore_list.Entry(
161 | md5="fa19207ef28b6a97828e3a22b11290e9",
162 | reason="Test",
163 | references=["OtherRule"],
164 | )
165 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, miss), False)
166 |
167 | # Hash matches, offset differs.
168 | miss = stacs.scan.model.ignore_list.Entry(
169 | md5="fa19207ef28b6a97828e3a22b11290e9", reason="Test", offset=1234
170 | )
171 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, miss), False)
172 |
--------------------------------------------------------------------------------
/tests/test_loader_filepath.py:
--------------------------------------------------------------------------------
1 | """Tests the STACS filepath loader."""
2 |
3 | import os
4 | import unittest
5 |
6 |
7 | class STACSLoaderFilepathTestCase(unittest.TestCase):
8 | """Tests the STACS filepath loader."""
9 |
10 | def setUp(self):
11 | """Ensure the application is setup for testing."""
12 | self.fixtures_path = os.path.join(
13 | os.path.dirname(os.path.abspath(__file__)), "fixtures/"
14 | )
15 |
16 | def tearDown(self):
17 | """Ensure everything is torn down between tests."""
18 | pass
19 |
--------------------------------------------------------------------------------
/tests/test_model_ignore_list.py:
--------------------------------------------------------------------------------
1 | """Tests the STACS allow list model and validator."""
2 |
3 | import json
4 | import os
5 | import unittest
6 |
7 | import stacs.scan
8 |
9 |
10 | class STACSModelAllowListTestCase(unittest.TestCase):
11 | """Tests the STACS allow list model and validator."""
12 |
13 | def setUp(self):
14 | """Ensure the application is setup for testing."""
15 | self.fixtures_path = os.path.join(
16 | os.path.dirname(os.path.abspath(__file__)), "fixtures/ignore_list/"
17 | )
18 |
19 | def tearDown(self):
20 | """Ensure everything is torn down between tests."""
21 | pass
22 |
23 | def test_simple(self):
24 | """Ensure that simple allow lists can be loaded."""
25 | with open(os.path.join(self.fixtures_path, "001-simple.valid.json"), "r") as f:
26 | stacs.scan.model.ignore_list.Format(**json.load(f))
27 |
28 | def test_hierarchical_loading(self):
29 | """Ensure that hierarchical allow lists can be loaded."""
30 | with open(os.path.join(self.fixtures_path, "002-project.valid.json"), "r") as f:
31 | stacs.scan.model.ignore_list.Format(**json.load(f))
32 |
--------------------------------------------------------------------------------
/tests/test_model_pack.py:
--------------------------------------------------------------------------------
1 | """Tests the STACS pack model and validator."""
2 |
3 | import json
4 | import os
5 | import unittest
6 |
7 | import stacs.scan
8 |
9 |
10 | class STACSModelPackTestCase(unittest.TestCase):
11 | """Tests the STACS pack model and validator."""
12 |
13 | def setUp(self):
14 | """Ensure the application is setup for testing."""
15 | self.fixtures_path = os.path.join(
16 | os.path.dirname(os.path.abspath(__file__)), "fixtures/pack/"
17 | )
18 |
19 | def tearDown(self):
20 | """Ensure everything is torn down between tests."""
21 | pass
22 |
23 | def test_simple_pack(self):
24 | """Ensure that simple packs can be loaded."""
25 | with open(os.path.join(self.fixtures_path, "001-simple.valid.json"), "r") as f:
26 | stacs.scan.model.pack.Format(**json.load(f))
27 |
--------------------------------------------------------------------------------
/tests/test_output_sarif.py:
--------------------------------------------------------------------------------
1 | """Tests the STACS SARIF output module."""
2 |
3 | import unittest
4 |
5 | import stacs.scan
6 |
7 |
8 | class STACSOutputSARIFTestCase(unittest.TestCase):
9 | """Tests the STACS SARIF output module."""
10 |
11 | def setUp(self):
12 | """Ensure the application is setup for testing."""
13 | pass
14 |
15 | def tearDown(self):
16 | """Ensure everything is torn down between tests."""
17 | pass
18 |
19 | def test_add_artifact(self):
20 | """Ensure that artifact entries are deduplicated by their full path."""
21 | findings = [
22 | stacs.scan.model.finding.Entry(
23 | path="/tmp/rootfs/etc/passwd",
24 | md5="b39bfc0e26a30024c76e4dcb8a1eae87",
25 | ),
26 | stacs.scan.model.finding.Entry(
27 | path="/tmp/rootfs/etc/passwd",
28 | md5="b39bfc0e26a30024c76e4dcb8a1eae87",
29 | ),
30 | stacs.scan.model.finding.Entry(
31 | path="/tmp/rootfs/a.tar.gz!a.tar!cred",
32 | md5="bf072e9119077b4e76437a93986787ef",
33 | ),
34 | stacs.scan.model.finding.Entry(
35 | path="/tmp/rootfs/a.tar.gz!a.tar!b_cred",
36 | md5="30cf3d7d133b08543cb6c8933c29dfd7",
37 | ),
38 | stacs.scan.model.finding.Entry(
39 | path="/tmp/rootfs/b.tar.gz!b_cred",
40 | md5="57b8d745384127342f95660d97e1c9c2",
41 | ),
42 | stacs.scan.model.finding.Entry(
43 | path="/tmp/rootfs/b.tar.gz!a.tar!cred",
44 | md5="787c9a8e2148e711f6e9f44696cf341f",
45 | ),
46 | stacs.scan.model.finding.Entry(
47 | path="/tmp/rootfs/a.tar.gz!a.tar!b.tar.gz!b.tar!pass",
48 | md5="d2a33790e5bf28b33cdbf61722a06989",
49 | ),
50 | ]
51 |
52 | # Ensure we get the expected number of artifacts in the artifacts list.
53 | artifacts = []
54 | for finding in findings:
55 | _, artifacts = stacs.scan.output.sarif.add_artifact(
56 | "/tmp/rootfs/", finding, artifacts
57 | )
58 |
59 | # Ensure findings are unfurled into the expected number of unique artifacts.
60 | self.assertEqual(len(artifacts), 12)
61 |
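
For reference, a rough trace of how the seven findings above unfurl into twelve artifacts, derived from the deduplication logic in sarif.py (illustrative rather than normative); each tuple is (uri, parentIndex) in insertion order.

    expected = [
        ("etc/passwd", None),
        ("a.tar.gz", None),
        ("a.tar", 1),
        ("cred", 2),
        ("b_cred", 2),
        ("b.tar.gz", None),
        ("b_cred", 5),
        ("a.tar", 5),
        ("cred", 7),
        ("b.tar.gz", 2),
        ("b.tar", 9),
        ("pass", 10),
    ]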
--------------------------------------------------------------------------------
/tests/test_scanner_rule.py:
--------------------------------------------------------------------------------
1 | """Tests the STACS Scanner Rule module."""
2 |
3 | import os
4 | import unittest
5 |
6 | import stacs.scan
7 |
8 |
9 | class STACSScannerRuleTestCase(unittest.TestCase):
10 | """Tests the STACS Scanner Rule module."""
11 |
12 | def setUp(self):
13 | """Ensure the application is setup for testing."""
14 | self.fixtures_path = os.path.join(
15 | os.path.dirname(os.path.abspath(__file__)), "fixtures/findings/"
16 | )
17 |
18 | def tearDown(self):
19 | """Ensure everything is torn down between tests."""
20 | pass
21 |
22 | def test_generate_sample(self):
23 | """Ensures that samples are correctly generated."""
24 | reduced_after_finding = stacs.scan.model.manifest.Entry(
25 | path=os.path.join(self.fixtures_path, "001.txt")
26 | )
27 | reduced_before_finding = stacs.scan.model.manifest.Entry(
28 | path=os.path.join(self.fixtures_path, "002.txt")
29 | )
30 | only_finding = stacs.scan.model.manifest.Entry(
31 | path=os.path.join(self.fixtures_path, "003.txt")
32 | )
33 |         sufficient_before_after_finding = stacs.scan.model.manifest.Entry(
34 | path=os.path.join(self.fixtures_path, "004.txt")
35 | )
36 |
37 | # Check that the correct number of bytes were extracted before and after the
38 | # respective findings.
39 | context = stacs.scan.scanner.rules.generate_sample(
40 | reduced_after_finding,
41 | 191, # Offset.
42 | 40, # Size.
43 | )
44 | self.assertEqual(len(context.before), 20)
45 | self.assertEqual(len(context.finding), 40)
46 | self.assertEqual(len(context.after), 1)
47 |
48 | context = stacs.scan.scanner.rules.generate_sample(
49 | reduced_before_finding,
50 | 3, # Offset.
51 | 40, # Size.
52 | )
53 | self.assertEqual(len(context.before), 3)
54 | self.assertEqual(len(context.finding), 40)
55 | self.assertEqual(len(context.after), 20)
56 |
57 | context = stacs.scan.scanner.rules.generate_sample(
58 | only_finding,
59 | 0, # Offset.
60 | 40, # Size.
61 | )
62 | self.assertEqual(len(context.before), 0)
63 | self.assertEqual(len(context.finding), 40)
64 | self.assertEqual(len(context.after), 0)
65 |
66 | context = stacs.scan.scanner.rules.generate_sample(
67 |             sufficient_before_after_finding,
68 | 137, # Offset.
69 | 40, # Size.
70 | )
71 | self.assertEqual(len(context.before), 20)
72 | self.assertEqual(len(context.finding), 40)
73 | self.assertEqual(len(context.after), 20)
74 |
--------------------------------------------------------------------------------
/wrapper/stacs-scan:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # This wrapper is used to determine whether a STACS ignore list is present in the
4 | # scan directory.
5 | #
6 |
7 | SCAN_DIR="/mnt/stacs/input"
8 |
9 | # Define additional flags to pass.
10 | STACS_FLAGS=""
11 |
12 | if [ ${STACS_SKIP_UNPROCESSABLE:-0} -ne 0 ]; then
13 | STACS_FLAGS="${STACS_FLAGS} --skip-unprocessable"
14 | fi
15 |
16 | if [ ${STACS_THREADS:-10} -ne 10 ]; then
17 | STACS_FLAGS="${STACS_FLAGS} --threads ${STACS_THREADS}"
18 | fi
19 |
20 | if [ ${STACS_DEBUG:-0} -ne 0 ]; then
21 | STACS_FLAGS="${STACS_FLAGS} --debug"
22 | fi
23 |
24 | if [ ${STACS_OUTPUT_PRETTY:-0} -ne 0 ]; then
25 | STACS_FLAGS="${STACS_FLAGS} --pretty"
26 | fi
27 |
28 | # If additional arguments are provided, use them instead of defaults.
29 | if [ "$#" -gt 0 ]; then
30 | stacs "$@"
31 | else
32 | # Use an ignore list, if present.
33 | if [ -e "${SCAN_DIR}/stacs.ignore.json" ]; then
34 | stacs \
35 | --rule-pack /mnt/stacs/rules/credential.json \
36 | --cache-directory /mnt/stacs/cache \
37 | --ignore-list "${SCAN_DIR}/stacs.ignore.json" \
38 | ${STACS_FLAGS} \
39 | "${SCAN_DIR}/"
40 | else
41 | stacs \
42 | --rule-pack /mnt/stacs/rules/credential.json \
43 | --cache-directory /mnt/stacs/cache \
44 | ${STACS_FLAGS} \
45 | "${SCAN_DIR}/"
46 | fi
47 | fi
48 |
--------------------------------------------------------------------------------