├── .github ├── CODEOWNERS └── workflows │ ├── check.yml │ ├── publish.yml │ ├── publish_to_testing.yml │ └── update.yml ├── .gitignore ├── .vscode ├── c_cpp_properties.json └── settings.json ├── Dockerfile ├── LICENSE ├── README.md ├── docs └── images │ ├── Human-Output-Example.png │ ├── SARIF-Viewer-Example.png │ ├── STACS-Logo-RGB.png │ └── STACS-Logo-RGB.small.png ├── pyproject.toml ├── setup.py ├── stacs ├── __init__.py ├── native │ └── archive │ │ └── src │ │ ├── archive.cpp │ │ ├── archiveentry.cpp │ │ ├── archiveentry.hpp │ │ ├── archivereader.cpp │ │ └── archivereader.hpp └── scan │ ├── __about__.py │ ├── __init__.py │ ├── constants.py │ ├── entrypoint │ ├── __init__.py │ └── cli.py │ ├── exceptions.py │ ├── filter │ ├── __init__.py │ └── ignore_list.py │ ├── helper.py │ ├── loader │ ├── __init__.py │ ├── archive.py │ ├── filepath.py │ ├── format │ │ ├── __init__.py │ │ ├── dmg.py │ │ └── xar.py │ └── manifest.py │ ├── model │ ├── __init__.py │ ├── finding.py │ ├── ignore_list.py │ ├── manifest.py │ └── pack.py │ ├── output │ ├── __init__.py │ ├── markdown.py │ ├── pretty.py │ └── sarif.py │ └── scanner │ ├── __init__.py │ └── rules.py ├── tests ├── __init__.py ├── fixtures │ ├── .gitignore │ ├── findings │ │ ├── 001.txt │ │ ├── 002.txt │ │ ├── 003.txt │ │ └── 004.txt │ ├── ignore_list │ │ ├── 001-simple.valid.json │ │ ├── 002-framework.valid.json │ │ ├── 002-project.valid.json │ │ └── 002-system.valid.json │ └── pack │ │ ├── 001-simple.valid.json │ │ ├── 002-cloud.valid.json │ │ ├── 002-parent.valid.json │ │ ├── 002-pki-dsa.valid.json │ │ ├── 002-pki-rsa.valid.json │ │ └── 002-pki.valid.json ├── test_filter_ignore_list.py ├── test_loader_filepath.py ├── test_model_ignore_list.py ├── test_model_pack.py ├── test_output_sarif.py └── test_scanner_rule.py └── wrapper └── stacs-scan /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Own everything by default. This can be changed later and as needed. 
2 | * @darkarnium 3 | -------------------------------------------------------------------------------- /.github/workflows/check.yml: -------------------------------------------------------------------------------- 1 | name: Check 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | check: 10 | strategy: 11 | matrix: 12 | python: ['3.9', '3.10', '3.11'] 13 | 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: Install Dependencies 19 | run: | 20 | sudo apt update 21 | sudo apt install -y libarchive13 libarchive-dev 22 | 23 | - name: Configure Python (${{ matrix.python }}) 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python }} 27 | 28 | - name: Install Tox 29 | run: | 30 | python -m pip install --upgrade pip wheel setuptools 31 | pip install tox 32 | 33 | - name: Run Linters (${{ matrix.python }}) 34 | run: | 35 | tox -e linters 36 | 37 | - name: Run Tests (${{ matrix.python }}) 38 | run: | 39 | tox -e py3 40 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | build_wheels: 9 | name: Build wheels on ${{ matrix.os }} 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | os: [ubuntu-22.04, ubuntu-20.04, macos-11, macos-12] 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | with: 18 | ref: ${{ github.event.inputs.release }} 19 | 20 | - name: Configure Python 21 | uses: actions/setup-python@v3 22 | 23 | # TODO: This may result in macOS compiling against a newer version of libarchive 24 | # than Linux. 25 | - name: Install dependencies (macOS) 26 | if: startsWith(matrix.os, 'macos-') 27 | run: brew install libarchive 28 | 29 | - name: Install cibuildwheel 30 | run: python -m pip install cibuildwheel==2.12.3 31 | 32 | - name: Build wheels (macOS) 33 | run: python -m cibuildwheel --output-dir wheelhouse 34 | if: startsWith(matrix.os, 'macos-') 35 | 36 | - name: Build wheels (Ubuntu) 37 | run: python -m cibuildwheel --output-dir wheelhouse 38 | if: startsWith(matrix.os, 'ubuntu-') 39 | env: 40 | CIBW_BEFORE_ALL_LINUX: > 41 | curl -sOL https://github.com/libarchive/libarchive/releases/download/v3.6.1/libarchive-3.6.1.tar.gz && 42 | tar -zxvf libarchive-3.6.1.tar.gz && 43 | cd libarchive-3.6.1/ && 44 | ./configure --without-lzo2 --without-nettle --without-xml2 --without-openssl --with-expat && 45 | make && 46 | make install 47 | 48 | - uses: actions/upload-artifact@v3 49 | with: 50 | path: ./wheelhouse/*.whl 51 | 52 | build_sdist: 53 | runs-on: ubuntu-latest 54 | steps: 55 | - uses: actions/checkout@v2 56 | 57 | - name: Configure Python 58 | uses: actions/setup-python@v2 59 | with: 60 | python-version: 3.11 61 | 62 | - name: Build Python sdist 63 | run: | 64 | python -m pip install --upgrade pip wheel setuptools 65 | pip install build 66 | python -m build --sdist --outdir dist/ . 
67 | 68 | - uses: actions/upload-artifact@v3 69 | with: 70 | path: dist/*.tar.gz 71 | 72 | publish: 73 | needs: [build_wheels, build_sdist] 74 | runs-on: ubuntu-latest 75 | steps: 76 | - uses: actions/checkout@v2 77 | with: 78 | ref: ${{ github.event.inputs.release }} 79 | 80 | - uses: actions/download-artifact@v3 81 | with: 82 | name: artifact 83 | path: dist 84 | 85 | - name: Extract version 86 | run: | 87 | git clone https://www.github.com/stacscan/stacs-rules.git /tmp/stacs-rules 88 | pushd /tmp/stacs-rules 89 | RULES_VERSION="$(git rev-parse --short HEAD)" 90 | popd 91 | STACS_VERSION="$(python -c 'exec(open("stacs/scan/__about__.py").read()) ; print(__version__, end="")')" 92 | echo "IMAGE_VERSION=${STACS_VERSION}-r${RULES_VERSION}" >> "${GITHUB_ENV}" 93 | echo "STACS_VERSION=${STACS_VERSION}" >> "${GITHUB_ENV}" 94 | 95 | - name: Publish Python package 96 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 97 | with: 98 | user: __token__ 99 | password: ${{ secrets.PYPI_TOKEN }} 100 | 101 | - name: Wait a minute for PyPi to catch up 102 | run: sleep 60s 103 | shell: bash 104 | 105 | - name: Login to DockerHub 106 | uses: docker/login-action@v1 107 | with: 108 | username: ${{ secrets.DOCKERHUB_USERNAME }} 109 | password: ${{ secrets.DOCKERHUB_TOKEN }} 110 | 111 | - name: Build and push Docker image 112 | id: docker_build 113 | uses: docker/build-push-action@v2 114 | with: 115 | context: . 116 | push: true 117 | tags: stacscan/stacs:latest,stacscan/stacs:${{ env.IMAGE_VERSION }} 118 | build-args: | 119 | VERSION=${{ env.IMAGE_VERSION }} 120 | STACS_BUILD=${{ env.STACS_VERSION }} 121 | -------------------------------------------------------------------------------- /.github/workflows/publish_to_testing.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Testing 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | commit: 7 | description: The commit ref to build and release to PyPI testing. 8 | required: true 9 | 10 | jobs: 11 | build_wheels: 12 | name: Build wheels on ${{ matrix.os }} 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | os: [ubuntu-22.04, ubuntu-20.04, macos-11, macos-12] 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | with: 21 | ref: ${{ github.event.inputs.release }} 22 | 23 | - name: Configure Python 24 | uses: actions/setup-python@v3 25 | 26 | # TODO: This may result in macOS compiling against a newer version of libarchive 27 | # than Linux. 28 | - name: Install dependencies (macOS) 29 | if: startsWith(matrix.os, 'macos-') 30 | run: brew install libarchive 31 | 32 | # This is rather unpleasant and the package versioning should be adjusted to 33 | # allow snapshot build numbers to be injected via setuptools, etc. 
34 | - name: Set development version suffix 35 | run: | 36 | sed -E -i -r 's/^__version__(.*)?"$/__version__\1-dev.${{ github.run_number }}"/' \ 37 | stacs/scan/__about__.py 38 | 39 | - name: Install cibuildwheel 40 | run: python -m pip install cibuildwheel==2.12.3 41 | 42 | - name: Build wheels (macOS) 43 | run: python -m cibuildwheel --output-dir wheelhouse 44 | if: startsWith(matrix.os, 'macos-') 45 | env: 46 | CIBW_BEFORE_BUILD: pip install pybind11 47 | CIBW_ENVIRONMENT: > 48 | CPPFLAGS="-std=c++11 -I$(find `brew --cellar libarchive` -name include -type d)" \ 49 | LDFLAGS="-L$(find `brew --cellar libarchive` -name include -type d)" \ 50 | PKG_CONFIG="$(find `brew --cellar libarchive` -name pkgconfig -type d)" 51 | 52 | - name: Build wheels (Ubuntu) 53 | run: python -m cibuildwheel --output-dir wheelhouse 54 | if: startsWith(matrix.os, 'ubuntu-') 55 | env: 56 | CIBW_BEFORE_BUILD: pip install pybind11 57 | CIBW_BEFORE_ALL_LINUX: > 58 | curl -sOL https://github.com/libarchive/libarchive/releases/download/v3.6.1/libarchive-3.6.1.tar.gz && 59 | tar -zxvf libarchive-3.6.1.tar.gz && 60 | cd libarchive-3.6.1/ && 61 | ./configure --without-lzo2 --without-nettle --without-xml2 --without-openssl --with-expat && 62 | make && 63 | make install 64 | 65 | - uses: actions/upload-artifact@v3 66 | with: 67 | path: ./wheelhouse/*.whl 68 | 69 | build_sdist: 70 | runs-on: ubuntu-latest 71 | steps: 72 | - uses: actions/checkout@v2 73 | 74 | - name: Configure Python 75 | uses: actions/setup-python@v2 76 | with: 77 | python-version: 3.11 78 | 79 | # This is rather unpleasant and the package versioning should be adjusted to 80 | # allow snapshot build numbers to be injected via setuptools, etc. 81 | - name: Set development version suffix 82 | run: | 83 | sed -E -i -r 's/^__version__(.*)?"$/__version__\1-dev.${{ github.run_number }}"/' \ 84 | stacs/scan/__about__.py 85 | 86 | - name: Build Python sdist 87 | run: | 88 | python -m pip install --upgrade pip wheel setuptools 89 | pip install build 90 | python -m build --sdist --outdir dist/ . 91 | 92 | - uses: actions/upload-artifact@v3 93 | with: 94 | path: dist/*.tar.gz 95 | 96 | publish: 97 | needs: [build_wheels, build_sdist] 98 | runs-on: ubuntu-latest 99 | steps: 100 | - uses: actions/download-artifact@v3 101 | with: 102 | name: artifact 103 | path: dist 104 | 105 | - name: Publish Python package 106 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 107 | with: 108 | user: __token__ 109 | password: ${{ secrets.PYPI_TESTING_TOKEN }} 110 | repository_url: "https://test.pypi.org/legacy/" 111 | -------------------------------------------------------------------------------- /.github/workflows/update.yml: -------------------------------------------------------------------------------- 1 | name: Update 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | release: 7 | description: The tagged release version to rebuild with the latest rules. 
8 | default: 0.0.0 9 | required: true 10 | 11 | jobs: 12 | update: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - name: Configure Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: 3.11 21 | 22 | - name: Extract version 23 | run: | 24 | git clone https://www.github.com/stacscan/stacs-rules.git /tmp/stacs-rules 25 | pushd /tmp/stacs-rules 26 | RULES_VERSION="$(git rev-parse --short HEAD)" 27 | popd 28 | STACS_VERSION="$(python -c 'exec(open("stacs/scan/__about__.py").read()) ; print(__version__, end="")')" 29 | echo "IMAGE_VERSION=${STACS_VERSION}-r${RULES_VERSION}" >> "${GITHUB_ENV}" 30 | echo "STACS_VERSION=${STACS_VERSION}" >> "${GITHUB_ENV}" 31 | 32 | - name: Login to DockerHub 33 | uses: docker/login-action@v1 34 | with: 35 | username: ${{ secrets.DOCKERHUB_USERNAME }} 36 | password: ${{ secrets.DOCKERHUB_TOKEN }} 37 | 38 | - name: Build and push Docker image 39 | id: docker_build 40 | uses: docker/build-push-action@v2 41 | with: 42 | context: . 43 | push: true 44 | tags: stacscan/stacs:latest,stacscan/stacs:${{ env.IMAGE_VERSION }} 45 | build-args: | 46 | VERSION=${{ env.IMAGE_VERSION }} 47 | STACS_BUILD=${{ env.STACS_VERSION }} 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | # Tracker 141 | TODO.md 142 | 143 | # Profiling information. 144 | *.prof 145 | result.json 146 | stacs-rules/ 147 | 148 | # macOS files. 149 | .DS_Store 150 | 151 | # Wheel build. 152 | wheelhouse/ 153 | 154 | # Ignore compiled shared objects. 155 | *.so 156 | -------------------------------------------------------------------------------- /.vscode/c_cpp_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "Linux", 5 | "includePath": [ 6 | "${workspaceFolder}/**", 7 | "/usr/include/python3.9" 8 | ], 9 | "defines": [], 10 | "compilerPath": "/usr/bin/gcc", 11 | "cStandard": "gnu17", 12 | "cppStandard": "gnu++14", 13 | "intelliSenseMode": "linux-gcc-x64" 14 | } 15 | ], 16 | "version": 4 17 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintEnabled": false, 3 | "python.linting.flake8Enabled": true, 4 | "python.linting.enabled": true, 5 | "python.formatting.provider": "black", 6 | "editor.formatOnSave": true, 7 | "editor.codeActionsOnSave": { 8 | "source.organizeImports": true 9 | }, 10 | "editor.rulers": [ 11 | 79, 12 | 88 13 | ], 14 | "C_Cpp.clang_format_fallbackStyle": "{ BasedOnStyle: Google, IndentWidth: 4, ColumnLimit: 0}" 15 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-alpine 2 | 3 | # Allow build-time specification of version. 4 | ARG VERSION 5 | ARG STACS_BUILD 6 | 7 | # Allow runtime tuning. 8 | ENV STACS_SKIP_UNPROCESSABLE=0 9 | ENV STACS_THREADS=10 10 | ENV STACS_DEBUG=0 11 | ENV STACS_OUTPUT_PRETTY=0 12 | 13 | # Keep things friendly. 14 | LABEL org.opencontainers.image.title="STACS" 15 | LABEL org.opencontainers.image.description="Static Token And Credential Scanner" 16 | LABEL org.opencontainers.image.url="https://www.github.com/stacscan/stacs" 17 | LABEL org.opencontainers.image.version=$VERSION 18 | 19 | # Install STACS into the container. 20 | WORKDIR /opt/stacs 21 | COPY wrapper/stacs-scan /usr/bin 22 | 23 | RUN apk add --no-cache git gcc musl-dev zstd && \ 24 | pip install --no-cache-dir stacs==$STACS_BUILD 25 | 26 | # Clone the latest STACS rules into the rules directory to enable out of the box use. 27 | # This can be mounted over using a volume mount to allow more specific rules to be 28 | # loaded. The same is true for "ignore-lists". 
Finally, there is a "cache" directory 29 | # configured as a mount to allow scans which need a lot of disk space to mount a scratch 30 | # volume so that Docker doesn't run out of disk :) 31 | RUN mkdir -p /mnt/stacs/input /mnt/stacs/rules /mnt/stacs/ignore /mnt/stacs/cache && \ 32 | git clone https://www.github.com/stacscan/stacs-rules /mnt/stacs/rules 33 | 34 | # Define a volume to allow mounting a local directory to scan. 35 | VOLUME /mnt/stacs/input 36 | VOLUME /mnt/stacs/rules 37 | VOLUME /mnt/stacs/ignore 38 | VOLUME /mnt/stacs/cache 39 | 40 | # Clean up. 41 | RUN apk del --purge git musl-dev gcc 42 | 43 | # Default to running stacs with the volume mounts. 44 | ENTRYPOINT ["stacs-scan"] 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, Peter Adkins 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Shield](https://img.shields.io/github/actions/workflow/status/stacscan/stacs/check.yml?label=Tests&style=flat-square)](https://github.com/stacscan/stacs/actions?workflow=Check) 2 | [![Shield](https://img.shields.io/github/actions/workflow/status/stacscan/stacs/publish.yml?label=Deploy&style=flat-square)](https://github.com/stacscan/stacs/actions?workflow=Publish) 3 | [![Shield](https://img.shields.io/docker/pulls/stacscan/stacs?style=flat-square)](https://hub.docker.com/r/stacscan/stacs) 4 | [![Shield](https://img.shields.io/docker/image-size/stacscan/stacs?style=flat-square)](https://hub.docker.com/r/stacscan/stacs/tags?page=1&ordering=last_updated) 5 | [![Shield](https://img.shields.io/twitter/follow/stacscan?style=flat-square)](https://twitter.com/stacscan) 6 |
<p align="center"><img src="docs/images/STACS-Logo-RGB.small.png" alt="STACS Logo"><br />Static Token And Credential Scanner</p>
15 | 16 | ### What is it? 17 | 18 | STACS is a [YARA](https://virustotal.github.io/yara/) powered static credential scanner 19 | which supports binary file formats, analysis of nested archives, composable rulesets 20 | and ignore lists, and SARIF reporting. 21 | 22 | ### What does STACS support? 23 | 24 | Currently, STACS supports recursive unpacking of: 25 | 26 | * 7z, ar, bz2, cab, cpio, gz, iso, rar, rpm, tar, xar, xz, zip, dmg 27 | 28 | As STACS works on detected file types, proprietary file formats and other 29 | file types which are based on these formats are automatically supported. This includes Docker 30 | images, Android APKs, Java JAR files, RPMs, Debian packages (`.deb`), macOS packages 31 | (`.pkg`), and more! 32 | 33 | ### Who should use STACS? 34 | 35 | STACS is designed for use by any team that releases binary artifacts. STACS provides 36 | developers the ability to automatically check for accidental inclusion of static 37 | credentials and key material in their releases. 38 | 39 | However, this doesn't mean STACS can't help with SaaS applications, enterprise 40 | software, or even source code! 41 | 42 | As an example, STACS can be used to find static credentials in Docker images uploaded 43 | to public and private container registries. It can also be used to find credentials 44 | accidentally compiled into executables, packages for mobile devices, and "enterprise 45 | archives" - such as those used by Java application servers. 46 | 47 | ### How does it work? 48 | 49 | STACS detects static credentials using "rule packs" provided to STACS when run. These 50 | rule packs define a set of YARA rules to run against files provided to STACS. When a 51 | match against a rule is found, a "finding" is generated. These findings represent 52 | potential credentials inside a file, and are reported for a developer to remediate 53 | or "ignore". 54 | 55 | If a finding is found to be a false positive - that is, a match on something other 56 | than a real credential - the developer can generate a set of "ignore lists" to ensure 57 | that these matches don't appear in future reports. 58 | 59 | The real power of STACS comes from the automatic detection and unpacking of nested 60 | archives, and composable ignore lists and rule packs. 61 | 62 | #### Ignore lists? 63 | 64 | In order to allow flexible and collaborative usage, STACS supports composable ignore 65 | lists. This allows an ignore list to include other ignore lists, enabling 66 | composition of a "tree of ignores" based on organisational guidelines. These ignore 67 | lists are especially useful in organisations where many of the same frameworks or 68 | products are used. If a team has already marked a finding as a false positive, other 69 | teams get the benefit of not having to triage the same finding. 70 | 71 | #### Rule packs? 72 | 73 | In the same manner as ignore lists, rule packs are also composable. This enables an 74 | organisation to define a baseline set of rules for use by all teams, while still 75 | allowing teams to maintain rulesets specific to their products. 76 | 77 | ### How do I use it? 78 | 79 | The easiest way to use STACS is via the Docker images published to Docker Hub. 80 | However, STACS can also be installed directly from Python's PyPI, or by cloning this 81 | repository. See the relevant sections below to get started! 82 | 83 | A cloud based service is coming soon which allows integration directly into build 84 | and release pipelines to enable detection of static credentials before release!
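For teams who want to embed STACS into their own tooling, the building blocks that the `stacs` CLI wires together can also be driven from Python. The sketch below mirrors the flow in `stacs/scan/entrypoint/cli.py` (load a rule pack, expand targets into a cache directory, run the scanners, apply ignore lists, render SARIF); it is an illustration of the internals rather than a stable, documented API, and the rule pack, ignore list, and target paths shown are placeholders:

```
import os
import tempfile

import stacs.scan

# Load a rule pack and an (optional) ignore list.
pack = stacs.scan.model.pack.from_file("~/.stacs/pack.json")
ignored = stacs.scan.model.ignore_list.from_file("my-ignore-list.json")

# Expand the target path, unpacking any archives into a scratch cache directory.
target = os.path.abspath("dist/")
cache = tempfile.mkdtemp()
targets = stacs.scan.loader.filepath.finder(target, cache, skip_on_corrupt=True, workers=10)

# Run every registered scanner over the targets, then apply the ignore list.
findings = []
for scanner in stacs.scan.scanner.__all__:
    findings.extend(getattr(stacs.scan.scanner, scanner).run(targets, pack, workers=10))
findings = stacs.scan.filter.ignore_list.process(findings, ignored)

# Render the findings as SARIF (or use stacs.scan.output.pretty for console output).
print(stacs.scan.output.sarif.render(target, findings, pack))
```

The Docker image and the `stacs` CLI described below remain the supported ways to run a scan; the snippet above is only intended to show how the loader, scanners, filters, and output modules fit together.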
85 | 86 | #### Docker 87 | 88 | Using the published images, STACS can be used to scan artifacts right away! The STACS 89 | Docker image provides a number of volume mounts, allowing the files to be scanned to be 90 | mounted directly into the scan container. 91 | 92 | As an example, to scan everything in the current folder, the following command can be 93 | run (Docker must be installed). 94 | 95 | ``` 96 | docker run \ 97 | --rm \ 98 | -v "$(pwd):/mnt/stacs/input:ro" \ 99 | stacscan/stacs:latest 100 | ``` 101 | 102 | If you would like to receive "pretty", human-readable output, the following command should be 103 | used: 104 | 105 | ``` 106 | docker run \ 107 | --rm \ 108 | -e STACS_OUTPUT_PRETTY=1 \ 109 | -v "$(pwd):/mnt/stacs/input:ro" \ 110 | stacscan/stacs:latest 111 | ``` 112 | 113 | By default, STACS will output any findings in SARIF format directly to STDOUT and, in 114 | order to keep things orderly, all log messages will be sent to STDERR. For more advanced 115 | use cases, a number of other volume mounts are provided. These allow the user to control 116 | the rule packs, ignore lists, and cache directory to use. 117 | 118 | #### PyPI 119 | 120 | STACS can also be installed directly from Python's PyPI. This provides a `stacs` command 121 | which can then be used by developers to scan projects directly in their local 122 | development environments. 123 | 124 | STACS can be installed directly from PyPI using: 125 | 126 | ``` 127 | pip install stacs 128 | ``` 129 | 130 | **Please Note:** The PyPI release of STACS does not come with any rules. These will also 131 | need to be cloned from the [community rules repository](https://github.com/stacscan/stacs-rules) 132 | for STACS to work! 133 | 134 | ### FAQ 135 | 136 | #### Is there a hosted version of STACS? 137 | 138 | Not yet. However, there are plans for a hosted version of STACS which can be easily 139 | integrated into existing build systems, and which contains additional prebuilt rule 140 | packs and ignore lists. 141 | 142 | #### What do I do about false positives? 143 | 144 | Unfortunately, false positives are an inevitable side effect of detecting 145 | static credentials. If rules are too granular then rule maintenance becomes a burden 146 | and STACS may miss credentials. If rules are too coarse then STACS may generate too 147 | many false positives! 148 | 149 | To assist, STACS provides a number of tools to reduce the number 150 | of false positives which make it into final reports. 151 | 152 | Primarily, STACS provides a mechanism which allows users to define composable ignore 153 | lists which allow a set of findings to be "ignored". These rules can be as coarse as 154 | ignoring all files based on a pattern, or as granular as a specific finding on a 155 | particular line of a file. 156 | 157 | This information is automatically propagated through into reports, so "ignored" findings 158 | will be marked as "suppressed" in SARIF output while also including the reason for the 159 | ignore in the output for tracking. 160 | 161 | #### How do I view the results? 162 | 163 | If using "pretty" output (`--pretty` / `STACS_OUTPUT_PRETTY`), results will be printed 164 | in a human readable format to the console.
165 | 166 | ![Human Output](https://raw.githubusercontent.com/stacscan/stacs/main/docs/images/Human-Output-Example.png) 167 | 168 | If using SARIF, there are a number of viewers available which make this data easier to 169 | read, such as [this great web based viewer from](https://microsoft.github.io/sarif-web-component/) 170 | Microsoft. An example of the findings from a Docker container image has been included 171 | below: 172 | 173 | ![Microsoft SARIF Viewer Output](https://raw.githubusercontent.com/stacscan/stacs/main/docs/images/SARIF-Viewer-Example.png) 174 | 175 | #### The performance is really, really bad when running in Docker on macOS! 176 | 177 | Unfortunately, this appears to be due to a limitation of Docker Desktop for Mac. I/O 178 | for bind mounts [is really, really slow](https://github.com/docker/for-mac/issues/3677). 179 | -------------------------------------------------------------------------------- /docs/images/Human-Output-Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/docs/images/Human-Output-Example.png -------------------------------------------------------------------------------- /docs/images/SARIF-Viewer-Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/docs/images/SARIF-Viewer-Example.png -------------------------------------------------------------------------------- /docs/images/STACS-Logo-RGB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/docs/images/STACS-Logo-RGB.png -------------------------------------------------------------------------------- /docs/images/STACS-Logo-RGB.small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/docs/images/STACS-Logo-RGB.small.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm", "pybind11"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "stacs" 7 | readme = "README.md" 8 | description = "Static Token And Credential Scanner." 
9 | requires-python = ">=3.8" 10 | dynamic = ["version"] 11 | authors = [{name = "Peter Adkins"}] 12 | license = {text = "BSD-3-Clause"} 13 | classifiers = [ 14 | "Programming Language :: Python :: 3.8", 15 | "Natural Language :: English", 16 | ] 17 | dependencies = [ 18 | "click>=8.1.0,<9.0", 19 | "yara-python==4.2.3", 20 | "pydantic>=1.10.0,<2.0", 21 | "colorama>=0.4.0,<1.0", 22 | "zstandard>=0.18.0,<1.0", 23 | ] 24 | 25 | [project.optional-dependencies] 26 | tests = [ 27 | "black", 28 | "coverage", 29 | "ruff", 30 | "types-setuptools", 31 | "mypy", 32 | "pip-tools", 33 | "mock", 34 | "pytest", 35 | "pytest-cov", 36 | "responses", 37 | "tox", 38 | "ipython", 39 | ] 40 | 41 | [tool.setuptools.dynamic] 42 | version = {attr = "stacs.scan.__about__.__version__"} 43 | 44 | [tool.setuptools.packages.find] 45 | where = ["."] 46 | include = ["stacs.*"] 47 | 48 | [project.scripts] 49 | stacs = "stacs.scan.entrypoint.cli:main" 50 | 51 | [tool.ruff] 52 | line-length = 88 53 | extend-select = [ 54 | "B", # flake8-bugbear 55 | "I", # isort 56 | ] 57 | ignore = [ 58 | "B904", 59 | "I001", 60 | "B005", 61 | ] 62 | 63 | [tool.mypy] 64 | files = [ 65 | "./stacs/**/*.py", 66 | "./tests/**/*.py" 67 | ] 68 | allow_redefinition = false 69 | check_untyped_defs = true 70 | disallow_any_generics = true 71 | disallow_untyped_calls = false 72 | ignore_errors = false 73 | ignore_missing_imports = true 74 | implicit_reexport = false 75 | local_partial_types = true 76 | strict_optional = true 77 | strict_equality = true 78 | no_implicit_optional = true 79 | warn_no_return = true 80 | warn_unused_ignores = true 81 | warn_redundant_casts = true 82 | warn_unused_configs = true 83 | warn_unreachable = true 84 | 85 | [tool.pytest.ini_options] 86 | junit_family = "xunit2" 87 | norecursedirs = ".*" 88 | self-contained-html = true 89 | testpaths = [ 90 | "tests" 91 | ] 92 | addopts = """ 93 | --strict 94 | --tb=auto 95 | --cov=stacs 96 | --cov-report=term-missing:skip-covered 97 | --cov-branch 98 | -p no:doctest 99 | -p no:warnings 100 | -s 101 | """ 102 | 103 | [tool.tox] 104 | legacy_tox_ini = """ 105 | [tox] 106 | envlist = linters,py3 107 | 108 | [testenv] 109 | pip_version = pip 110 | extras = tests 111 | commands = pytest -c pyproject.toml 112 | srcs = stacs 113 | 114 | [testenv:linters] 115 | basepython = python3 116 | usedevelop = true 117 | commands = 118 | {[testenv:ruff]commands} 119 | 120 | [testenv:ruff] 121 | basepython = python3 122 | skip_install = true 123 | commands = 124 | ruff check {[testenv]srcs} 125 | 126 | [testenv:mypy] 127 | basepython3 = python3 128 | skip_install = true 129 | commands = 130 | - mypy --config-file pyproject.toml {[testenv]srcs} 131 | """ 132 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup required for pybind11 built native code only.""" 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | from typing import List 7 | 8 | from pybind11.setup_helpers import Pybind11Extension 9 | from setuptools import setup 10 | 11 | ext_modules = [ 12 | Pybind11Extension( 13 | "stacs.native.archive", 14 | ["stacs/native/archive/src/archive.cpp"], 15 | libraries=["archive"], 16 | ), 17 | ] 18 | 19 | 20 | def run(command: List[str]): 21 | """Run a command, returning the output as a string or an exception on failure.""" 22 | result = subprocess.run(command, capture_output=True, check=True) 23 | return str(result.stdout, "utf-8").strip() 24 | 25 | 26 | # 
macOS requires a bit of special handling to ensure that the - likely - brew installed 27 | # libarchive is discoverable. The macOS built-in libarchive is no good, as it's too 28 | # old. 29 | if platform.system() == "Darwin": 30 | libarchive = run(["brew", "--cellar", "libarchive"]) 31 | libarchive_headers = run(["find", libarchive, "-name", "include", "-type", "d"]) 32 | libarchive_pkgconfig = run(["find", libarchive, "-name", "pkgconfig", "-type", "d"]) 33 | 34 | # Setup the environment for the build. 35 | os.environ["LDFLAGS"] = f"-L{libarchive_headers}" 36 | os.environ["PKG_CONFIG"] = libarchive_pkgconfig 37 | os.environ["CPPFLAGS"] = " ".join( 38 | [ 39 | os.environ.get("CPPFLAGS", ""), 40 | "-std=c++11", 41 | f"-I{libarchive_headers}", 42 | ] 43 | ) 44 | 45 | setup(ext_modules=ext_modules, packages=[]) 46 | -------------------------------------------------------------------------------- /stacs/__init__.py: -------------------------------------------------------------------------------- 1 | """STACS - Static Token And Credential Scanner. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | __import__("pkg_resources").declare_namespace(__name__) 7 | -------------------------------------------------------------------------------- /stacs/native/archive/src/archive.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file archive.cpp 3 | * @author Peter Adkins 4 | * @date 2022-07-02 5 | */ 6 | 7 | #include 8 | 9 | #include "archiveentry.cpp" 10 | #include "archivereader.cpp" 11 | 12 | namespace py = pybind11; 13 | 14 | PYBIND11_MODULE(archive, module) { 15 | module.doc() = "STACS Native Extensions for Archives"; 16 | module.attr("__name__") = "stacs.native.archive"; 17 | 18 | py::class_(module, "ArchiveReader") 19 | .def(py::init()) 20 | .def_property_readonly("filename", &ArchiveReader::getFilename) 21 | .def("__enter__", &ArchiveReader::enter) 22 | .def("__exit__", &ArchiveReader::exit) 23 | .def("__iter__", &ArchiveReader::iter) 24 | .def("__next__", &ArchiveReader::next) 25 | .def("read", &ArchiveReader::read) 26 | .doc() = "An interface to read archive contents (via libarchive)"; 27 | 28 | py::class_(module, "ArchiveEntry") 29 | .def_property_readonly("filename", &ArchiveEntry::getFilename) 30 | .def_property_readonly("isdir", &ArchiveEntry::isDirectory) 31 | .def_property_readonly("size", &ArchiveEntry::getSize) 32 | .doc() = "Represents a member of an Archive"; 33 | 34 | py::register_exception(module, "ArchiveError"); 35 | } 36 | -------------------------------------------------------------------------------- /stacs/native/archive/src/archiveentry.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file archivereader.cpp 3 | * @author Peter Adkins 4 | * @date 2022-07-02 5 | */ 6 | 7 | #include "archiveentry.hpp" 8 | 9 | #include 10 | 11 | #include 12 | 13 | ArchiveEntry::ArchiveEntry(struct archive_entry *entry) { 14 | this->entry = entry; 15 | } 16 | 17 | ArchiveEntry::~ArchiveEntry() { 18 | } 19 | 20 | /** 21 | * Gets the filename of the archive member. 22 | * 23 | * @return std::string 24 | */ 25 | std::string ArchiveEntry::getFilename() { 26 | return archive_entry_pathname_utf8(this->entry); 27 | } 28 | 29 | /** 30 | * Gets the file size of the archive member. 31 | * 32 | * @return int64_t 33 | */ 34 | int64_t ArchiveEntry::getSize() { 35 | return archive_entry_size(this->entry); 36 | } 37 | 38 | /** 39 | * Checks whether the current archive member is a directory. 
40 | * 41 | * @return bool 42 | */ 43 | bool ArchiveEntry::isDirectory() { 44 | if (S_ISDIR(archive_entry_mode(this->entry)) != 0) { 45 | return true; 46 | } else { 47 | return false; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /stacs/native/archive/src/archiveentry.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file archivereader.cpp 3 | * @author Peter Adkins 4 | * @date 2022-07-02 5 | */ 6 | 7 | #pragma once 8 | 9 | extern "C" { 10 | #include 11 | #include 12 | } 13 | 14 | #include 15 | 16 | class ArchiveEntry { 17 | public: 18 | ArchiveEntry(struct archive_entry *entry); 19 | ~ArchiveEntry(); 20 | 21 | std::string getFilename(); 22 | int64_t getSize(); 23 | bool isDirectory(); 24 | 25 | private: 26 | struct archive_entry *entry; 27 | }; 28 | -------------------------------------------------------------------------------- /stacs/native/archive/src/archivereader.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file archivereader.cpp 3 | * @author Peter Adkins 4 | * @date 2022-07-02 5 | */ 6 | 7 | #include "archivereader.hpp" 8 | 9 | #include "archiveentry.hpp" 10 | 11 | extern "C" { 12 | #include 13 | #include 14 | } 15 | 16 | const char *ArchiveError::what() const noexcept { 17 | return "Unable to open archive for reading\n"; 18 | } 19 | 20 | ArchiveReader::ArchiveReader(const std::string &filename) : filename(filename) { 21 | } 22 | 23 | ArchiveReader::~ArchiveReader() { 24 | } 25 | 26 | ArchiveReader *ArchiveReader::iter() { 27 | return this; 28 | } 29 | 30 | /** 31 | * Gets the filename of the currently open file. 32 | * 33 | * @return std::string 34 | */ 35 | std::string ArchiveReader::getFilename() { 36 | return this->filename; 37 | } 38 | 39 | /** 40 | * Reads the currently selected archive member into a buffer, returning the 41 | * number of bytes read. 0 will be returned when no more data is available. 42 | * 43 | * @return int 44 | */ 45 | pybind11::bytes ArchiveReader::read() { 46 | std::vector chunk; 47 | chunk.resize(CHUNK_SIZE); 48 | 49 | int result = archive_read_data(this->archive, 50 | chunk.data(), 51 | chunk.size()); 52 | 53 | if (result < 0) { 54 | throw ArchiveError(); 55 | } 56 | 57 | return pybind11::bytes(chunk.data(), result); 58 | } 59 | 60 | /** 61 | * Find and return the next member in the archive. 62 | * 63 | * @return ArchiveEntry 64 | */ 65 | ArchiveEntry ArchiveReader::next() { 66 | int result = archive_read_next_header(this->archive, &this->entry); 67 | 68 | if (result == ARCHIVE_OK) { 69 | return ArchiveEntry(this->entry); 70 | } 71 | if (result == ARCHIVE_EOF) { 72 | throw pybind11::stop_iteration(); 73 | } 74 | 75 | throw ArchiveError(); 76 | } 77 | 78 | /** 79 | * Loads an archive on Python Context Manager enter. 80 | * 81 | * @return ArchiveReader* 82 | */ 83 | ArchiveReader *ArchiveReader::enter() { 84 | this->archive = archive_read_new(); 85 | 86 | // Enable all libarchive supported filters and formats. 87 | archive_read_support_filter_all(this->archive); 88 | archive_read_support_format_all(this->archive); 89 | 90 | // Attempt to open the archive. 91 | int result = archive_read_open_filename(this->archive, 92 | this->filename.c_str(), 93 | 10240); 94 | 95 | if (result != ARCHIVE_OK) { 96 | throw ArchiveError(); 97 | } 98 | 99 | return this; 100 | } 101 | 102 | /** 103 | * Cleans up the open archive on Python Context Manager exit. 
104 | * 105 | * @return true 106 | */ 107 | bool ArchiveReader::exit(pybind11::object exc_type, 108 | pybind11::object exc_value, 109 | pybind11::object exc_traceback) { 110 | int result = archive_read_free(this->archive); 111 | 112 | if (result == ARCHIVE_OK) { 113 | return true; 114 | } 115 | 116 | return false; 117 | } 118 | -------------------------------------------------------------------------------- /stacs/native/archive/src/archivereader.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file archivereader.hpp 3 | * @author Peter Adkins 4 | * @date 2022-07-02 5 | */ 6 | 7 | #pragma once 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | const int CHUNK_SIZE = 10240; 14 | 15 | class ArchiveEntry; 16 | 17 | class ArchiveReader { 18 | public: 19 | ArchiveReader(const std::string &filename); 20 | ~ArchiveReader(); 21 | 22 | ArchiveReader *enter(); 23 | bool exit(pybind11::object exc_type, 24 | pybind11::object exc_value, 25 | pybind11::object exc_traceback); 26 | 27 | pybind11::bytes read(); 28 | ArchiveEntry next(); 29 | ArchiveReader *iter(); 30 | std::string getFilename(); 31 | 32 | private: 33 | std::vector chunk; 34 | std::string filename; 35 | struct archive *archive; 36 | struct archive_entry *entry; 37 | }; 38 | 39 | struct ArchiveError : std::exception { 40 | const char *what() const noexcept; 41 | }; 42 | -------------------------------------------------------------------------------- /stacs/scan/__about__.py: -------------------------------------------------------------------------------- 1 | """STACS - Static Token And Credential Scanner. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | __title__ = "stacs" 7 | __summary__ = "Static Token And Credential Scanner." 8 | __version__ = "0.5.1" 9 | __author__ = "Peter Adkins" 10 | __uri__ = "https://www.github.com/stacscan/stacs/" 11 | __license__ = "BSD-3-Clause" 12 | -------------------------------------------------------------------------------- /stacs/scan/__init__.py: -------------------------------------------------------------------------------- 1 | """STACS - Static Token And Credential Scanner. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan import __about__ # noqa:F401 7 | from stacs.scan import constants # noqa:F401 8 | from stacs.scan import filter # noqa:F401 9 | from stacs.scan import helper # noqa:F401 10 | from stacs.scan import loader # noqa:F401 11 | from stacs.scan import model # noqa:F401 12 | from stacs.scan import output # noqa:F401 13 | from stacs.scan import scanner # noqa:F401 14 | -------------------------------------------------------------------------------- /stacs/scan/constants.py: -------------------------------------------------------------------------------- 1 | """Define constants commonly used throughout STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | # The size of chunks to use when reading files. 7 | CHUNK_SIZE = 65536 8 | 9 | # The size, in bytes, of the sample window. 10 | WINDOW_SIZE = 20 11 | 12 | # Define the default cache directory, used to unpack archives into. 13 | CACHE_DIRECTORY = "/tmp" 14 | 15 | # Define the character to use when constructed paths to findings which are inside of 16 | # archives. 17 | ARCHIVE_FILE_SEPARATOR = "!" 18 | 19 | # Define an exit code to use when there are unsuppressed findings. 20 | EXIT_CODE_UNSUPPRESSED = 100 21 | 22 | # External licenses will be displayed during STACS banner. 
23 | EXTERNAL_LICENSES = { 24 | "libarchive": [ 25 | "https://github.com/libarchive/libarchive/blob/master/COPYING", 26 | ], 27 | "yara": [ 28 | "https://github.com/VirusTotal/yara-python/blob/master/LICENSE", 29 | ], 30 | } 31 | -------------------------------------------------------------------------------- /stacs/scan/entrypoint/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines STACS entrypoints. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan.entrypoint import cli # noqa:F401 7 | -------------------------------------------------------------------------------- /stacs/scan/entrypoint/cli.py: -------------------------------------------------------------------------------- 1 | """Defines the primary STACS CLI entrypoint. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import logging 7 | import os 8 | import shutil 9 | import sys 10 | import time 11 | from types import TracebackType 12 | from typing import Callable, List 13 | 14 | import click 15 | import stacs 16 | 17 | 18 | def unlink_error(function: Callable, path: str, exc_info: TracebackType): 19 | """Provides a mechanism to better handle failures to delete files after a run. 20 | 21 | Currently, this just logs out. In future we should look to fix the permissions on 22 | the path / parent and call func(path) to attempt the deletion again. However, we'll 23 | need to ensure that path is actually part of the cache directory. So for now, we 24 | log. 25 | """ 26 | logger = logging.getLogger("stacs") 27 | logger.warning(f"Unable to remove {path}") 28 | 29 | 30 | @click.command() 31 | @click.version_option() 32 | @click.option( 33 | "--debug", 34 | is_flag=True, 35 | help="Increase verbosity of logs for debugging", 36 | ) 37 | @click.option( 38 | "--pretty", 39 | help="Display outputs in a human-readable tree, rather than SARIF.", 40 | is_flag=True, 41 | ) 42 | @click.option( 43 | "--threads", 44 | help="The number of threads to use when processing files", 45 | default=10, 46 | ) 47 | @click.option( 48 | "--rule-pack", 49 | help="The path to the rule pack to load.", 50 | default="~/.stacs/pack.json", 51 | ) 52 | @click.option( 53 | "--ignore-list", 54 | help="The path to the ignore list to load (if required).", 55 | ) 56 | @click.option( 57 | "--skip-unprocessable", 58 | help="Skip unprocessable / corrupt archives with a warning.", 59 | is_flag=True, 60 | ) 61 | @click.option( 62 | "--cache-directory", 63 | help="The path to use as a cache - used when unpacking archives.", 64 | default=stacs.scan.constants.CACHE_DIRECTORY, 65 | ) 66 | @click.argument("paths", nargs=-1, required=True) 67 | def main( 68 | debug: bool, 69 | pretty: bool, 70 | threads: int, 71 | rule_pack: str, 72 | ignore_list: str, 73 | skip_unprocessable: bool, 74 | cache_directory: str, 75 | paths: List[str], 76 | ) -> None: 77 | """STACS - Static Token And Credential Scanner.""" 78 | logging.basicConfig( 79 | level=logging.DEBUG if debug else logging.INFO, 80 | format="%(asctime)s - %(process)d - [%(levelname)s] %(message)s", 81 | ) 82 | logger = logging.getLogger("stacs") 83 | logger.info(f"STACS running with {threads} threads") 84 | 85 | # Licenses. 86 | for project, urls in stacs.scan.constants.EXTERNAL_LICENSES.items(): 87 | logger.info(f"STACS uses {project} (licenses may be found at {' '.join(urls)})") 88 | 89 | # Load the rule pack. 
90 | logger.info(f"Attempting to load rule pack from {rule_pack}") 91 | try: 92 | pack = stacs.scan.model.pack.from_file(rule_pack) 93 | except stacs.scan.exceptions.STACSException as err: 94 | logger.error(f"Unable to load rule pack: {err}") 95 | sys.exit(-1) 96 | 97 | # Load the ignore list. 98 | ignored = [] 99 | if ignore_list: 100 | logger.info(f"Attempting to load ignore list from {ignore_list}") 101 | try: 102 | ignored = stacs.scan.model.ignore_list.from_file(ignore_list) 103 | logger.debug(f"Loaded {len(ignored.ignore)} suppressions from ignore list.") 104 | except stacs.scan.exceptions.STACSException as err: 105 | logger.error(f"Unable to load ignore list: {err}") 106 | sys.exit(-1) 107 | 108 | # Append a timestamp to the cache directory to reduce the chance of collisions. 109 | cache_directory = os.path.join(cache_directory, str(int(time.time_ns() / 1000))) 110 | try: 111 | os.mkdir(cache_directory) 112 | logger.info(f"Using cache directory at {cache_directory}") 113 | except OSError as err: 114 | logger.error(f"Unable to create cache directory at {cache_directory}: {err}") 115 | sys.exit(-2) 116 | 117 | # Generate a list of candidate files to scan. 118 | targets = [] 119 | 120 | for path in paths: 121 | path = os.path.abspath(os.path.expanduser(path)) 122 | logger.info(f"Attempting to get a list of files to scan from {path}") 123 | try: 124 | targets.extend( 125 | stacs.scan.loader.filepath.finder( 126 | path, 127 | cache_directory, 128 | skip_on_corrupt=skip_unprocessable, 129 | workers=threads, 130 | ) 131 | ) 132 | except stacs.scan.exceptions.STACSException as err: 133 | logger.error(f"Unable to generate file list: {err}") 134 | sys.exit(-2) 135 | 136 | # Submit files for analysis. 137 | logger.info(f"Found {len(targets)} files for analysis") 138 | 139 | findings = [] 140 | for scanner in stacs.scan.scanner.__all__: 141 | try: 142 | findings.extend( 143 | getattr(stacs.scan.scanner, scanner).run(targets, pack, workers=threads) 144 | ) 145 | except stacs.scan.exceptions.InvalidFormatException as err: 146 | logger.error(f"Unable to load a rule in scanner {scanner}: {err}") 147 | continue 148 | 149 | # Filter findings by allow list. 150 | if ignored: 151 | findings = stacs.scan.filter.ignore_list.process(findings, ignored) 152 | 153 | # Clean-up cache directory. 154 | shutil.rmtree(cache_directory, onerror=unlink_error) 155 | 156 | # Determine the correct exit status based on whether there were unsuppressed 157 | # findings. 158 | exit_code = 0 159 | 160 | for finding in findings: 161 | if not finding.ignore: 162 | exit_code = stacs.scan.constants.EXIT_CODE_UNSUPPRESSED 163 | 164 | # Pretty print, if requested. 165 | if pretty: 166 | logger.info("Generating 'pretty' output from findings") 167 | stacs.scan.output.pretty.render(findings, pack) 168 | sys.exit(exit_code) 169 | 170 | # Default to SARIF output to STDOUT. 171 | logger.info("Generating SARIF from findings") 172 | try: 173 | sarif = stacs.scan.output.sarif.render(path, findings, pack) 174 | except stacs.scan.exceptions.STACSException as err: 175 | logger.error(f"Unable to generate SARIF: {err}") 176 | sys.exit(-3) 177 | 178 | # TODO: Add file output as an option. 179 | logger.info(f"Found {len(findings)} findings") 180 | print(sarif) 181 | -------------------------------------------------------------------------------- /stacs/scan/exceptions.py: -------------------------------------------------------------------------------- 1 | """STACS Exceptions. 
2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | 7 | class STACSException(Exception): 8 | """The most generic form of exception raised by STACS.""" 9 | 10 | 11 | class FileAccessException(STACSException): 12 | """Indicates an error occured while attempting to access a file.""" 13 | 14 | 15 | class InvalidFileException(STACSException): 16 | """Indicates the format of a file did not match what was expected.""" 17 | 18 | 19 | class InvalidFormatException(STACSException): 20 | """Indicates that the format of a rule did not match what was expected.""" 21 | 22 | 23 | class IgnoreListException(STACSException): 24 | """Indicates an invalid ignore list was provided.""" 25 | 26 | 27 | class NotImplementedException(STACSException): 28 | """Indicates that the requested method has not been implemented.""" 29 | 30 | 31 | class NoParentException(STACSException): 32 | """Indicates that a finding does not have a parent.""" 33 | -------------------------------------------------------------------------------- /stacs/scan/filter/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines filters supported by STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan.filter import ignore_list # noqa: F401 7 | -------------------------------------------------------------------------------- /stacs/scan/filter/ignore_list.py: -------------------------------------------------------------------------------- 1 | """Defines a filter which sets the ignore flag on entries present in an ignore list. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import re 7 | from typing import List 8 | 9 | from stacs.scan.exceptions import IgnoreListException 10 | from stacs.scan.model import finding, ignore_list 11 | 12 | 13 | def by_pattern(finding: finding.Entry, ignore: ignore_list.Entry) -> bool: 14 | """Process a regex ignore list entry.""" 15 | # Short circuit if no pattern is set. 16 | if not ignore.pattern: 17 | return False 18 | 19 | # If there's a match on the path, check whether the ignore is for the same module. 20 | if re.search(ignore.pattern, finding.path): 21 | if ignore.module != finding.source.module: 22 | return False 23 | 24 | # Then check whether the ignore is for the particular reference. 25 | if ignore.references: 26 | if finding.source.reference in ignore.references: 27 | return True 28 | 29 | return False 30 | 31 | # Or check whether the ignore is for the same offest. 32 | if ignore.offset is not None: 33 | if finding.location.offset == ignore.offset: 34 | return True 35 | return False 36 | 37 | # In this case this is a fairly permissive ignore. 38 | return True 39 | 40 | return False 41 | 42 | 43 | def by_path(finding: finding.Entry, ignore: ignore_list.Entry) -> bool: 44 | """Process a path based ignore list entry.""" 45 | # Short circuit if no path is set. 46 | if not ignore.path: 47 | return False 48 | 49 | # If there's a match on the hash, check whether the ignore is for the same module. 50 | if ignore.path == finding.path: 51 | if finding.source.module != ignore.module: 52 | return False 53 | 54 | # Then check whether the ignore is for the particular reference. 55 | if ignore.references: 56 | if finding.source.reference in ignore.references: 57 | return True 58 | return False 59 | 60 | # Or check whether the ignore is for the same offest. 
61 | if ignore.offset is not None: 62 | if finding.location.offset == ignore.offset: 63 | return True 64 | return False 65 | 66 | # In this case this is a fairly permissive ignore. 67 | return True 68 | 69 | return False 70 | 71 | 72 | def by_hash(finding: finding.Entry, ignore: ignore_list.Entry) -> bool: 73 | """Process a hash based ignore list entry.""" 74 | # Short circuit if no hash is set. 75 | if not ignore.md5: 76 | return False 77 | 78 | # If there's a match on the hash, check whether the ignore is for the same module. 79 | if ignore.md5 == finding.md5: 80 | if finding.source.module != ignore.module: 81 | return False 82 | 83 | # Then check whether the ignore is for the particular reference. 84 | if ignore.references: 85 | if finding.source.reference in ignore.references: 86 | return True 87 | return False 88 | 89 | # Or check whether the ignore is for the same offest. 90 | if ignore.offset is not None: 91 | if finding.location.offset == ignore.offset: 92 | return True 93 | return False 94 | 95 | # In this case this is a fairly permissive ignore. 96 | return True 97 | 98 | return False 99 | 100 | 101 | def process( 102 | findings: List[finding.Entry], 103 | ignore_list: ignore_list.Format, 104 | ) -> List[finding.Entry]: 105 | """Processes an ignore list and marks the relevant findings as ignored.""" 106 | filtered_findings = [] 107 | 108 | for entry in findings: 109 | for ignore in ignore_list.ignore: 110 | try: 111 | if by_path(entry, ignore): 112 | ignore = finding.Ignore( 113 | ignored=True, 114 | reason=ignore.reason, 115 | ) 116 | entry.ignore = ignore 117 | break 118 | 119 | if by_pattern(entry, ignore): 120 | ignore = finding.Ignore( 121 | ignored=True, 122 | reason=ignore.reason, 123 | ) 124 | entry.ignore = ignore 125 | break 126 | 127 | if by_hash(entry, ignore): 128 | ignore = finding.Ignore( 129 | ignored=True, 130 | reason=ignore.reason, 131 | ) 132 | entry.ignore = ignore 133 | break 134 | except re.error as err: 135 | raise IgnoreListException( 136 | f"Error in ignore list entry '{ignore.reason}': {err}" 137 | ) 138 | 139 | # Add the finding to our results, whether updated or not. 140 | filtered_findings.append(entry) 141 | 142 | return filtered_findings 143 | -------------------------------------------------------------------------------- /stacs/scan/helper.py: -------------------------------------------------------------------------------- 1 | """Define helpers commonly used throughout STACS. 
2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | from typing import List 6 | 7 | import colorama 8 | from stacs.scan.constants import ARCHIVE_FILE_SEPARATOR 9 | from stacs.scan.exceptions import NoParentException 10 | 11 | 12 | def generate_virtual_path( 13 | finding: "Finding", # noqa: F821 14 | artifacts: "List[Artifact]", # noqa: F821 15 | ): 16 | """Generate a virtual path for an input file.""" 17 | virtual_path = finding.filepath 18 | 19 | try: 20 | parent = artifacts[finding.artifact].parent 21 | 22 | while True: 23 | name = artifacts[parent].filepath 24 | virtual_path = f"{name}{ARCHIVE_FILE_SEPARATOR}{virtual_path}" 25 | 26 | parent = artifacts[parent].parent 27 | except NoParentException: 28 | return virtual_path 29 | 30 | 31 | def printi(string, indent: int = 4, prefix: str = None): 32 | """Super janky wrapper to print something indented.""" 33 | for line in string.splitlines(): 34 | if prefix: 35 | print(f"{prefix}", end="") 36 | 37 | print(f"{' ' * indent}" + line) 38 | 39 | 40 | def banner(version: str) -> str: 41 | """Returns a STACS console banner.""" 42 | banner = colorama.Fore.BLUE 43 | banner += rf""" 44 | ______________ ___________ 45 | / ___/_ __/ | / ____/ ___/ 46 | \__ \ / / / /| |/ / \__ \ 47 | ___/ // / / ___ / /___ ___/ / 48 | /____//_/ /_/ |_\____//____/ 49 | 50 | STACS version {version} 51 | """ 52 | return banner 53 | -------------------------------------------------------------------------------- /stacs/scan/loader/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines loaders used by STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan.loader import archive # noqa:F401 7 | from stacs.scan.loader import filepath # noqa:F401 8 | from stacs.scan.loader import manifest # noqa:F401 9 | -------------------------------------------------------------------------------- /stacs/scan/loader/archive.py: -------------------------------------------------------------------------------- 1 | """Defines handlers for unpacking of archives. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import bz2 7 | import gzip 8 | import hashlib 9 | import logging 10 | import lzma 11 | import os 12 | import shutil 13 | import tarfile 14 | import zipfile 15 | import zlib 16 | from typing import List, Tuple 17 | 18 | import zstandard 19 | from stacs.native import archive 20 | from stacs.scan.constants import CHUNK_SIZE 21 | from stacs.scan.exceptions import FileAccessException, InvalidFileException 22 | from stacs.scan.loader.format import dmg, xar 23 | 24 | 25 | def path_hash(filepath: str) -> str: 26 | """Returns a hash of the filepath, for use with unique directory creation.""" 27 | return hashlib.md5(bytes(filepath, "utf-8")).hexdigest() 28 | 29 | 30 | def zip_handler(filepath: str, directory: str) -> None: 31 | """Attempts to extract the provided zip archive.""" 32 | log = logging.getLogger(__name__) 33 | 34 | try: 35 | os.mkdir(directory, mode=0o700) 36 | except OSError as err: 37 | raise FileAccessException( 38 | f"Unable to create unpack directory at {directory}: {err}" 39 | ) 40 | 41 | # Attempt to unpack the zipfile to the new unpack directory. 42 | try: 43 | with zipfile.ZipFile(filepath, "r") as reader: 44 | try: 45 | reader.extractall(directory) 46 | except RuntimeError as err: 47 | # Encrypted zips (why is this not a custom exception?!) 
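Every unpack handler in this module follows the same contract: it receives the archive path and a dedicated unpack directory, creates that directory itself (mode 0o700), and raises FileAccessException or InvalidFileException on failure. A minimal sketch of driving a handler directly, with placeholder paths:

source = "/tmp/example/bundle.zip"  # Hypothetical archive to unpack.
cache = "/tmp/stacs-cache"          # Hypothetical unpack cache root.
os.makedirs(cache, exist_ok=True)

# Each archive unpacks into a directory keyed on the hash of its own path (via
# path_hash above), which keeps nested and repeated unpacks from colliding.
destination = os.path.join(cache, path_hash(source))

zip_handler(source, destination)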
48 | if "encrypted" in str(err): 49 | log.warn( 50 | f"Cannot process file in archive at {filepath}, skipping: {err}" 51 | ) 52 | except NotADirectoryError as err: 53 | # Broken filepaths inside of ZIP. 54 | log.warn( 55 | f"Cannot process file in archive at {filepath}, skipping: {err}" 56 | ) 57 | except (OSError, IndexError) as err: 58 | # Several conditions, but usually a corrupt / bad input zip. 59 | log.warn( 60 | f"Cannot process file in archive at {filepath}, skipping: {err}" 61 | ) 62 | except (zipfile.BadZipFile, OSError) as err: 63 | raise InvalidFileException( 64 | f"Unable to extract archive {filepath} to {directory}: {err}" 65 | ) 66 | 67 | 68 | def tar_handler(filepath: str, directory: str) -> None: 69 | """Attempts to extract the provided tarball.""" 70 | try: 71 | os.mkdir(directory, mode=0o700) 72 | except OSError as err: 73 | raise FileAccessException( 74 | f"Unable to create unpack directory at {directory}: {err}" 75 | ) 76 | 77 | # Attempt to unpack the tarball to the new unpack directory. 78 | try: 79 | with tarfile.open(filepath, "r") as reader: 80 | reader.extractall(directory) 81 | except (PermissionError, tarfile.TarError) as err: 82 | raise InvalidFileException( 83 | f"Unable to extract archive {filepath} to {directory}: {err}" 84 | ) 85 | 86 | 87 | def gzip_handler(filepath: str, directory: str) -> None: 88 | """Attempts to extract the provided gzip archive.""" 89 | output = ".".join(os.path.basename(filepath).split(".")[:-1]) 90 | 91 | # No dots? Just use the name as is. 92 | if len(output) < 1: 93 | output = os.path.basename(filepath) 94 | 95 | # Although gzip files cannot contain more than one file, we'll still spool into 96 | # a subdirectory under the cache for consistency. 97 | try: 98 | os.mkdir(directory, mode=0o700) 99 | except OSError as err: 100 | raise FileAccessException( 101 | f"Unable to create unpack directory at {directory}: {err}" 102 | ) 103 | 104 | # TODO: This can likely be optimized for tgz files, as currently the file will be 105 | # first processed and gunzipped, and then reprocessed to be extracted. 106 | try: 107 | with gzip.open(filepath, "rb") as fin: 108 | with open(os.path.join(directory, output), "wb") as fout: 109 | shutil.copyfileobj(fin, fout, CHUNK_SIZE) 110 | except gzip.BadGzipFile as err: 111 | raise InvalidFileException( 112 | f"Unable to extract archive {filepath} to {output}: {err}" 113 | ) 114 | 115 | 116 | def bzip2_handler(filepath: str, directory: str) -> None: 117 | """Attempts to extract the provided bzip2 archive.""" 118 | output = ".".join(os.path.basename(filepath).split(".")[:-1]) 119 | 120 | # No dots? Just use the name as is. 121 | if len(output) < 1: 122 | output = os.path.basename(filepath) 123 | 124 | # Like gzip, bzip2 cannot support more than a single file. Again, we'll spool into 125 | # a subdirectory for consistency. 126 | try: 127 | os.mkdir(directory, mode=0o700) 128 | except OSError as err: 129 | raise FileAccessException( 130 | f"Unable to create unpack directory at {directory}: {err}" 131 | ) 132 | 133 | # TODO: This can likely be optimized for tbz files, as currently the file will be 134 | # first processed and gunzipped, and then reprocessed to be extracted. 
135 | try: 136 | with bz2.open(filepath, "rb") as fin: 137 | with open(os.path.join(directory, output), "wb") as fout: 138 | shutil.copyfileobj(fin, fout, CHUNK_SIZE) 139 | except (OSError, ValueError) as err: 140 | raise InvalidFileException( 141 | f"Unable to extract archive {filepath} to {output}: {err}" 142 | ) 143 | 144 | 145 | def zstd_handler(filepath: str, directory: str) -> None: 146 | """Attempts to extract the provided zstd archive.""" 147 | output = ".".join(os.path.basename(filepath).split(".")[:-1]) 148 | 149 | # No dots? Just use the name as is. 150 | if len(output) < 1: 151 | output = os.path.basename(filepath) 152 | 153 | # zstd does not appear to provide a native mechanism to compress multiple files, 154 | # and recommend 'to combine zstd with tar'. 155 | try: 156 | os.mkdir(directory, mode=0o700) 157 | except OSError as err: 158 | raise FileAccessException( 159 | f"Unable to create unpack directory at {directory}: {err}" 160 | ) 161 | 162 | try: 163 | decompressor = zstandard.ZstdDecompressor() 164 | 165 | with open(filepath, "rb") as fin: 166 | with open(os.path.join(directory, output), "wb") as fout: 167 | decompressor.copy_stream(fin, fout, read_size=CHUNK_SIZE) 168 | except (OSError, ValueError, zstandard.ZstdError) as err: 169 | raise InvalidFileException( 170 | f"Unable to extract archive {filepath} to {output}: {err}" 171 | ) 172 | 173 | 174 | def lzma_handler(filepath: str, directory: str) -> None: 175 | """Attempts to extract the provided xz / lzma archive.""" 176 | output = ".".join(os.path.basename(filepath).split(".")[:-1]) 177 | 178 | # No dots? Just use the name as is. 179 | if len(output) < 1: 180 | output = os.path.basename(filepath) 181 | 182 | # Although xz files cannot contain more than one file, we'll still spool into 183 | # a subdirectory under the cache for consistency. 184 | try: 185 | os.mkdir(directory, mode=0o700) 186 | except OSError as err: 187 | raise FileAccessException( 188 | f"Unable to create unpack directory at {directory}: {err}" 189 | ) 190 | 191 | try: 192 | with lzma.open(filepath, "rb") as fin: 193 | with open(os.path.join(directory, output), "wb") as fout: 194 | shutil.copyfileobj(fin, fout, CHUNK_SIZE) 195 | except lzma.LZMAError as err: 196 | raise InvalidFileException( 197 | f"Unable to extract archive {filepath} to {output}: {err}" 198 | ) 199 | 200 | 201 | def zlib_handler(filepath: str, directory: str) -> None: 202 | """Attempts to extract the provided zlib archive.""" 203 | output = ".".join(os.path.basename(filepath).split(".")[:-1]) 204 | 205 | # No dots? Just use the name as is. 
206 | if len(output) < 1: 207 | output = os.path.basename(filepath) 208 | 209 | try: 210 | os.mkdir(directory, mode=0o700) 211 | except OSError as err: 212 | raise FileAccessException( 213 | f"Unable to create unpack directory at {directory}: {err}" 214 | ) 215 | 216 | try: 217 | decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS) 218 | 219 | with open(filepath, "rb") as fin: 220 | with open(os.path.join(directory, output), "wb") as fout: 221 | while compressed := fin.read(CHUNK_SIZE): 222 | fout.write(decompressor.decompress(compressed)) 223 | except zlib.error as err: 224 | raise InvalidFileException( 225 | f"Unable to extract archive {filepath} to {output}: {err}" 226 | ) 227 | 228 | 229 | def xar_handler(filepath: str, directory: str) -> None: 230 | """Attempts to extract the provided XAR archive.""" 231 | try: 232 | os.mkdir(directory, mode=0o700) 233 | except OSError as err: 234 | raise FileAccessException( 235 | f"Unable to create unpack directory at {directory}: {err}" 236 | ) 237 | 238 | # Attempt to unpack the archive. 239 | try: 240 | archive = xar.XAR(filepath) 241 | archive.extract(directory) 242 | except FileAccessException as err: 243 | raise FileAccessException( 244 | f"Unable to extract archive {filepath} to {directory}: {err}" 245 | ) 246 | except InvalidFileException as err: 247 | raise InvalidFileException( 248 | f"Unable to extract archive {filepath} to {directory}: {err}" 249 | ) 250 | 251 | 252 | def dmg_handler(filepath: str, directory: str) -> None: 253 | """Attempts to extract the provided DMG archive.""" 254 | try: 255 | os.mkdir(directory, mode=0o700) 256 | except OSError as err: 257 | raise FileAccessException( 258 | f"Unable to create unpack directory at {directory}: {err}" 259 | ) 260 | 261 | # Attempt to unpack the archive. 262 | try: 263 | archive = dmg.DMG(filepath) 264 | archive.extract(directory) 265 | except FileAccessException as err: 266 | raise FileAccessException( 267 | f"Unable to extract archive {filepath} to {directory}: {err}" 268 | ) 269 | except InvalidFileException as err: 270 | raise InvalidFileException( 271 | f"Unable to extract archive {filepath} to {directory}: {err}" 272 | ) 273 | 274 | 275 | def libarchive_handler(filepath: str, directory: str) -> None: 276 | """Attempts to extract the provided archive with libarchive.""" 277 | try: 278 | os.mkdir(directory, mode=0o700) 279 | except OSError as err: 280 | raise FileAccessException( 281 | f"Unable to create unpack directory at {directory}: {err}" 282 | ) 283 | 284 | # Attempt to unpack the archive to the new unpack directory. 285 | try: 286 | with archive.ArchiveReader(filepath) as reader: 287 | for entry in reader: 288 | member = entry.filename 289 | member = member.lstrip("../") 290 | member = member.lstrip("./") 291 | 292 | if entry.filename == ".": 293 | continue 294 | 295 | destination = os.path.join(directory, member) 296 | parent = os.path.dirname(destination) 297 | 298 | # Handle odd cases where a file was created where a directory needs to 299 | # be. 300 | if os.path.exists(parent) and os.path.isfile(parent): 301 | os.unlink(parent) 302 | 303 | if os.path.isdir(destination): 304 | continue 305 | 306 | # Create parent directories, as required. 307 | if not os.path.isdir(parent): 308 | os.makedirs(parent) 309 | 310 | # If the entry is a directory, create it and move on. 
311 | if entry.isdir: 312 | os.makedirs(destination, exist_ok=True) 313 | continue 314 | 315 | with open(destination, "wb") as fout: 316 | while True: 317 | chunk = reader.read() 318 | if len(chunk) > 0: 319 | fout.write(chunk) 320 | continue 321 | break 322 | except archive.ArchiveError as err: 323 | raise InvalidFileException( 324 | f"Unable to extract archive {filepath} to {directory}: {err}" 325 | ) 326 | 327 | 328 | def get_mimetype(chunk: bytes, start: bool) -> List[Tuple[int, str]]: 329 | """Attempts to locate the appropriate handler for a given file. 330 | 331 | This may fail if the required "magic" is at an offset greater than the CHUNK_SIZE. 332 | However, currently this is not an issue, but may need to be revisited later as more 333 | archive types are supported. 334 | 335 | The start flag is used to indicate whether the current chunk is from the start of 336 | the file, or the end of the file. Today we only support checking the first and last 337 | chunk. 338 | 339 | Returns a list of weights and MIME types as a tuple. This weight is specified by 340 | handlers and is used to allow "container" formats, which may contain multiple other 341 | files of various matching types, to "win" the match - due to a higher weight. 342 | """ 343 | for name, options in MIME_TYPE_HANDLERS.items(): 344 | offset = options["offset"] 345 | magic = options["magic"] 346 | 347 | # If looking at the last chunk, only use negative offsets. This is to prevent 348 | # false positives as position 0 in the last chunk is actually N bytes into the 349 | # file. This is especially problematic for formats with short magic numbers, 350 | # such as zlib. 351 | if not start and offset >= 0: 352 | continue 353 | 354 | # TODO: How to handle multiple matches in the same chunk? Is this this likely? 355 | for format in magic: 356 | if chunk[offset : (offset + len(format))] == format: # noqa: E203 357 | return (options["weight"], name) 358 | 359 | return (0, None) 360 | 361 | 362 | # Define all supported archives and their handlers. As we currently only support a small 363 | # list of types we can just define file magic directly here, rather than use an external 364 | # library. This removes the need for dependencies which may have other system 365 | # dependencies - such as libmagic. It should also provide a small a speed up during 366 | # unpacking, as we're only looking for a small number of types. 
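As a rough sketch of how the table below is consulted: feeding the first chunk of a gzip file to get_mimetype() matches on the 0x1F 0x8B magic and returns the weight alongside the MIME type, which the loader then maps to an unpack handler.

# First bytes of a (hypothetical) gzip file: the two magic bytes, padded out.
chunk = b"\x1f\x8b" + b"\x00" * 510

weight, mime = get_mimetype(chunk, start=True)
# weight == 1, mime == "application/gzip"; the unpack routine is then looked up
# via MIME_TYPE_HANDLERS[mime]["handler"].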
367 | MIME_TYPE_HANDLERS = { 368 | "application/x-tar": { 369 | "weight": 1, 370 | "offset": 257, 371 | "magic": [ 372 | bytearray([0x75, 0x73, 0x74, 0x61, 0x72]), 373 | ], 374 | "handler": tar_handler, 375 | }, 376 | "application/gzip": { 377 | "weight": 1, 378 | "offset": 0, 379 | "magic": [ 380 | bytearray([0x1F, 0x8B]), 381 | ], 382 | "handler": gzip_handler, 383 | }, 384 | "application/x-bzip2": { 385 | "weight": 1, 386 | "offset": 0, 387 | "magic": [ 388 | bytearray([0x42, 0x5A, 0x68]), 389 | ], 390 | "handler": bzip2_handler, 391 | }, 392 | "application/zip": { 393 | "weight": 1, 394 | "offset": 0, 395 | "magic": [ 396 | bytearray([0x50, 0x4B, 0x03, 0x04]), 397 | bytearray([0x50, 0x4B, 0x05, 0x06]), 398 | bytearray([0x50, 0x4B, 0x07, 0x08]), 399 | ], 400 | "handler": zip_handler, 401 | }, 402 | "application/zlib": { 403 | "weight": 1, 404 | "offset": 0, 405 | "magic": [ 406 | bytearray([0x78, 0x01]), 407 | bytearray([0x78, 0x5E]), 408 | bytearray([0x78, 0x9C]), 409 | bytearray([0x78, 0xDA]), 410 | ], 411 | "handler": zlib_handler, 412 | }, 413 | "application/x-xz": { 414 | "weight": 1, 415 | "offset": 0, 416 | "magic": [ 417 | bytearray([0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00]), 418 | ], 419 | "handler": lzma_handler, 420 | }, 421 | "application/x-rpm": { 422 | "weight": 1, 423 | "offset": 0, 424 | "magic": [ 425 | bytearray([0xED, 0xAB, 0xEE, 0xDB]), 426 | ], 427 | "handler": libarchive_handler, 428 | }, 429 | "application/x-iso9660-image": { 430 | "weight": 1, 431 | "offset": 0x8001, 432 | "magic": [ 433 | bytearray([0x43, 0x44, 0x30, 0x30, 0x31]), 434 | ], 435 | "handler": libarchive_handler, 436 | }, 437 | "application/x-7z-compressed": { 438 | "weight": 1, 439 | "offset": 0, 440 | "magic": [ 441 | bytearray([0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C]), 442 | ], 443 | "handler": libarchive_handler, 444 | }, 445 | "application/x-cpio": { 446 | "weight": 1, 447 | "offset": 0, 448 | "magic": [ 449 | bytearray([0xC7, 0x71]), # 070707 in octal (Little Endian). 450 | bytearray([0x71, 0xC7]), # 070707 in octal (Big Endian). 451 | bytearray([0x30, 0x37, 0x30, 0x37, 0x30, 0x31]), # "070701" 452 | bytearray([0x30, 0x37, 0x30, 0x37, 0x30, 0x32]), # "070702" 453 | bytearray([0x30, 0x37, 0x30, 0x37, 0x30, 0x37]), # "070707" 454 | ], 455 | "handler": libarchive_handler, 456 | }, 457 | "application/x-xar": { 458 | "weight": 1, 459 | "offset": 0, 460 | "magic": [ 461 | bytearray([0x78, 0x61, 0x72, 0x21]), 462 | ], 463 | "handler": xar_handler, 464 | }, 465 | "application/vnd.ms-cab-compressed": { 466 | "weight": 1, 467 | "offset": 0, 468 | "magic": [ 469 | bytearray([0x4D, 0x53, 0x43, 0x46]), 470 | ], 471 | "handler": libarchive_handler, 472 | }, 473 | "application/x-archive": { 474 | "weight": 1, 475 | "offset": 0, 476 | "magic": [ 477 | bytearray([0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E]), 478 | ], 479 | "handler": libarchive_handler, 480 | }, 481 | "application/vnd.rar": { 482 | "weight": 1, 483 | "offset": 0, 484 | "magic": [ 485 | bytearray([0x52, 0x61, 0x72, 0x21, 0x1A, 0x07]), 486 | ], 487 | "handler": libarchive_handler, 488 | }, 489 | "application/zstd": { 490 | "weight": 1, 491 | "offset": 0, 492 | "magic": [ 493 | bytearray([0x28, 0xB5, 0x2F, 0xFD]), 494 | ], 495 | "handler": zstd_handler, 496 | }, 497 | "application/x-apple-diskimage": { 498 | "weight": 2, # "container" formats are weighted higher. 
499 | "offset": -512, 500 | "magic": [ 501 | bytearray([0x6B, 0x6F, 0x6C, 0x79]), 502 | ], 503 | "handler": dmg_handler, 504 | }, 505 | } 506 | -------------------------------------------------------------------------------- /stacs/scan/loader/filepath.py: -------------------------------------------------------------------------------- 1 | """Defines a file path loader for STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import hashlib 7 | import logging 8 | import os 9 | import re 10 | import shutil 11 | from concurrent.futures import ThreadPoolExecutor, as_completed 12 | from typing import List 13 | 14 | from stacs.scan.constants import ARCHIVE_FILE_SEPARATOR, CHUNK_SIZE 15 | from stacs.scan.exceptions import FileAccessException, InvalidFileException 16 | from stacs.scan.loader import archive 17 | from stacs.scan.model.manifest import Entry 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def metadata(filepath: str, overlay: str = None, parent: str = None) -> Entry: 23 | """Generates a hash and determines the mimetype of the input file.""" 24 | md5 = hashlib.md5() 25 | mime = None 26 | winner = 0 27 | 28 | # Read the file in chunks. 29 | try: 30 | stat = os.stat(filepath) 31 | 32 | with open(filepath, "rb") as fin: 33 | while chunk := fin.read(CHUNK_SIZE): 34 | md5.update(chunk) 35 | 36 | # Attempt to determine the mime-type using the first and last chunk. 37 | # Note: This may need to change further in future. 38 | if (not mime and fin.tell() <= CHUNK_SIZE) or len(chunk) < CHUNK_SIZE: 39 | start = False if len(chunk) < CHUNK_SIZE else True 40 | (score, candidate) = archive.get_mimetype(chunk, start) 41 | 42 | # Swap the winner if the score is higher. 43 | if score > winner: 44 | mime = candidate 45 | winner = score 46 | except OSError as err: 47 | raise FileAccessException(f"Unable to open file at {filepath}: {err}") 48 | 49 | return Entry( 50 | path=filepath, 51 | md5=md5.hexdigest(), 52 | mime=mime, 53 | overlay=overlay, 54 | parent=parent, 55 | size=stat.st_size, 56 | ) 57 | 58 | 59 | def walker(path: str, skip_on_eacces: bool) -> List[str]: 60 | """Recursively walk a file path, returning a list of all files.""" 61 | entries = [] 62 | 63 | # TODO: Would moving walker to a generator yield a performance increase, or lead to 64 | # higher disk contention due to the hasher running at the same time? 65 | try: 66 | with os.scandir(path) as scan: 67 | for handle in scan: 68 | try: 69 | # Recurse on directories, but not symlinks. 70 | if handle.is_dir() and not handle.is_symlink(): 71 | entries.extend(walker(handle.path, skip_on_eacces)) 72 | 73 | # Track files, but not symlinks. 74 | if handle.is_file() and not handle.is_symlink(): 75 | entries.append(handle.path) 76 | except PermissionError: 77 | if not skip_on_eacces: 78 | raise 79 | except OSError: 80 | # This is usually due to too many levels of symlinks. However, other 81 | # cases are likely with a large enough input. 
82 | continue 83 | except NotADirectoryError: 84 | entries.append(path) 85 | 86 | return list(set(entries)) 87 | 88 | 89 | def qualify(path: str) -> str: 90 | """Add the scheme to a file path, if required.""" 91 | if path.startswith("/"): 92 | return f"file://{path}" 93 | else: 94 | return path 95 | 96 | 97 | def finder( 98 | path: str, 99 | cache: str, 100 | workers: int = 10, 101 | skip_on_eacces: bool = True, 102 | skip_on_corrupt: bool = False, 103 | ) -> List[Entry]: 104 | """Processes the input path, returning a list of all files and their hashes.""" 105 | entries = [] 106 | futures = dict() 107 | 108 | # Run the metadata enumerator in a thread pool as we're likely to be I/O bound. 109 | with ThreadPoolExecutor(max_workers=workers) as pool: 110 | futures = { 111 | pool.submit(metadata, file): file for file in walker(path, skip_on_eacces) 112 | } 113 | 114 | # A loop and counter is used here to ensure that additional work which may be 115 | # submitted during the 'final loop' isn't accidentally ignored. 116 | while True: 117 | complete = 0 118 | for future in as_completed(futures): 119 | complete += 1 120 | 121 | try: 122 | result = future.result() 123 | except FileAccessException: 124 | if not skip_on_eacces: 125 | raise 126 | 127 | # Track the result and then remove the future from the initial futures 128 | # list so that these results aren't returned again next iteration. 129 | entries.append(result) 130 | del futures[future] 131 | 132 | # Check it the file was found to be an archive, and if so, unpack it. 133 | handler = archive.MIME_TYPE_HANDLERS.get(result.mime, {}).get("handler") 134 | if not handler: 135 | continue 136 | 137 | # Remove any existing previously unpacked files, then unpack the archive 138 | # and submit extracted files back into the queue. This is to allow for 139 | # easy recursive unpacking of nested archives. 140 | destination = os.path.join(cache, archive.path_hash(result.path)) 141 | shutil.rmtree(destination, ignore_errors=True) 142 | 143 | try: 144 | handler(result.path, destination) 145 | except InvalidFileException as err: 146 | # Only skip with a warning if explicitly configured to do so. 147 | if skip_on_corrupt: 148 | logger.warning( 149 | f"Skipping file at {result.path} due to error when " 150 | f"processing: {err}" 151 | ) 152 | else: 153 | raise 154 | 155 | for file in walker(destination, skip_on_eacces): 156 | # The overlay path is a 'virtual' path that is constructed based on 157 | # the archive the file appears inside of, and the path of the file 158 | # inside of the archive. However, as archives may be nested, we need 159 | # to check whether we already have an overlay and, if set, use that 160 | # value instead. 161 | if result.overlay: 162 | parent = result.overlay 163 | else: 164 | parent = result.path 165 | 166 | logger.debug(f"Processing {file}, extracted from archive {parent}") 167 | overlay = ( 168 | f"{parent}" 169 | f"{ARCHIVE_FILE_SEPARATOR}" 170 | f"{re.sub(rf'^{destination}/?', '', file)}" 171 | ) 172 | 173 | # Submit back to the pool for processing. 174 | submission = pool.submit( 175 | metadata, file, overlay=overlay, parent=result.md5 176 | ) 177 | futures[submission] = file 178 | 179 | if complete == 0: 180 | break 181 | 182 | return entries 183 | -------------------------------------------------------------------------------- /stacs/scan/loader/format/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines file format handlers used by STACS. 
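Tying the file path loader above together: a caller hands finder() a directory to scan and a cache directory for unpacked archives, and receives manifest entries for every file found, including files extracted from nested archives. A minimal sketch (paths and worker count are illustrative):

from stacs.scan.loader import filepath

entries = filepath.finder(
    "/path/to/scan",           # Directory (or single file) to walk.
    cache="/tmp/stacs-cache",  # Where archives are unpacked to.
    workers=4,
)

for entry in entries:
    print(entry.md5, entry.mime or "-", entry.overlay or entry.path)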
2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan.loader.format import dmg, xar # noqa: F401 7 | -------------------------------------------------------------------------------- /stacs/scan/loader/format/dmg.py: -------------------------------------------------------------------------------- 1 | """Provides an Apple Disk Image (DMG) parser and extractor. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import bz2 7 | import lzma 8 | import os 9 | import plistlib 10 | import struct 11 | import zlib 12 | from collections import namedtuple 13 | from typing import List 14 | 15 | from pydantic import BaseModel, Extra, Field 16 | from stacs.scan.exceptions import FileAccessException, InvalidFileException 17 | 18 | # Structures names and geometry are via "Demystifying the DMG File Format" 19 | # by Jonathan Levin (http://newosxbook.com/). 20 | DMG_HEADER_MAGIC = b"koly" 21 | DMG_HEADER = ">4sIIIQQQQQII16sII128sQQ120sII128sIQIII" 22 | DMG_HEADER_MAGIC_SZ = len(DMG_HEADER_MAGIC) 23 | DMG_HEADER_SZ = struct.calcsize(DMG_HEADER) 24 | 25 | DMG_BLOCK_TABLE_MAGIC = b"mish" 26 | DMG_BLOCK_TABLE = ">4sIQQQIIIIIIIIII128sI" 27 | DMG_BLOCK_TABLE_MAGIC_SZ = len(DMG_BLOCK_TABLE_MAGIC) 28 | DMG_BLOCK_TABLE_SZ = struct.calcsize(DMG_BLOCK_TABLE) 29 | 30 | DMG_BLOCK_CHUNK = ">I4sQQQQ" 31 | DMG_BLOCK_CHUNK_SZ = struct.calcsize(DMG_BLOCK_CHUNK) 32 | 33 | DMGHeader = namedtuple( 34 | "DMGHeader", 35 | [ 36 | "signature", 37 | "version", 38 | "header_size", 39 | "flags", 40 | "running_data_fork_offset", 41 | "data_fork_offset", 42 | "data_fork_length", 43 | "rsrc_fork_offset", 44 | "rsrc_fork_length", 45 | "segment_number", 46 | "segment_count", 47 | "segment_id", 48 | "data_checksum_type", 49 | "data_checksum_size", 50 | "data_checksum", 51 | "xml_offset", 52 | "xml_length", 53 | "reserved_1", 54 | "checksum_Type", 55 | "checksum_Size", 56 | "checksum", 57 | "image_variant", 58 | "sector_count", 59 | "reserved_2", 60 | "reserved_3", 61 | "reserved_4", 62 | ], 63 | ) 64 | DMGBlockTable = namedtuple( 65 | "DMGBlockTable", 66 | [ 67 | "signature", 68 | "version", 69 | "sector_number", 70 | "sector_count", 71 | "data_offset", 72 | "buffers_needed", 73 | "block_descriptors", 74 | "reserved_1", 75 | "reserved_2", 76 | "reserved_3", 77 | "reserved_4", 78 | "reserved_5", 79 | "reserved_6", 80 | "checksum_ype", 81 | "checksum_ize", 82 | "checksum", 83 | "chunk_count", 84 | ], 85 | ) 86 | DMGBlockChunk = namedtuple( 87 | "DMGBlockChunk", 88 | [ 89 | "type", 90 | "comment", 91 | "sector_number", 92 | "sector_count", 93 | "compressed_offset", 94 | "compressed_length", 95 | ], 96 | ) 97 | 98 | 99 | class DMGBlock(BaseModel, extra=Extra.forbid): 100 | """Expresses a DMG block entry and its chunks.""" 101 | 102 | name: str 103 | chunks: List[DMGBlockChunk] = Field([]) 104 | 105 | 106 | class DMG: 107 | """Provides an Apple Disk Image (DMG) parser and extractor.""" 108 | 109 | def __init__(self, filepath: str): 110 | self.archive = filepath 111 | 112 | try: 113 | with open(self.archive, "rb") as fin: 114 | # DMG metadata is at the end of the file. 115 | fin.seek(-DMG_HEADER_SZ, 2) 116 | 117 | # Ensure the provided file is actually a DMG. 118 | if fin.read(DMG_HEADER_MAGIC_SZ) != DMG_HEADER_MAGIC: 119 | raise InvalidFileException("File does not appear to be a DMG") 120 | 121 | # Rewind and attempt to read in header. 
122 | fin.seek(-DMG_HEADER_MAGIC_SZ, 1)
123 | self._header = DMGHeader._make(
124 | struct.unpack(DMG_HEADER, fin.read(DMG_HEADER_SZ))
125 | )
126 | 
127 | # Read the XML property list.
128 | fin.seek(self._header.xml_offset, 0)
129 | self._plist = plistlib.loads(fin.read(self._header.xml_length))
130 | except OSError as err:
131 | raise FileAccessException(f"Unable to read archive: {err}")
132 | 
133 | def _parse_blocks(self) -> List[DMGBlock]:
134 | """Recursively parse blocks and their associated chunks."""
135 | candidates = []
136 | 
137 | # Read the BLKX entries from the resource-fork section of the plist.
138 | for entry in self._plist.get("resource-fork", {}).get("blkx", []):
139 | data = entry.get("Data")
140 | name = entry.get("Name")
141 | 
142 | block = DMGBlock(name=name)
143 | table = DMGBlockTable._make(
144 | struct.unpack(DMG_BLOCK_TABLE, data[0:DMG_BLOCK_TABLE_SZ])
145 | )
146 | 
147 | # Extract all blocks and their associated chunks from the encoded "Data"
148 | # inside of the extracted plist.
149 | start = DMG_BLOCK_TABLE_SZ
150 | 
151 | for _ in range(0, table.chunk_count):
152 | end = start + DMG_BLOCK_CHUNK_SZ
153 | block.chunks.append(
154 | DMGBlockChunk._make(struct.unpack(DMG_BLOCK_CHUNK, data[start:end]))
155 | )
156 | start = end
157 | 
158 | candidates.append(block)
159 | 
160 | return candidates
161 | 
162 | def extract(self, destination):
163 | """Extract all blocks from the DMG to the optional destination directory."""
164 | parent = os.path.basename(self.archive)
165 | 
166 | try:
167 | os.makedirs(destination, exist_ok=True)
168 | except OSError as err:
169 | raise FileAccessException(
170 | f"Unable to create directory during extraction: {err}"
171 | )
172 | 
173 | # Process each chunk inside of each block. A DMG has multiple blocks, and a
174 | # block has N chunks.
175 | for idx, block in enumerate(self._parse_blocks()):
176 | output = os.path.join(destination, f"{parent}.{idx}.blob")
177 | 
178 | for chunk in block.chunks:
179 | # Skip Ignored, Comment, and Last blocks (respectively).
180 | if chunk.type in [0x00000002, 0x7FFFFFFE, 0xFFFFFFFF]:
181 | continue
182 | 
183 | try:
184 | with open(self.archive, "rb") as fin, open(output, "ab") as fout:
185 | fin.seek(chunk.compressed_offset)
186 | 
187 | # 0x80000005 - Zlib.
188 | if chunk.type == 0x80000005:
189 | fout.write(
190 | zlib.decompress(fin.read(chunk.compressed_length))
191 | )
192 | 
193 | # 0x80000006 - BZ2.
194 | if chunk.type == 0x80000006:
195 | fout.write(
196 | bz2.decompress(fin.read(chunk.compressed_length))
197 | )
198 | 
199 | # 0x80000008 - LZMA.
200 | if chunk.type == 0x80000008:
201 | fout.write(
202 | lzma.decompress(fin.read(chunk.compressed_length))
203 | )
204 | 
205 | # 0x00000000 - Zero Fill.
206 | if chunk.type == 0x00000000:
207 | fout.write(b"\x00" * chunk.compressed_length)
208 | continue
209 | except (OSError, lzma.LZMAError, ValueError) as err:
210 | raise InvalidFileException(err)
211 | 
--------------------------------------------------------------------------------
/stacs/scan/loader/format/xar.py:
--------------------------------------------------------------------------------
1 | """Provides an eXtensible ARchive parser and extractor.
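As a usage sketch for the XAR parser defined below (the package path is a placeholder): the table-of-contents is parsed up front, so entries can be listed without extracting anything.

package = XAR("/tmp/example/tools.pkg")  # Hypothetical XAR / .pkg archive.

for entry in package.entries():
    print(entry.path, entry.size, entry.encoding)

# Extraction mirrors the other format handlers: everything is written beneath
# the provided destination directory.
package.extract("/tmp/stacs-cache/tools")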
2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import os 7 | import struct 8 | import xml.etree.ElementTree as ET 9 | import zlib 10 | from collections import namedtuple 11 | from typing import List 12 | 13 | from stacs.scan.constants import CHUNK_SIZE 14 | from stacs.scan.exceptions import FileAccessException, InvalidFileException 15 | 16 | XAR_MAGIC = b"xar!" 17 | XAR_HEADER = ">4sHHQQI" 18 | XAR_HEADER_SZ = struct.calcsize(XAR_HEADER) 19 | 20 | # via xar/include/xar.h.in 21 | XARHeader = namedtuple( 22 | "XARHeader", 23 | [ 24 | "magic", 25 | "size", 26 | "version", 27 | "toc_length_compressed", 28 | "toc_length_uncompressed", 29 | "cksum_alg", 30 | ], 31 | ) 32 | 33 | XAREntry = namedtuple( 34 | "XAREntry", 35 | [ 36 | "length", 37 | "offset", 38 | "size", 39 | "encoding", 40 | "archived_cksum_kind", 41 | "archived_cksum", 42 | "path", 43 | "name", 44 | "kind", 45 | ], 46 | ) 47 | 48 | 49 | class XAR: 50 | """Provides an eXtensible ARchive Format parser and extrator.""" 51 | 52 | def __init__(self, filepath: str): 53 | self.archive = filepath 54 | 55 | try: 56 | with open(self.archive, "rb") as fin: 57 | # Ensure the provided file is actually a XAR. 58 | if fin.read(4) != XAR_MAGIC: 59 | raise InvalidFileException("File does not appear to be a XAR") 60 | 61 | # Rewind and attempt to read in header. 62 | fin.seek(0) 63 | self._header = XARHeader._make( 64 | struct.unpack(XAR_HEADER, fin.read(XAR_HEADER_SZ)) 65 | ) 66 | 67 | # Read and decompress the table-of-contents. 68 | fin.seek(self._header.size) 69 | 70 | self._toc = ET.fromstring( 71 | str( 72 | zlib.decompress(fin.read(self._header.toc_length_uncompressed)), 73 | "utf-8", 74 | ) 75 | ) 76 | except zlib.error as err: 77 | raise InvalidFileException(f"Unable to read table-of-contents: {err}") 78 | except OSError as err: 79 | raise FileAccessException(f"Unable to read archive: {err}") 80 | 81 | def _parse_entries(self, root, directory="") -> List[XAREntry]: 82 | """Recursively parse entries from the table-of-contents.""" 83 | candidates = [] 84 | 85 | # Strip any slashes, only using the last path component. 86 | kind = root.find(".type").text 87 | name = root.find(".name").text.split("/")[-1] 88 | path = os.path.join(directory, name) 89 | 90 | # Recurse for directories 91 | if kind == "directory": 92 | for element in root.findall(".//file"): 93 | candidates.extend(self._parse_entries(element, directory=path)) 94 | 95 | if kind == "file": 96 | size = int(root.find(".//data/size").text) 97 | length = int(root.find(".//data/length").text) 98 | offset = int(root.find(".//data/offset").text) 99 | encoding = root.find(".//data/encoding").get("style") 100 | archived_cksum = root.find(".//data/archived-checksum").text 101 | archived_cksum_kind = root.find(".//data/archived-checksum").get("style") 102 | 103 | candidates.append( 104 | XAREntry( 105 | length, 106 | offset, 107 | size, 108 | encoding, 109 | archived_cksum, 110 | archived_cksum_kind, 111 | path, 112 | name, 113 | kind, 114 | ) 115 | ) 116 | 117 | return candidates 118 | 119 | def entries(self) -> List[XAREntry]: 120 | """Return a list of entries in this XAR.""" 121 | candidates = [] 122 | 123 | for entry in self._toc.findall("./toc/file"): 124 | candidates.extend(self._parse_entries(entry)) 125 | 126 | return candidates 127 | 128 | def extract(self, destination): 129 | """Extract all entries from the XAR to the optional destination directory.""" 130 | # Offset must be adjusted by the size of the ToC and the header. 
This is as the 131 | # offset is from the first byte AFTER the header and compressed ToC. 132 | header_size = self._header.size + self._header.toc_length_compressed 133 | 134 | for entry in self.entries(): 135 | parent = os.path.dirname(os.path.join(destination, entry.path)) 136 | 137 | try: 138 | os.makedirs(parent, exist_ok=True) 139 | except OSError as err: 140 | raise FileAccessException( 141 | f"Unable to create directory during extraction: {err}" 142 | ) 143 | 144 | # Check whether a decompressor should be used. 145 | decompressor = None 146 | 147 | if entry.encoding == "application/x-gzip": 148 | decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 32).decompress 149 | 150 | # Perform extraction. 151 | # TODO: No decompression or integrity checking is performed today, nor are 152 | # ownership and modes followed. 153 | remaining = entry.length 154 | 155 | try: 156 | with open(self.archive, "rb") as fin: 157 | with open(os.path.join(destination, entry.path), "wb") as fout: 158 | fin.seek(header_size + entry.offset) 159 | 160 | # Read all data in chunks to not balloon memory when processing 161 | # large files. 162 | while remaining > 0: 163 | delta = remaining - CHUNK_SIZE 164 | if delta < 0: 165 | read_length = remaining 166 | else: 167 | read_length = CHUNK_SIZE 168 | 169 | # Use a decompressor, if required. 170 | if decompressor: 171 | fout.write(decompressor(fin.read(read_length))) 172 | else: 173 | fout.write(fin.read(read_length)) 174 | 175 | remaining -= read_length 176 | except (OSError, zlib.error) as err: 177 | raise InvalidFileException(err) 178 | -------------------------------------------------------------------------------- /stacs/scan/loader/manifest.py: -------------------------------------------------------------------------------- 1 | """Defines a manifest loader for STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | # 7 | # TODO: Implement the manifest loader. This should take the contents of a manifest 8 | # which matches the stacs.scan.model.manifest.Format schema. It should also check 9 | # whether all requested files exist, and generate MD5 sums for them if not 10 | # specified in the manifest 'Entry' (stacs.scan.model.manifest.Entry). 11 | # 12 | -------------------------------------------------------------------------------- /stacs/scan/model/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines models used by STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan.model import finding # noqa: F401 7 | from stacs.scan.model import ignore_list # noqa: F401 8 | from stacs.scan.model import manifest # noqa: F401 9 | from stacs.scan.model import pack # noqa: F401 10 | -------------------------------------------------------------------------------- /stacs/scan/model/finding.py: -------------------------------------------------------------------------------- 1 | """Defines types to assist with reporting findings. 
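As a quick orientation to the models defined below: a single finding bundles where it was found, what was matched, and how it should be treated. A small sketch with illustrative values (the rule reference and hash are placeholders):

entry = Entry(
    path="config/service.env",
    md5="0" * 32,
    confidence=80.0,
    location=Location(line=12, offset=481),
    sample=Sample(
        window=20,
        before="password = ",
        finding="hunter2",
        after="\n",
        binary=False,
    ),
    source=Source(
        module="stacs.scan.scanner.rules",
        reference="CredentialGeneric",
        description="Potential credential in configuration file.",
    ),
)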
2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from typing import List 7 | 8 | from pydantic import BaseModel, Extra, Field 9 | 10 | 11 | class Location(BaseModel, extra=Extra.forbid): 12 | """Defines data associated with a location of a finding.""" 13 | 14 | line: int = Field( 15 | None, 16 | title="The line number which contains the finding.", 17 | ) 18 | offset: int = Field( 19 | None, 20 | title="The offset from the start of the file of the finding (in bytes).", 21 | ) 22 | 23 | 24 | class Source(BaseModel, extra=Extra.forbid): 25 | """Defines data associated with the source of a finding.""" 26 | 27 | module: str = Field( 28 | title="The STACS module which generated the finding.", 29 | ) 30 | description: str = Field( 31 | None, 32 | title="A description of the finding", 33 | ) 34 | reference: str = Field( 35 | title="A reference to the element which generated the finding.", 36 | ) 37 | tags: List[str] = Field( 38 | [], 39 | title="A list of tags associated with the finding.", 40 | ) 41 | version: str = Field( 42 | None, 43 | title="The version of the element which generated the finding.", 44 | ) 45 | 46 | 47 | class Sample(BaseModel, extra=Extra.forbid): 48 | """The content and context of a finding.""" 49 | 50 | window: int = Field( 51 | title="The number of bytes before and after a finding included in the sample.", 52 | ) 53 | before: str = Field( 54 | title="The contents of N bytes before the finding.", 55 | ) 56 | after: str = Field( 57 | title="The contents of N bytes after the finding.", 58 | ) 59 | finding: str = Field( 60 | title="The contents of the finding.", 61 | ) 62 | binary: bool = Field( 63 | title="Indicates that the finding was binary and is base64 encoded." 64 | ) 65 | 66 | 67 | class Ignore(BaseModel, extra=Extra.forbid): 68 | """Defines the ignore schema of a finding.""" 69 | 70 | ignored: bool = Field( 71 | False, 72 | title="Whether the finding should be ignored due to allow list.", 73 | ) 74 | reason: str = Field( 75 | title="The reason to ignore the finding.", 76 | ) 77 | 78 | 79 | class Entry(BaseModel, extra=Extra.forbid): 80 | """Defines the schema of a finding.""" 81 | 82 | path: str = Field( 83 | title="The path to the file.", 84 | ) 85 | md5: str = Field( 86 | title="The MD5 sum of the file.", 87 | ) 88 | confidence: float = Field( 89 | None, 90 | title="The confidence of the finding.", 91 | ) 92 | location: Location = Field( 93 | None, 94 | title="The location of the finding in the input file.", 95 | ) 96 | sample: Sample = Field( 97 | None, 98 | title="Information relating to the content of the finding.", 99 | ) 100 | source: Source = Field( 101 | None, 102 | title="Information about the source of the finding.", 103 | ) 104 | ignore: Ignore = Field( 105 | None, 106 | title="Information about whether the entry should be ignored.", 107 | ) 108 | -------------------------------------------------------------------------------- /stacs/scan/model/ignore_list.py: -------------------------------------------------------------------------------- 1 | """Defines types to assist with loading and processing of ignore lists. 
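As an orientation for the schema defined below: on disk an ignore list is a JSON document loaded via from_file(), which also resolves any included lists. An in-memory equivalent (rule reference and hash are illustrative) might look like:

document = {
    "include": [],
    "ignore": [
        {
            "pattern": r"tests/fixtures/.*",
            "references": ["CredentialGeneric"],
            "reason": "Test fixtures intentionally contain sample credentials.",
        },
        {
            "md5": "d41d8cd98f00b204e9800998ecf8427e",
            "reason": "Empty placeholder file.",
        },
    ],
}

parsed = Format(**document)  # Format is defined below.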
2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import json 7 | import os 8 | from typing import List 9 | 10 | from pydantic import BaseModel, Extra, Field, validator 11 | from stacs.scan.exceptions import IgnoreListException, STACSException 12 | 13 | 14 | class Entry(BaseModel, extra=Extra.forbid): 15 | """Defines the schema of an ignore.""" 16 | 17 | path: str = Field( 18 | None, 19 | title="The path of a file to ignore.", 20 | ) 21 | pattern: str = Field( 22 | None, 23 | title="A pattern of the file path to ignore.", 24 | ) 25 | reason: str = Field( 26 | title="The reason for ignoring the finding.", 27 | ) 28 | md5: str = Field( 29 | None, 30 | title="The MD5 sum of the file to ignore.", 31 | ) 32 | module: str = Field( 33 | "stacs.scan.scanner.rules", 34 | title="Which module to ignore findings from.", 35 | ) 36 | references: List[str] = Field( 37 | [], 38 | title=( 39 | "A list of references to ignore findings from, defaults to all if not set." 40 | ), 41 | ) 42 | offset: int = Field( 43 | None, 44 | title="The offset of the specific finding to ignore.", 45 | ) 46 | 47 | @validator("path", always=True) 48 | def exclusive_path_or_pattern(cls, value, values): 49 | """Ensure that either path or pattern is provided, not both.""" 50 | if values.get("pattern") and value: 51 | raise IgnoreListException( 52 | "Either path OR pattern must be specified, not both." 53 | ) 54 | 55 | if values.get("pattern") and not value and not values.get("md5"): 56 | raise IgnoreListException("One of pattern, path, or md5 must be set.") 57 | 58 | return value 59 | 60 | @validator("offset", always=True) 61 | def offset_and_refernces_both_set(cls, value, values): 62 | if value and len(values.get("references")) > 0: 63 | raise IgnoreListException( 64 | "An offset cannot be combined with a list of references." 65 | ) 66 | 67 | return value 68 | 69 | 70 | class Format(BaseModel, extra=Extra.forbid): 71 | """Defines the schema of the ignore list.""" 72 | 73 | include: List[str] = Field( 74 | [], 75 | title="Define a list of additional ignore lists to include.", 76 | ) 77 | ignore: List[Entry] = Field( 78 | [], 79 | title="Define a list of ignore list entries.", 80 | ) 81 | 82 | 83 | def from_file(filename: str) -> Format: 84 | """Load an ignore list from file, returning a rendered down and complete list.""" 85 | parent_file = os.path.abspath(os.path.expanduser(filename)) 86 | parent_path = os.path.dirname(parent_file) 87 | 88 | # Load the parent ignore list, and then recurse as needed to handle includes. 89 | try: 90 | with open(parent_file, "r") as fin: 91 | parent_list = Format(**json.load(fin)) 92 | 93 | # Roll over the include list and replace all entries with a fully qualified, 94 | # path, if not already set. 95 | for index, path in enumerate(parent_list.include): 96 | parent_list.include[index] = os.path.expanduser(path) 97 | if not path.startswith("/"): 98 | parent_list.include[index] = os.path.join(parent_path, path) 99 | except (OSError, json.JSONDecodeError) as err: 100 | raise STACSException(err) 101 | 102 | # Recursively load included ignore lists. 103 | for file in parent_list.include: 104 | child_pack = from_file(file) 105 | parent_list.ignore.extend(child_pack.ignore) 106 | 107 | # Finally strip the included ignore lists from the entry, as these have been 108 | # resolved, returning the loaded ignore lists to the caller. 
109 | parent_list.include.clear()
110 | return parent_list
111 | 
--------------------------------------------------------------------------------
/stacs/scan/model/manifest.py:
--------------------------------------------------------------------------------
1 | """Defines types to assist with loading and processing of manifests.
2 | 
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 | 
6 | from typing import List
7 | 
8 | from pydantic import BaseModel, Extra, Field
9 | 
10 | 
11 | class Entry(BaseModel, extra=Extra.forbid):
12 | """Defines the schema of a file to process."""
13 | 
14 | path: str = Field(
15 | None,
16 | title="The path to the file on disk.",
17 | )
18 | overlay: str = Field(
19 | None,
20 | title=(
21 | "The overlay path of a file. This is used to generate virtual paths which "
22 | "provide the path to files inside of archives."
23 | ),
24 | )
25 | md5: str = Field(
26 | None,
27 | title="The MD5 sum of the file.",
28 | )
29 | parent: str = Field(
30 | None,
31 | title="The MD5 sum of the file's parent.",
32 | )
33 | mime: str = Field(
34 | None,
35 | title="The mimetype of the file.",
36 | )
37 | size: int = Field(
38 | None,
39 | title="The size of the file (in bytes).",
40 | )
41 | 
42 | 
43 | class Format(BaseModel, extra=Extra.forbid):
44 | """Defines the schema of a manifest file."""
45 | 
46 | files: List[Entry] = Field(
47 | [],
48 | title="A list of files to scan.",
49 | )
50 | 
--------------------------------------------------------------------------------
/stacs/scan/model/pack.py:
--------------------------------------------------------------------------------
1 | """Defines types to assist with loading and processing of rule packs.
2 | 
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 | 
6 | import json
7 | import os
8 | from typing import List
9 | 
10 | from pydantic import BaseModel, Extra, Field
11 | from stacs.scan.exceptions import STACSException
12 | 
13 | 
14 | class Entry(BaseModel, extra=Extra.forbid):
15 | """Defines the schema of a rule pack entry."""
16 | 
17 | module: str = Field(
18 | "rules",
19 | title="Which module the rules are for.",
20 | )
21 | path: str = Field(
22 | None,
23 | title="The path of the module's rules to load.",
24 | )
25 | 
26 | 
27 | class Format(BaseModel, extra=Extra.forbid):
28 | """Defines the schema of the rule pack."""
29 | 
30 | include: List[str] = Field(
31 | [],
32 | title="Define a list of additional packs to include.",
33 | )
34 | pack: List[Entry] = Field(
35 | [],
36 | title="A list of pack entries.",
37 | )
38 | 
39 | 
40 | def from_file(filename: str) -> Format:
41 | """Load a pack from file, returning a rendered down and complete pack."""
42 | parent_file = os.path.abspath(os.path.expanduser(filename))
43 | parent_path = os.path.dirname(parent_file)
44 | 
45 | # Load the parent pack, and then recurse as needed to handle includes.
46 | try:
47 | with open(parent_file, "r") as fin:
48 | parent_pack = Format(**json.load(fin))
49 | 
50 | # Roll over the pack and ensure any entries are fully qualified.
51 | for entry in parent_pack.pack:
52 | entry.path = os.path.expanduser(entry.path)
53 | if not entry.path.startswith("/"):
54 | # Resolve and update the path.
55 | entry.path = os.path.join(parent_path, entry.path)
56 | # Roll over the include list and replace all entries with a fully qualified
57 | # path, if not already set.
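For reference, the rule pack schema above mirrors the ignore list: a pack may include further packs and lists the rule files for each scanner module. A small sketch with illustrative file names:

document = {
    "include": ["credential/pack.json"],
    "pack": [
        {"module": "rules", "path": "credential/generic.yar"},
    ],
}

parsed = Format(**document)

When loaded via from_file() instead, relative include and rule paths are resolved against the directory containing the pack itself.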
58 | for index, path in enumerate(parent_pack.include): 59 | if not path.startswith("/"): 60 | parent_pack.include[index] = os.path.join(parent_path, path) 61 | except (OSError, json.JSONDecodeError) as err: 62 | raise STACSException(err) 63 | 64 | # Recursively load included packs, adding results to the loaded pack. 65 | for file in parent_pack.include: 66 | child_pack = from_file(file) 67 | parent_pack.pack.extend(child_pack.pack) 68 | 69 | # Finally strip the included packs from the entry, as these have been resolved, 70 | # returning the loaded pack to the caller. 71 | parent_pack.include.clear() 72 | return parent_pack 73 | -------------------------------------------------------------------------------- /stacs/scan/output/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines outputs supported by STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan.output import markdown # noqa: F401 7 | from stacs.scan.output import pretty # noqa: F401 8 | from stacs.scan.output import sarif # noqa: F401 9 | -------------------------------------------------------------------------------- /stacs/scan/output/markdown.py: -------------------------------------------------------------------------------- 1 | """Defines a markdown output handler for STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from typing import List 7 | 8 | from stacs.scan import model 9 | from stacs.scan.exceptions import NotImplementedException 10 | 11 | 12 | def render(findings: List[model.finding.Entry], pack: model.pack.Format) -> str: 13 | raise NotImplementedException("Markdown output not yet implemented, sorry!") 14 | -------------------------------------------------------------------------------- /stacs/scan/output/pretty.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from typing import List 3 | 4 | from colorama import Fore, init 5 | from stacs.scan import helper, model 6 | from stacs.scan.__about__ import __version__ 7 | from stacs.scan.constants import ARCHIVE_FILE_SEPARATOR 8 | from stacs.scan.model.finding import Sample 9 | 10 | 11 | def generate_file_tree(virtual_path: str) -> str: 12 | """Returns a tree layout to the virtual path.""" 13 | tree = str() 14 | parts = virtual_path.split(ARCHIVE_FILE_SEPARATOR) 15 | 16 | for index, part in enumerate(parts): 17 | # Add some style. Print a package / box before each archive, and a document 18 | # before the file. 19 | if (index + 1) == len(parts): 20 | emoji = "📄" 21 | else: 22 | emoji = "📦" 23 | 24 | tree += f"{' ' * (index * 4)}`-- {emoji} {part}\n" 25 | 26 | return tree.rstrip() 27 | 28 | 29 | def generate_sample(sample: Sample): 30 | """Return a plain-text and text formatted sample.""" 31 | # Ensure the sample is nicely base64 encoded if binary, rather than slapping three 32 | # already base64'd strings together. 33 | raw = bytearray() 34 | if sample.binary: 35 | raw.extend(bytearray(base64.b64decode(sample.before))) 36 | raw.extend(bytearray(base64.b64decode(sample.finding))) 37 | raw.extend(bytearray(base64.b64decode(sample.after))) 38 | 39 | return str(base64.b64encode(raw), "utf-8") 40 | 41 | return "".join([sample.before, sample.finding, sample.after]) 42 | 43 | 44 | def render(findings: List[model.finding.Entry], pack: model.pack.Format) -> str: 45 | """Render a 'pretty' output to the console for human consumption.""" 46 | init() 47 | 48 | # Find all unsuppressed findings, and track them separately. 
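The file tree rendering above is driven by the virtual paths built during unpacking. A quick sketch of the output shape (component names are made up; ARCHIVE_FILE_SEPARATOR is whatever separator the constants module defines):

virtual = ARCHIVE_FILE_SEPARATOR.join(["bundle.tar", "app.zip", "config/secrets.env"])

print(generate_file_tree(virtual))
# `-- 📦 bundle.tar
#     `-- 📦 app.zip
#         `-- 📄 config/secrets.env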
49 | results = {} 50 | unsuppressed = 0 51 | 52 | for finding in findings: 53 | # Check for suppressions. 54 | if finding.ignore is not None and finding.ignore.ignored: 55 | continue 56 | 57 | # Track it. 58 | unsuppressed += 1 59 | 60 | if results.get(finding.path) is None: 61 | results[finding.path] = [] 62 | 63 | # Extract location appropriately. 64 | location = None 65 | if finding.location.line: 66 | location = f"line {finding.location.line}" 67 | else: 68 | location = f"{finding.location.offset}-bytes" 69 | 70 | # Generates all strings for presentation right away. 71 | results[finding.path].append( 72 | { 73 | "tree": generate_file_tree(finding.path), 74 | "path": finding.path.split(ARCHIVE_FILE_SEPARATOR)[-1], 75 | "rule": finding.source.reference, 76 | "text": finding.source.description, 77 | "location": location, 78 | "sample": generate_sample(finding.sample), 79 | } 80 | ) 81 | 82 | # Provide a summary. 83 | print(helper.banner(version=__version__)) 84 | 85 | if findings == 0: 86 | print("✨ " + Fore.GREEN + "No unsuppressed findings! Great work! ✨\n") 87 | return 88 | 89 | # Render out the findings. 90 | print( 91 | f"{Fore.RED}🔥 There were {unsuppressed} unsuppressed findings in " 92 | f"{len(results)} files 🔥\n" 93 | ) 94 | 95 | for candidate in results: 96 | filepath = candidate.split(ARCHIVE_FILE_SEPARATOR)[0] 97 | count = len(results[candidate]) 98 | 99 | if ARCHIVE_FILE_SEPARATOR in candidate: 100 | print(f"{Fore.RED}❌ {count} finding(s) inside of file {filepath} (Nested)") 101 | else: 102 | print(f"{Fore.RED}❌ {count} finding(s) inside of file {filepath}") 103 | 104 | for finding in results[candidate]: 105 | print() 106 | helper.printi(f"{Fore.YELLOW}Reason : {finding['text']}") 107 | helper.printi(f"{Fore.YELLOW}Rule Id : {finding['rule']}") 108 | helper.printi(f"{Fore.YELLOW}Location : {finding['location']}\n\n") 109 | helper.printi(f"{Fore.YELLOW}Filetree:\n\n") 110 | helper.printi( 111 | finding["tree"], 112 | prefix=f" {Fore.RESET}|{Fore.BLUE}", 113 | ) 114 | print() 115 | helper.printi(f"{Fore.YELLOW}Sample:\n\n") 116 | helper.printi( 117 | f"... {finding['sample']} ...", 118 | prefix=f" {Fore.RESET}|{Fore.BLUE}", 119 | ) 120 | print() 121 | 122 | print(f"\n{Fore.RESET}{'-' * 78}\n") 123 | -------------------------------------------------------------------------------- /stacs/scan/output/sarif.py: -------------------------------------------------------------------------------- 1 | """Defines a SARIF output handler for STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import base64 7 | import json 8 | import re 9 | from typing import Any, Dict, List, Optional, Tuple 10 | 11 | from stacs.scan import __about__, model 12 | from stacs.scan.constants import ARCHIVE_FILE_SEPARATOR 13 | 14 | # Only one SARIF version will be supported at a time. 
15 | SARIF_VERSION = "2.1.0" 16 | SARIF_SCHEMA_URI = "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0.json" 17 | SARIF_URI_BASE_ID = "STACSROOT" 18 | 19 | 20 | def confidence_to_level(confidence: int) -> str: 21 | """Maps the confidence of a finding to a SARIF level.""" 22 | if confidence < 70: 23 | return "warning" 24 | else: 25 | return "error" 26 | 27 | 28 | def render_artifact(path: str, parent: Optional[int] = None) -> Dict[str, Any]: 29 | """Create a new artifact entry.""" 30 | artifact = { 31 | "location": { 32 | "uri": path, 33 | "uriBaseId": SARIF_URI_BASE_ID, 34 | }, 35 | } 36 | 37 | if parent is not None: 38 | artifact["parentIndex"] = parent 39 | 40 | return artifact 41 | 42 | 43 | def path_in_artifacts(path: str, artifacts: List[Dict[str, Any]], parent) -> int: 44 | """Checks if a path exists in the artifacts list.""" 45 | for index, artifact in enumerate(artifacts): 46 | if path == artifact["location"]["uri"]: 47 | # Short circuit if we both don't have a parent. 48 | if artifact.get("parentIndex", None) is None and parent is None: 49 | return index 50 | 51 | # Check common ancestry. 52 | try: 53 | their_parent = artifact.get("parentIndex", None) 54 | our_parent = parent 55 | 56 | while True: 57 | if our_parent == their_parent: 58 | their_parent = artifacts[their_parent]["parentIndex"] 59 | our_parent = artifacts[our_parent]["parentIndex"] 60 | else: 61 | break 62 | except KeyError: 63 | # We're good all the way back to the root. 64 | return index 65 | 66 | return None 67 | 68 | 69 | def add_artifact( 70 | root: str, 71 | finding: model.finding.Entry, 72 | artifacts: List[Dict[str, Any]], 73 | ) -> Tuple[int, List[Dict[str, Any]]]: 74 | """Generates SARIF artifact entires for findings (SARIF v2.1.0 Section 3.24).""" 75 | parent = None 76 | 77 | for real_path in finding.path.split(ARCHIVE_FILE_SEPARATOR): 78 | # Strip the scan directory root from the path for Base URIs to work properly. 79 | path = re.sub(rf"^{root}", "", real_path).lstrip("/") 80 | 81 | # Check if the path already exists. 82 | new_parent = path_in_artifacts(path, artifacts, parent) 83 | if new_parent is not None: 84 | parent = new_parent 85 | continue 86 | 87 | artifacts.append(render_artifact(path, parent)) 88 | parent = len(artifacts) - 1 89 | 90 | # Add metadata to this entry, if missing. 91 | artifacts[parent]["hashes"] = { 92 | "md5": finding.md5, 93 | } 94 | return (parent, artifacts) 95 | 96 | 97 | def render( 98 | root: str, findings: List[model.finding.Entry], pack: model.pack.Format 99 | ) -> str: 100 | """Renders down a SARIF document for STACS findings.""" 101 | rules = [] 102 | results = [] 103 | artifacts = [] 104 | 105 | # Generate a result (SARIF v2.1.0 Section 3.27) for each finding. 106 | for finding in findings: 107 | # Suppressions (SARIF v2.1.0 Section 3.27.23) are used to track findings where 108 | # there is an "ignore" set - via ignore list. 109 | suppressions = [] 110 | 111 | # Create an artifactContent (SARIF v2.1.0 Section 3.3) entry to track the sample 112 | # of the finding. 113 | context_content = {} 114 | artifact_content = {} 115 | 116 | if finding.sample.binary: 117 | artifact_content["binary"] = finding.sample.finding 118 | # Unencode and then re-encode the sample into a single B64 string to provide 119 | # context. 
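For reference, the artifact plumbing above produces entries shaped as below, and the confidence-to-level mapping is a simple threshold (paths are illustrative):

confidence_to_level(90)
# -> "error" (anything below 70 becomes "warning")

render_artifact("app.zip")
# -> {"location": {"uri": "app.zip", "uriBaseId": "STACSROOT"}}

render_artifact("config/secrets.env", parent=0)
# -> {"location": {"uri": "config/secrets.env", "uriBaseId": "STACSROOT"}, "parentIndex": 0}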
120 |             context_content["binary"] = str(
121 |                 base64.b64encode(
122 |                     base64.b64decode(finding.sample.before)
123 |                     + base64.b64decode(finding.sample.finding)
124 |                     + base64.b64decode(finding.sample.after)
125 |                 ),
126 |                 "utf-8",
127 |             )
128 |         else:
129 |             artifact_content["text"] = finding.sample.finding
130 |             context_content["text"] = (
131 |                 finding.sample.before + finding.sample.finding + finding.sample.after
132 |             )
133 | 
134 |         # Create a new contextRegion (SARIF v2.1.0 Section 3.29.5) to provide contextual
135 |         # information about the finding, but do not include the byte or line number
136 |         # offset.
137 |         context = {"snippet": context_content}
138 | 
139 |         # Create a new region (SARIF v2.1.0 Section 3.30) to track the location of the
140 |         # finding and the sample.
141 |         region = {
142 |             "byteOffset": finding.location.offset,
143 |             "snippet": artifact_content,
144 |         }
145 | 
146 |         # Line numbers are optional, as the input file may be binary.
147 |         if finding.location.line:
148 |             region["startLine"] = finding.location.line
149 | 
150 |         # Add a new artifact for this finding, or retrieve the index of the existing one.
151 |         index, artifacts = add_artifact(root, finding, artifacts)
152 | 
153 |         # Strip the scan directory root from the path, as we're using the reference
154 |         # from originalUriBaseIds (SARIF v2.1.0 Section 3.14.14) to allow "portability".
155 |         path = finding.path.split(ARCHIVE_FILE_SEPARATOR)[-1]
156 |         relative_path = re.sub(rf"^{root}", "", path).lstrip("/")
157 | 
158 |         # Pin the artifact location back to a physical location (SARIF v2.1.0 Section
159 |         # 3.28.3).
160 |         physical_location = {
161 |             "physicalLocation": {
162 |                 "region": region,
163 |                 "contextRegion": context,
164 |                 "artifactLocation": {
165 |                     "uri": relative_path,
166 |                     "index": index,
167 |                     "uriBaseId": SARIF_URI_BASE_ID,
168 |                 },
169 |             },
170 |         }
171 | 
172 |         # Generate a new Rule entry, if required (SARIF v2.1.0 Section 3.49).
173 |         rule = None
174 | 
175 |         for candidate in rules:
176 |             if finding.source.reference == candidate.get("id"):
177 |                 rule = candidate
178 |                 break
179 | 
180 |         if not rule:
181 |             # Add the description from the original rule pack entry into the Rule for
182 |             # easy tracking.
183 |             rule = {
184 |                 "id": finding.source.reference,
185 |                 "shortDescription": {
186 |                     "text": finding.source.description,
187 |                 },
188 |             }
189 |             rules.append(rule)
190 | 
191 |         # Add a Suppression entry if this finding was marked as "Ignored", along with
192 |         # the reason (justification) from the original ignore list.
193 |         if finding.ignore is not None and finding.ignore.ignored:
194 |             suppressions.append(
195 |                 {
196 |                     "kind": "external",
197 |                     "status": "accepted",
198 |                     "justification": finding.ignore.reason,
199 |                 }
200 |             )
201 | 
202 |         # Track the finding (Result).
203 |         results.append(
204 |             {
205 |                 "message": rule.get("shortDescription"),
206 |                 "level": confidence_to_level(finding.confidence),
207 |                 "ruleId": finding.source.reference,
208 |                 "locations": [
209 |                     physical_location,
210 |                 ],
211 |                 "suppressions": suppressions,
212 |             }
213 |         )
214 | 
215 |     # Add a toolComponent (SARIF v2.1.0 Section 3.19), and bolt it all together.
216 |     tool = {
217 |         "driver": {
218 |             "name": __about__.__title__.upper(),
219 |             "rules": rules,
220 |             "version": __about__.__version__,
221 |             "downloadUri": __about__.__uri__,
222 |             "informationUri": __about__.__uri__,
223 |         },
224 |     }
225 |     run = {
226 |         "tool": tool,
227 |         "results": results,
228 |         "artifacts": artifacts,
229 |         "originalUriBaseIds": {
230 |             SARIF_URI_BASE_ID: {
231 |                 "uri": f"file://{root.rstrip('/')}/",
232 |             },
233 |         },
234 |     }
235 |     sarif = {
236 |         "version": SARIF_VERSION,
237 |         "$schema": SARIF_SCHEMA_URI,
238 |         "runs": [
239 |             run,
240 |         ],
241 |     }
242 | 
243 |     # Return a stringified JSON representation of the SARIF document.
244 |     return json.dumps(sarif)
245 | 
--------------------------------------------------------------------------------
/stacs/scan/scanner/__init__.py:
--------------------------------------------------------------------------------
1 | """Defines scanners used by STACS.
2 | 
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 | 
6 | from stacs.scan.scanner import rules
7 | 
8 | # Export all enabled scanners.
9 | __all__ = [
10 |     "rules",
11 | ]
12 | 
--------------------------------------------------------------------------------
/stacs/scan/scanner/rules.py:
--------------------------------------------------------------------------------
1 | """Implements a rules based scanner for STACS.
2 | 
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 | 
6 | import base64
7 | import hashlib
8 | import os
9 | from concurrent.futures import ThreadPoolExecutor, as_completed
10 | from typing import List
11 | 
12 | import yara
13 | 
14 | from stacs.scan.constants import CHUNK_SIZE, WINDOW_SIZE
15 | from stacs.scan.exceptions import FileAccessException, InvalidFormatException
16 | from stacs.scan.loader import archive
17 | from stacs.scan.model import finding, manifest, pack
18 | 
19 | 
20 | def is_binary(target: manifest.Entry) -> bool:
21 |     """Attempts to determine if a target file is binary."""
22 |     # This is a bit false positive prone, as some "application" mime types are text.
23 |     # However, as we only support a finite number of formats, we should be safe to do
24 |     # this (for now!)
25 |     binary_types = (
26 |         "application",
27 |         "image",
28 |         "audio",
29 |         "video",
30 |     )
31 |     if target.mime and target.mime.startswith(binary_types):
32 |         return True
33 | 
34 |     # Otherwise, we'll try and read some data as text and see. This could fail if a
35 |     # binary contained readable text for 10 * CHUNK_SIZE.
36 |     try:
37 |         with open(target.path, "r") as fin:
38 |             for _ in range(0, 10):
39 |                 fin.read(CHUNK_SIZE)
40 |     except UnicodeDecodeError:
41 |         return True
42 | 
43 |     # Default to text.
44 |     return False
45 | 
46 | 
47 | def generate_sample(target: manifest.Entry, offset: int, size: int) -> finding.Sample:
48 |     """Generates a sample for a finding."""
49 |     binary = is_binary(target)
50 | 
51 |     before = bytes()
52 |     after = bytes()
53 |     entry = bytes()
54 | 
55 |     try:
56 |         # Make sure we don't try and read past the beginning and end of the file.
57 |         target_sz = os.stat(target.path).st_size
58 | 
59 |         if offset - WINDOW_SIZE < 0:
60 |             before_sz = offset
61 |             before_offset = 0
62 |         else:
63 |             before_sz = WINDOW_SIZE
64 |             before_offset = offset - before_sz
65 | 
66 |         # Ensure we read N bytes AFTER the entire match, not after the first byte of the
67 |         # match.
68 |         if offset + size + WINDOW_SIZE > target_sz:
69 |             after_sz = target_sz - (offset + size)
70 |             after_offset = target_sz - after_sz
71 |         else:
72 |             after_sz = WINDOW_SIZE
73 |             after_offset = offset + size
74 | 
75 |         with open(target.path, "rb") as fin:
76 |             # Seek to and read in the context before.
77 |             fin.seek(before_offset)
78 |             before = fin.read(before_sz)
79 | 
80 |             # Read the finding match itself. We have this already from yara, but we're
81 |             # already here so we may as well.
82 |             fin.seek(offset)
83 |             entry = fin.read(size)
84 | 
85 |             # Seek to and read in the context after the finding.
86 |             fin.seek(after_offset)
87 |             after = fin.read(after_sz)
88 |     except OSError as err:
89 |         raise FileAccessException(err)
90 | 
91 |     if not binary:
92 |         try:
93 |             return finding.Sample(
94 |                 window=WINDOW_SIZE,
95 |                 before=str(before, "utf-8"),
96 |                 after=str(after, "utf-8"),
97 |                 finding=str(entry, "utf-8"),
98 |                 binary=binary,
99 |             )
100 |         except UnicodeDecodeError:
101 |             # Fall through and return a base64 encoded sample.
102 |             pass
103 | 
104 |     return finding.Sample(
105 |         window=WINDOW_SIZE,
106 |         before=base64.b64encode(before),
107 |         after=base64.b64encode(after),
108 |         finding=base64.b64encode(entry),
109 |         binary=binary,
110 |     )
111 | 
112 | 
113 | def generate_location(target: manifest.Entry, offset: int) -> finding.Location:
114 |     """Generates a location for a finding."""
115 |     # If the file is binary, we can't generate a line number, so we already have the
116 |     # data we need.
117 |     if is_binary(target):
118 |         return finding.Location(offset=offset)
119 | 
120 |     # Attempt to generate a line number for the finding.
121 |     bytes_read = 0
122 |     line_number = 1
123 |     try:
124 |         with open(target.path, "r") as fin:
125 |             # Read in chunks, counting the number of newline characters up to the chunk
126 |             # which includes the finding.
127 |             while bytes_read < offset:
128 |                 bytes_read += CHUNK_SIZE
129 | 
130 |                 if bytes_read > offset:
131 |                     line_number += fin.read(offset - (bytes_read - CHUNK_SIZE)).count("\n")
132 |                 else:
133 |                     line_number += fin.read(CHUNK_SIZE).count("\n")
134 |     except UnicodeDecodeError:
135 |         # It's possible to get into a state where the detected mime-type of a file is
136 |         # incorrect, resulting in unprocessable binary data making it here. In these
137 |         # cases we'll just bail early and report the number of bytes into the file of
138 |         # the finding. Exactly as we do for known binary files.
139 |         return finding.Location(offset=offset)
140 |     except OSError as err:
141 |         raise FileAccessException(err)
142 | 
143 |     return finding.Location(offset=offset, line=line_number)
144 | 
145 | 
146 | def generate_findings(target: manifest.Entry, match: yara.Match) -> List[finding.Entry]:
147 |     """Attempts to create findings based on matches inside of the target file."""
148 |     findings = []
149 | 
150 |     # Generate a new finding entry for each matched string. This is in order to ensure
151 |     # that multiple findings in the same file are listed separately - as they may be
152 |     # different credentials.
153 |     for offset, _, entry in match.strings:
154 |         location = generate_location(target, offset)
155 |         sample = generate_sample(target, offset, len(entry))
156 | 
157 |         # Add on information about the origin of the finding (that's us!)
158 | source = finding.Source( 159 | module=__name__, 160 | reference=match.rule, 161 | tags=match.tags, 162 | version=match.meta.get("version", "UNKNOWN"), 163 | description=match.meta.get("description"), 164 | ) 165 | findings.append( 166 | finding.Entry( 167 | md5=target.md5, 168 | path=target.overlay if target.overlay else target.path, 169 | confidence=match.meta.get("accuracy", 50), 170 | source=source, 171 | sample=sample, 172 | location=location, 173 | ) 174 | ) 175 | 176 | return findings 177 | 178 | 179 | def matcher(target: manifest.Entry, ruleset: yara.Rules) -> List[finding.Entry]: 180 | findings = [] 181 | 182 | for match in ruleset.match(target.path): 183 | findings.extend(generate_findings(target, match)) 184 | 185 | return findings 186 | 187 | 188 | def run( 189 | targets: List[manifest.Entry], 190 | pack: pack.Format, 191 | workers: int = 10, 192 | skip_on_eacces: bool = True, 193 | ) -> List[finding.Entry]: 194 | """ 195 | Executes the rules based matcher on all input files, returning a list of finding 196 | Entry objects. 197 | """ 198 | findings = [] 199 | 200 | # Load and compile all YARA rules up front. 201 | namespaces = dict() 202 | 203 | for rule in pack.pack: 204 | namespace = hashlib.md5(bytes(rule.path, "utf-8")).hexdigest() 205 | namespaces[namespace] = rule.path 206 | 207 | try: 208 | ruleset = yara.compile(filepaths=namespaces) 209 | except yara.Error as err: 210 | raise InvalidFormatException(err) 211 | 212 | # Run the matcher in a thread pool as we're likely to be I/O bound. 213 | with ThreadPoolExecutor(max_workers=workers) as pool: 214 | futures = [] 215 | 216 | # Reject any input files which are supported archives. This is as we should have 217 | # unpacked versions of these to process, which allows matching the specific file 218 | # with a finding, rather than a finding on an archive. 219 | # 220 | # NOTE: Credentials stuffed into metadata of supported archive formats which 221 | # support archive metadata (such as Zip's "Extra") will not be found. 222 | # 223 | for target in targets: 224 | if target.mime not in archive.MIME_TYPE_HANDLERS: 225 | futures.append(pool.submit(matcher, target, ruleset)) 226 | 227 | for future in as_completed(futures): 228 | try: 229 | findings.extend(future.result()) 230 | except FileAccessException: 231 | if not skip_on_eacces: 232 | raise 233 | 234 | return findings 235 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/tests/__init__.py -------------------------------------------------------------------------------- /tests/fixtures/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/tests/fixtures/.gitignore -------------------------------------------------------------------------------- /tests/fixtures/findings/001.txt: -------------------------------------------------------------------------------- 1 | Credential is at the end of file, with less than the WINDOW_SIZE available for a sample 2 | this should cause the after window to be reduced to only match the bytes remaining after 3 | the finding. 
4 | 5 | ghp_YWNjb3VudHMyM3JkZnNkZnNkZnNkZnNkZnNk 6 | -------------------------------------------------------------------------------- /tests/fixtures/findings/002.txt: -------------------------------------------------------------------------------- 1 | X 2 | 3 | ghp_YWNjb3VudHMyM3JkZnNkZnNkZnNkZnNkZnNk 4 | 5 | There is plenty of data after the finding, but the sample before the finding should not 6 | try and read past the start of the file. 7 | -------------------------------------------------------------------------------- /tests/fixtures/findings/003.txt: -------------------------------------------------------------------------------- 1 | ghp_YWNjb3VudHMyM3JkZnNkZnNkZnNkZnNkZnNk -------------------------------------------------------------------------------- /tests/fixtures/findings/004.txt: -------------------------------------------------------------------------------- 1 | There is both plenty of data before and after the finding, so samples should operate 2 | properly and capture WINDOW_SIZE before and after. 3 | 4 | ghp_YWNjb3VudHMyM3JkZnNkZnNkZnNkZnNkZnNk 5 | 6 | Unlike the previous finding fixture (003) where the file ONLY contains the finding with 7 | no additional data. 8 | -------------------------------------------------------------------------------- /tests/fixtures/ignore_list/001-simple.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "ignore": [ 3 | { 4 | "pattern": "src/crypto/rsa\\.c", 5 | "reason": "PEM format RSA header and trailer constants due to parser." 6 | } 7 | ] 8 | } -------------------------------------------------------------------------------- /tests/fixtures/ignore_list/002-framework.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "ignore": [ 4 | { 5 | "pattern": ".*/tests/.*", 6 | "reason": "Test fixtures contain example credentials." 7 | } 8 | ] 9 | } -------------------------------------------------------------------------------- /tests/fixtures/ignore_list/002-project.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "002-framework.valid.json", 4 | "002-system.valid.json" 5 | ], 6 | "ignore": [] 7 | } -------------------------------------------------------------------------------- /tests/fixtures/ignore_list/002-system.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "ignore": [ 4 | { 5 | "pattern": ".*/libexample\\.so$", 6 | "reason": "libexample contains false positives due to a reason." 
7 | }, 8 | { 9 | "pattern": ".*/libssl.*?\\.so$", 10 | "reason": "Ignore all hash rules inside version X.Y.Z of libssl", 11 | "references": [ 12 | "CredentialHashMD5", 13 | "CredentialHashSHA1", 14 | "CredentialHashSHA256", 15 | "CredentialHashSHA512" 16 | ] 17 | }, 18 | { 19 | "md5": "e95348ed81f439d0a73a18835bd78eec", 20 | "reason": "Ignore all hash rules inside version X.Y.Z of example file", 21 | "references": [ 22 | "CredentialHashMD5", 23 | "CredentialHashSHA1", 24 | "CredentialHashSHA256", 25 | "CredentialHashSHA512" 26 | ] 27 | } 28 | ] 29 | } -------------------------------------------------------------------------------- /tests/fixtures/pack/001-simple.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "pack": [ 4 | { 5 | "module": "rules", 6 | "path": "all.yar" 7 | } 8 | ] 9 | } -------------------------------------------------------------------------------- /tests/fixtures/pack/002-cloud.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "pack": [ 4 | { 5 | "module": "rules", 6 | "path": "credential/cloud/aws/access_key.yar" 7 | }, 8 | { 9 | "module": "rules", 10 | "path": "credential/cloud/gcp/service_account.yar" 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /tests/fixtures/pack/002-parent.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "002-cloud.valid.json", 4 | "002-pki.valid.json" 5 | ], 6 | "pack": [] 7 | } 8 | -------------------------------------------------------------------------------- /tests/fixtures/pack/002-pki-dsa.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "pack": [ 4 | { 5 | "module": "rules", 6 | "path": "credential/pki/dsa/der.yar" 7 | }, 8 | { 9 | "module": "rules", 10 | "path": "credential/pki/dsa/pem.yar" 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /tests/fixtures/pack/002-pki-rsa.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "pack": [ 4 | { 5 | "module": "rules", 6 | "path": "credential/pki/rsa/der.yar" 7 | }, 8 | { 9 | "module": "rules", 10 | "path": "credential/pki/rsa/pem.yar" 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /tests/fixtures/pack/002-pki.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "002-pki-rsa.valid.json", 4 | "002-pki-dsa.valid.json" 5 | ], 6 | "pack": [] 7 | } -------------------------------------------------------------------------------- /tests/test_filter_ignore_list.py: -------------------------------------------------------------------------------- 1 | """Tests the STACS ignore list filter.""" 2 | 3 | import os 4 | import unittest 5 | 6 | import stacs.scan 7 | 8 | 9 | class STACSFilterIgnoreListTestCase(unittest.TestCase): 10 | """Tests the STACS ignore list filter.""" 11 | 12 | def setUp(self): 13 | """Ensure the application is setup for testing.""" 14 | self.fixtures_path = os.path.join( 15 | os.path.dirname(os.path.abspath(__file__)), "fixtures/ignore_list/" 16 | ) 17 | 18 | def tearDown(self): 19 | """Ensure everything is torn down between tests.""" 20 | pass 21 | 22 | def test_by_path(self): 23 | """Validate whether 
path filters are working.""" 24 | # Use the same fixture for all branches. 25 | finding = stacs.scan.model.finding.Entry( 26 | path="/a/a", 27 | md5="fa19207ef28b6a97828e3a22b11290e9", 28 | location=stacs.scan.model.finding.Location( 29 | offset=300, 30 | ), 31 | source=stacs.scan.model.finding.Source( 32 | module="stacs.scan.scanner.rules", 33 | reference="SomeRule", 34 | ), 35 | ) 36 | 37 | # Define ignores which should correctly be ignored. 38 | hits = [ 39 | # Path matches, no other constraint. 40 | stacs.scan.model.ignore_list.Entry(path="/a/a", reason="Test"), 41 | # Path matches, reference matches. 42 | stacs.scan.model.ignore_list.Entry( 43 | path="/a/a", reason="Test", references=["SomeRule", "OtherRule"] 44 | ), 45 | # Path matches, offset matches. 46 | stacs.scan.model.ignore_list.Entry(path="/a/a", reason="Test", offset=300), 47 | ] 48 | 49 | # Path differs. 50 | miss = stacs.scan.model.ignore_list.Entry(path="/a/b", reason="Test") 51 | self.assertEqual(stacs.scan.filter.ignore_list.by_path(finding, miss), False) 52 | 53 | # Path matches, reference differs. 54 | miss = stacs.scan.model.ignore_list.Entry( 55 | path="/a/a", reason="Test", references=["OtherRule"] 56 | ) 57 | self.assertEqual(stacs.scan.filter.ignore_list.by_path(finding, miss), False) 58 | 59 | # Path matches, offset differs. 60 | miss = stacs.scan.model.ignore_list.Entry( 61 | path="/a/a", reason="Test", offset=1234 62 | ) 63 | self.assertEqual(stacs.scan.filter.ignore_list.by_path(finding, miss), False) 64 | 65 | # Ensure all hit entries are matches. 66 | for hit in hits: 67 | self.assertEqual(stacs.scan.filter.ignore_list.by_path(finding, hit), True) 68 | 69 | def test_by_pattern(self): 70 | """Validate whether pattern filters are working.""" 71 | # Use the same fixture for all branches. 72 | finding = stacs.scan.model.finding.Entry( 73 | path="/a/tests/a", 74 | md5="fa19207ef28b6a97828e3a22b11290e9", 75 | location=stacs.scan.model.finding.Location( 76 | offset=300, 77 | ), 78 | source=stacs.scan.model.finding.Source( 79 | module="stacs.scan.scanner.rules", 80 | reference="SomeRule", 81 | ), 82 | ) 83 | 84 | # Pattern matches, no other constraint. 85 | hit = stacs.scan.model.ignore_list.Entry(pattern=".*/tests/.*", reason="Test") 86 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, hit), True) 87 | 88 | # Pattern matches, reference matches. 89 | hit = stacs.scan.model.ignore_list.Entry( 90 | pattern=".*/tests/.*", 91 | reason="Test", 92 | references=["SomeRule", "OtherRule"], 93 | ) 94 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, hit), True) 95 | 96 | # Pattern matches, offset matches. 97 | hit = stacs.scan.model.ignore_list.Entry( 98 | pattern=".*/tests/.*", reason="Test", offset=300 99 | ) 100 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, hit), True) 101 | 102 | # Pattern differs. 103 | miss = stacs.scan.model.ignore_list.Entry(pattern=r"\.shasums$", reason="Test") 104 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, miss), False) 105 | 106 | # Pattern matches, reference differs. 107 | miss = stacs.scan.model.ignore_list.Entry( 108 | pattern=".*/tests/.*", reason="Test", references=["OtherRule"] 109 | ) 110 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, miss), False) 111 | 112 | # Pattern matches, offset differs. 
113 | miss = stacs.scan.model.ignore_list.Entry( 114 | pattern=".*/tests/.*", reason="Test", offset=1234 115 | ) 116 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, miss), False) 117 | 118 | def test_by_hash(self): 119 | """Validate whether hash filters are working.""" 120 | # Use the same fixture for all branches. 121 | finding = stacs.scan.model.finding.Entry( 122 | path="/a/tests/a", 123 | md5="fa19207ef28b6a97828e3a22b11290e9", 124 | location=stacs.scan.model.finding.Location( 125 | offset=300, 126 | ), 127 | source=stacs.scan.model.finding.Source( 128 | module="stacs.scan.scanner.rules", 129 | reference="SomeRule", 130 | ), 131 | ) 132 | 133 | # Hash matches, no other constraint. 134 | hit = stacs.scan.model.ignore_list.Entry( 135 | md5="fa19207ef28b6a97828e3a22b11290e9", reason="Test" 136 | ) 137 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, hit), True) 138 | 139 | # Hash matches, reference matches. 140 | hit = stacs.scan.model.ignore_list.Entry( 141 | md5="fa19207ef28b6a97828e3a22b11290e9", 142 | reason="Test", 143 | references=["SomeRule", "OtherRule"], 144 | ) 145 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, hit), True) 146 | 147 | # Hash matches, offset matches. 148 | hit = stacs.scan.model.ignore_list.Entry( 149 | md5="fa19207ef28b6a97828e3a22b11290e9", reason="Test", offset=300 150 | ) 151 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, hit), True) 152 | 153 | # Hash differs. 154 | miss = stacs.scan.model.ignore_list.Entry( 155 | md5="cf42e6f36da80658591489975bbd845b", reason="Test" 156 | ) 157 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, miss), False) 158 | 159 | # Hash matches, reference differs. 160 | miss = stacs.scan.model.ignore_list.Entry( 161 | md5="fa19207ef28b6a97828e3a22b11290e9", 162 | reason="Test", 163 | references=["OtherRule"], 164 | ) 165 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, miss), False) 166 | 167 | # Hash matches, offset differs. 
168 | miss = stacs.scan.model.ignore_list.Entry( 169 | md5="fa19207ef28b6a97828e3a22b11290e9", reason="Test", offset=1234 170 | ) 171 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, miss), False) 172 | -------------------------------------------------------------------------------- /tests/test_loader_filepath.py: -------------------------------------------------------------------------------- 1 | """Tests the STACS filepath loader.""" 2 | 3 | import os 4 | import unittest 5 | 6 | 7 | class STACSLoaderFilepathTestCase(unittest.TestCase): 8 | """Tests the STACS filepath loader.""" 9 | 10 | def setUp(self): 11 | """Ensure the application is setup for testing.""" 12 | self.fixtures_path = os.path.join( 13 | os.path.dirname(os.path.abspath(__file__)), "fixtures/" 14 | ) 15 | 16 | def tearDown(self): 17 | """Ensure everything is torn down between tests.""" 18 | pass 19 | -------------------------------------------------------------------------------- /tests/test_model_ignore_list.py: -------------------------------------------------------------------------------- 1 | """Tests the STACS allow list model and validator.""" 2 | 3 | import json 4 | import os 5 | import unittest 6 | 7 | import stacs.scan 8 | 9 | 10 | class STACSModelAllowListTestCase(unittest.TestCase): 11 | """Tests the STACS allow list model and validator.""" 12 | 13 | def setUp(self): 14 | """Ensure the application is setup for testing.""" 15 | self.fixtures_path = os.path.join( 16 | os.path.dirname(os.path.abspath(__file__)), "fixtures/ignore_list/" 17 | ) 18 | 19 | def tearDown(self): 20 | """Ensure everything is torn down between tests.""" 21 | pass 22 | 23 | def test_simple(self): 24 | """Ensure that simple allow lists can be loaded.""" 25 | with open(os.path.join(self.fixtures_path, "001-simple.valid.json"), "r") as f: 26 | stacs.scan.model.ignore_list.Format(**json.load(f)) 27 | 28 | def test_hierarchical_loading(self): 29 | """Ensure that hierarchical allow lists can be loaded.""" 30 | with open(os.path.join(self.fixtures_path, "002-project.valid.json"), "r") as f: 31 | stacs.scan.model.ignore_list.Format(**json.load(f)) 32 | -------------------------------------------------------------------------------- /tests/test_model_pack.py: -------------------------------------------------------------------------------- 1 | """Tests the STACS pack model and validator.""" 2 | 3 | import json 4 | import os 5 | import unittest 6 | 7 | import stacs.scan 8 | 9 | 10 | class STACSModelPackTestCase(unittest.TestCase): 11 | """Tests the STACS pack model and validator.""" 12 | 13 | def setUp(self): 14 | """Ensure the application is setup for testing.""" 15 | self.fixtures_path = os.path.join( 16 | os.path.dirname(os.path.abspath(__file__)), "fixtures/pack/" 17 | ) 18 | 19 | def tearDown(self): 20 | """Ensure everything is torn down between tests.""" 21 | pass 22 | 23 | def test_simple_pack(self): 24 | """Ensure that simple packs can be loaded.""" 25 | with open(os.path.join(self.fixtures_path, "001-simple.valid.json"), "r") as f: 26 | stacs.scan.model.pack.Format(**json.load(f)) 27 | -------------------------------------------------------------------------------- /tests/test_output_sarif.py: -------------------------------------------------------------------------------- 1 | """Tests the STACS SARIF output module.""" 2 | 3 | import unittest 4 | 5 | import stacs.scan 6 | 7 | 8 | class STACSOutputSARIFTestCase(unittest.TestCase): 9 | """Tests the STACS SARIF output module.""" 10 | 11 | def setUp(self): 12 | """Ensure the 
application is setup for testing.""" 13 | pass 14 | 15 | def tearDown(self): 16 | """Ensure everything is torn down between tests.""" 17 | pass 18 | 19 | def test_add_artifact(self): 20 | """Ensure that artifact entries are deduplicated by their full path.""" 21 | findings = [ 22 | stacs.scan.model.finding.Entry( 23 | path="/tmp/rootfs/etc/passwd", 24 | md5="b39bfc0e26a30024c76e4dcb8a1eae87", 25 | ), 26 | stacs.scan.model.finding.Entry( 27 | path="/tmp/rootfs/etc/passwd", 28 | md5="b39bfc0e26a30024c76e4dcb8a1eae87", 29 | ), 30 | stacs.scan.model.finding.Entry( 31 | path="/tmp/rootfs/a.tar.gz!a.tar!cred", 32 | md5="bf072e9119077b4e76437a93986787ef", 33 | ), 34 | stacs.scan.model.finding.Entry( 35 | path="/tmp/rootfs/a.tar.gz!a.tar!b_cred", 36 | md5="30cf3d7d133b08543cb6c8933c29dfd7", 37 | ), 38 | stacs.scan.model.finding.Entry( 39 | path="/tmp/rootfs/b.tar.gz!b_cred", 40 | md5="57b8d745384127342f95660d97e1c9c2", 41 | ), 42 | stacs.scan.model.finding.Entry( 43 | path="/tmp/rootfs/b.tar.gz!a.tar!cred", 44 | md5="787c9a8e2148e711f6e9f44696cf341f", 45 | ), 46 | stacs.scan.model.finding.Entry( 47 | path="/tmp/rootfs/a.tar.gz!a.tar!b.tar.gz!b.tar!pass", 48 | md5="d2a33790e5bf28b33cdbf61722a06989", 49 | ), 50 | ] 51 | 52 | # Ensure we get the expected number of artifacts in the artifacts list. 53 | artifacts = [] 54 | for finding in findings: 55 | _, artifacts = stacs.scan.output.sarif.add_artifact( 56 | "/tmp/rootfs/", finding, artifacts 57 | ) 58 | 59 | # Ensure findings are unfurled into the expected number of unique artifacts. 60 | self.assertEqual(len(artifacts), 12) 61 | -------------------------------------------------------------------------------- /tests/test_scanner_rule.py: -------------------------------------------------------------------------------- 1 | """Tests the STACS Scanner Rule module.""" 2 | 3 | import os 4 | import unittest 5 | 6 | import stacs.scan 7 | 8 | 9 | class STACSScannerRuleTestCase(unittest.TestCase): 10 | """Tests the STACS Scanner Rule module.""" 11 | 12 | def setUp(self): 13 | """Ensure the application is setup for testing.""" 14 | self.fixtures_path = os.path.join( 15 | os.path.dirname(os.path.abspath(__file__)), "fixtures/findings/" 16 | ) 17 | 18 | def tearDown(self): 19 | """Ensure everything is torn down between tests.""" 20 | pass 21 | 22 | def test_generate_sample(self): 23 | """Ensures that samples are correctly generated.""" 24 | reduced_after_finding = stacs.scan.model.manifest.Entry( 25 | path=os.path.join(self.fixtures_path, "001.txt") 26 | ) 27 | reduced_before_finding = stacs.scan.model.manifest.Entry( 28 | path=os.path.join(self.fixtures_path, "002.txt") 29 | ) 30 | only_finding = stacs.scan.model.manifest.Entry( 31 | path=os.path.join(self.fixtures_path, "003.txt") 32 | ) 33 | sufficent_before_after_finding = stacs.scan.model.manifest.Entry( 34 | path=os.path.join(self.fixtures_path, "004.txt") 35 | ) 36 | 37 | # Check that the correct number of bytes were extracted before and after the 38 | # respective findings. 39 | context = stacs.scan.scanner.rules.generate_sample( 40 | reduced_after_finding, 41 | 191, # Offset. 42 | 40, # Size. 43 | ) 44 | self.assertEqual(len(context.before), 20) 45 | self.assertEqual(len(context.finding), 40) 46 | self.assertEqual(len(context.after), 1) 47 | 48 | context = stacs.scan.scanner.rules.generate_sample( 49 | reduced_before_finding, 50 | 3, # Offset. 51 | 40, # Size. 
52 | ) 53 | self.assertEqual(len(context.before), 3) 54 | self.assertEqual(len(context.finding), 40) 55 | self.assertEqual(len(context.after), 20) 56 | 57 | context = stacs.scan.scanner.rules.generate_sample( 58 | only_finding, 59 | 0, # Offset. 60 | 40, # Size. 61 | ) 62 | self.assertEqual(len(context.before), 0) 63 | self.assertEqual(len(context.finding), 40) 64 | self.assertEqual(len(context.after), 0) 65 | 66 | context = stacs.scan.scanner.rules.generate_sample( 67 | sufficent_before_after_finding, 68 | 137, # Offset. 69 | 40, # Size. 70 | ) 71 | self.assertEqual(len(context.before), 20) 72 | self.assertEqual(len(context.finding), 40) 73 | self.assertEqual(len(context.after), 20) 74 | -------------------------------------------------------------------------------- /wrapper/stacs-scan: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # This wrapper is used to determine whether a stacs ignore is present in the scan 4 | # directory. 5 | # 6 | 7 | SCAN_DIR="/mnt/stacs/input" 8 | 9 | # Define additional flags to pass. 10 | STACS_FLAGS="" 11 | 12 | if [ ${STACS_SKIP_UNPROCESSABLE:-0} -ne 0 ]; then 13 | STACS_FLAGS="${STACS_FLAGS} --skip-unprocessable" 14 | fi 15 | 16 | if [ ${STACS_THREADS:-10} -ne 10 ]; then 17 | STACS_FLAGS="${STACS_FLAGS} --threads ${STACS_THREADS}" 18 | fi 19 | 20 | if [ ${STACS_DEBUG:-0} -ne 0 ]; then 21 | STACS_FLAGS="${STACS_FLAGS} --debug" 22 | fi 23 | 24 | if [ ${STACS_OUTPUT_PRETTY:-0} -ne 0 ]; then 25 | STACS_FLAGS="${STACS_FLAGS} --pretty" 26 | fi 27 | 28 | # If additional arguments are provided, use them instead of defaults. 29 | if [ "$#" -gt 0 ]; then 30 | stacs "$@" 31 | else 32 | # Use an ignore list, if present. 33 | if [ -e "${SCAN_DIR}/stacs.ignore.json" ]; then 34 | stacs \ 35 | --rule-pack /mnt/stacs/rules/credential.json \ 36 | --cache-directory /mnt/stacs/cache \ 37 | --ignore-list "${SCAN_DIR}/stacs.ignore.json" \ 38 | ${STACS_FLAGS} \ 39 | "${SCAN_DIR}/" 40 | else 41 | stacs \ 42 | --rule-pack /mnt/stacs/rules/credential.json \ 43 | --cache-directory /mnt/stacs/cache \ 44 | ${STACS_FLAGS} \ 45 | "${SCAN_DIR}/" 46 | fi 47 | fi 48 | --------------------------------------------------------------------------------
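Usage note (illustrative sketch, not part of the repository): the wrapper above is
intended to run inside the STACS container with the scan target bind-mounted at
/mnt/stacs/input, with behaviour toggled through the STACS_* environment variables it
inspects. The image name and tag below are assumptions and may differ from the
published image.

    # Hypothetical invocation; "stacscan/stacs:latest" is an assumed image reference.
    docker run --rm \
        -e STACS_OUTPUT_PRETTY=1 \
        -e STACS_THREADS=20 \
        -v "$(pwd):/mnt/stacs/input" \
        stacscan/stacs:latest

Per the wrapper, any arguments supplied after the image name are passed directly to the
stacs CLI in place of the defaults, and a stacs.ignore.json placed in the mounted
directory is picked up automatically via --ignore-list.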