├── .github ├── CODEOWNERS └── workflows │ ├── check.yml │ ├── publish.yml │ ├── publish_to_testing.yml │ └── update.yml ├── .gitignore ├── .vscode ├── c_cpp_properties.json └── settings.json ├── Dockerfile ├── LICENSE ├── README.md ├── docs └── images │ ├── Human-Output-Example.png │ ├── SARIF-Viewer-Example.png │ ├── STACS-Logo-RGB.png │ └── STACS-Logo-RGB.small.png ├── pyproject.toml ├── setup.py ├── stacs ├── __init__.py ├── native │ └── archive │ │ └── src │ │ ├── archive.cpp │ │ ├── archiveentry.cpp │ │ ├── archiveentry.hpp │ │ ├── archivereader.cpp │ │ └── archivereader.hpp └── scan │ ├── __about__.py │ ├── __init__.py │ ├── constants.py │ ├── entrypoint │ ├── __init__.py │ └── cli.py │ ├── exceptions.py │ ├── filter │ ├── __init__.py │ └── ignore_list.py │ ├── helper.py │ ├── loader │ ├── __init__.py │ ├── archive.py │ ├── filepath.py │ ├── format │ │ ├── __init__.py │ │ ├── dmg.py │ │ └── xar.py │ └── manifest.py │ ├── model │ ├── __init__.py │ ├── finding.py │ ├── ignore_list.py │ ├── manifest.py │ └── pack.py │ ├── output │ ├── __init__.py │ ├── markdown.py │ ├── pretty.py │ └── sarif.py │ └── scanner │ ├── __init__.py │ └── rules.py ├── tests ├── __init__.py ├── fixtures │ ├── .gitignore │ ├── findings │ │ ├── 001.txt │ │ ├── 002.txt │ │ ├── 003.txt │ │ └── 004.txt │ ├── ignore_list │ │ ├── 001-simple.valid.json │ │ ├── 002-framework.valid.json │ │ ├── 002-project.valid.json │ │ └── 002-system.valid.json │ └── pack │ │ ├── 001-simple.valid.json │ │ ├── 002-cloud.valid.json │ │ ├── 002-parent.valid.json │ │ ├── 002-pki-dsa.valid.json │ │ ├── 002-pki-rsa.valid.json │ │ └── 002-pki.valid.json ├── test_filter_ignore_list.py ├── test_loader_filepath.py ├── test_model_ignore_list.py ├── test_model_pack.py ├── test_output_sarif.py └── test_scanner_rule.py └── wrapper └── stacs-scan /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Own everything by default. This can be changed later and as needed. 
2 | * @darkarnium 3 | -------------------------------------------------------------------------------- /.github/workflows/check.yml: -------------------------------------------------------------------------------- 1 | name: Check 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | check: 10 | strategy: 11 | matrix: 12 | python: ['3.9', '3.10', '3.11'] 13 | 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: Install Dependencies 19 | run: | 20 | sudo apt update 21 | sudo apt install -y libarchive13 libarchive-dev 22 | 23 | - name: Configure Python (${{ matrix.python }}) 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python }} 27 | 28 | - name: Install Tox 29 | run: | 30 | python -m pip install --upgrade pip wheel setuptools 31 | pip install tox 32 | 33 | - name: Run Linters (${{ matrix.python }}) 34 | run: | 35 | tox -e linters 36 | 37 | - name: Run Tests (${{ matrix.python }}) 38 | run: | 39 | tox -e py3 40 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | build_wheels: 9 | name: Build wheels on ${{ matrix.os }} 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | os: [ubuntu-22.04, ubuntu-20.04, macos-11, macos-12] 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | with: 18 | ref: ${{ github.event.inputs.release }} 19 | 20 | - name: Configure Python 21 | uses: actions/setup-python@v3 22 | 23 | # TODO: This may result in macOS compiling against a newer version of libarchive 24 | # than Linux. 25 | - name: Install dependencies (macOS) 26 | if: startsWith(matrix.os, 'macos-') 27 | run: brew install libarchive 28 | 29 | - name: Install cibuildwheel 30 | run: python -m pip install cibuildwheel==2.12.3 31 | 32 | - name: Build wheels (macOS) 33 | run: python -m cibuildwheel --output-dir wheelhouse 34 | if: startsWith(matrix.os, 'macos-') 35 | 36 | - name: Build wheels (Ubuntu) 37 | run: python -m cibuildwheel --output-dir wheelhouse 38 | if: startsWith(matrix.os, 'ubuntu-') 39 | env: 40 | CIBW_BEFORE_ALL_LINUX: > 41 | curl -sOL https://github.com/libarchive/libarchive/releases/download/v3.6.1/libarchive-3.6.1.tar.gz && 42 | tar -zxvf libarchive-3.6.1.tar.gz && 43 | cd libarchive-3.6.1/ && 44 | ./configure --without-lzo2 --without-nettle --without-xml2 --without-openssl --with-expat && 45 | make && 46 | make install 47 | 48 | - uses: actions/upload-artifact@v3 49 | with: 50 | path: ./wheelhouse/*.whl 51 | 52 | build_sdist: 53 | runs-on: ubuntu-latest 54 | steps: 55 | - uses: actions/checkout@v2 56 | 57 | - name: Configure Python 58 | uses: actions/setup-python@v2 59 | with: 60 | python-version: 3.11 61 | 62 | - name: Build Python sdist 63 | run: | 64 | python -m pip install --upgrade pip wheel setuptools 65 | pip install build 66 | python -m build --sdist --outdir dist/ . 
67 | 68 | - uses: actions/upload-artifact@v3 69 | with: 70 | path: dist/*.tar.gz 71 | 72 | publish: 73 | needs: [build_wheels, build_sdist] 74 | runs-on: ubuntu-latest 75 | steps: 76 | - uses: actions/checkout@v2 77 | with: 78 | ref: ${{ github.event.inputs.release }} 79 | 80 | - uses: actions/download-artifact@v3 81 | with: 82 | name: artifact 83 | path: dist 84 | 85 | - name: Extract version 86 | run: | 87 | git clone https://www.github.com/stacscan/stacs-rules.git /tmp/stacs-rules 88 | pushd /tmp/stacs-rules 89 | RULES_VERSION="$(git rev-parse --short HEAD)" 90 | popd 91 | STACS_VERSION="$(python -c 'exec(open("stacs/scan/__about__.py").read()) ; print(__version__, end="")')" 92 | echo "IMAGE_VERSION=${STACS_VERSION}-r${RULES_VERSION}" >> "${GITHUB_ENV}" 93 | echo "STACS_VERSION=${STACS_VERSION}" >> "${GITHUB_ENV}" 94 | 95 | - name: Publish Python package 96 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 97 | with: 98 | user: __token__ 99 | password: ${{ secrets.PYPI_TOKEN }} 100 | 101 | - name: Wait a minute for PyPi to catch up 102 | run: sleep 60s 103 | shell: bash 104 | 105 | - name: Login to DockerHub 106 | uses: docker/login-action@v1 107 | with: 108 | username: ${{ secrets.DOCKERHUB_USERNAME }} 109 | password: ${{ secrets.DOCKERHUB_TOKEN }} 110 | 111 | - name: Build and push Docker image 112 | id: docker_build 113 | uses: docker/build-push-action@v2 114 | with: 115 | context: . 116 | push: true 117 | tags: stacscan/stacs:latest,stacscan/stacs:${{ env.IMAGE_VERSION }} 118 | build-args: | 119 | VERSION=${{ env.IMAGE_VERSION }} 120 | STACS_BUILD=${{ env.STACS_VERSION }} 121 | -------------------------------------------------------------------------------- /.github/workflows/publish_to_testing.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Testing 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | commit: 7 | description: The commit ref to build and release to PyPI testing. 8 | required: true 9 | 10 | jobs: 11 | build_wheels: 12 | name: Build wheels on ${{ matrix.os }} 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | os: [ubuntu-22.04, ubuntu-20.04, macos-11, macos-12] 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | with: 21 | ref: ${{ github.event.inputs.release }} 22 | 23 | - name: Configure Python 24 | uses: actions/setup-python@v3 25 | 26 | # TODO: This may result in macOS compiling against a newer version of libarchive 27 | # than Linux. 28 | - name: Install dependencies (macOS) 29 | if: startsWith(matrix.os, 'macos-') 30 | run: brew install libarchive 31 | 32 | # This is rather unpleasant and the package versioning should be adjusted to 33 | # allow snapshot build numbers to be injected via setuptools, etc. 
34 | - name: Set development version suffix 35 | run: | 36 | sed -E -i -r 's/^__version__(.*)?"$/__version__\1-dev.${{ github.run_number }}"/' \ 37 | stacs/scan/__about__.py 38 | 39 | - name: Install cibuildwheel 40 | run: python -m pip install cibuildwheel==2.12.3 41 | 42 | - name: Build wheels (macOS) 43 | run: python -m cibuildwheel --output-dir wheelhouse 44 | if: startsWith(matrix.os, 'macos-') 45 | env: 46 | CIBW_BEFORE_BUILD: pip install pybind11 47 | CIBW_ENVIRONMENT: > 48 | CPPFLAGS="-std=c++11 -I$(find `brew --cellar libarchive` -name include -type d)" \ 49 | LDFLAGS="-L$(find `brew --cellar libarchive` -name include -type d)" \ 50 | PKG_CONFIG="$(find `brew --cellar libarchive` -name pkgconfig -type d)" 51 | 52 | - name: Build wheels (Ubuntu) 53 | run: python -m cibuildwheel --output-dir wheelhouse 54 | if: startsWith(matrix.os, 'ubuntu-') 55 | env: 56 | CIBW_BEFORE_BUILD: pip install pybind11 57 | CIBW_BEFORE_ALL_LINUX: > 58 | curl -sOL https://github.com/libarchive/libarchive/releases/download/v3.6.1/libarchive-3.6.1.tar.gz && 59 | tar -zxvf libarchive-3.6.1.tar.gz && 60 | cd libarchive-3.6.1/ && 61 | ./configure --without-lzo2 --without-nettle --without-xml2 --without-openssl --with-expat && 62 | make && 63 | make install 64 | 65 | - uses: actions/upload-artifact@v3 66 | with: 67 | path: ./wheelhouse/*.whl 68 | 69 | build_sdist: 70 | runs-on: ubuntu-latest 71 | steps: 72 | - uses: actions/checkout@v2 73 | 74 | - name: Configure Python 75 | uses: actions/setup-python@v2 76 | with: 77 | python-version: 3.11 78 | 79 | # This is rather unpleasant and the package versioning should be adjusted to 80 | # allow snapshot build numbers to be injected via setuptools, etc. 81 | - name: Set development version suffix 82 | run: | 83 | sed -E -i -r 's/^__version__(.*)?"$/__version__\1-dev.${{ github.run_number }}"/' \ 84 | stacs/scan/__about__.py 85 | 86 | - name: Build Python sdist 87 | run: | 88 | python -m pip install --upgrade pip wheel setuptools 89 | pip install build 90 | python -m build --sdist --outdir dist/ . 91 | 92 | - uses: actions/upload-artifact@v3 93 | with: 94 | path: dist/*.tar.gz 95 | 96 | publish: 97 | needs: [build_wheels, build_sdist] 98 | runs-on: ubuntu-latest 99 | steps: 100 | - uses: actions/download-artifact@v3 101 | with: 102 | name: artifact 103 | path: dist 104 | 105 | - name: Publish Python package 106 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 107 | with: 108 | user: __token__ 109 | password: ${{ secrets.PYPI_TESTING_TOKEN }} 110 | repository_url: "https://test.pypi.org/legacy/" 111 | -------------------------------------------------------------------------------- /.github/workflows/update.yml: -------------------------------------------------------------------------------- 1 | name: Update 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | release: 7 | description: The tagged release version to rebuild with the latest rules. 
8 | default: 0.0.0 9 | required: true 10 | 11 | jobs: 12 | update: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - name: Configure Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: 3.11 21 | 22 | - name: Extract version 23 | run: | 24 | git clone https://www.github.com/stacscan/stacs-rules.git /tmp/stacs-rules 25 | pushd /tmp/stacs-rules 26 | RULES_VERSION="$(git rev-parse --short HEAD)" 27 | popd 28 | STACS_VERSION="$(python -c 'exec(open("stacs/scan/__about__.py").read()) ; print(__version__, end="")')" 29 | echo "IMAGE_VERSION=${STACS_VERSION}-r${RULES_VERSION}" >> "${GITHUB_ENV}" 30 | echo "STACS_VERSION=${STACS_VERSION}" >> "${GITHUB_ENV}" 31 | 32 | - name: Login to DockerHub 33 | uses: docker/login-action@v1 34 | with: 35 | username: ${{ secrets.DOCKERHUB_USERNAME }} 36 | password: ${{ secrets.DOCKERHUB_TOKEN }} 37 | 38 | - name: Build and push Docker image 39 | id: docker_build 40 | uses: docker/build-push-action@v2 41 | with: 42 | context: . 43 | push: true 44 | tags: stacscan/stacs:latest,stacscan/stacs:${{ env.IMAGE_VERSION }} 45 | build-args: | 46 | VERSION=${{ env.IMAGE_VERSION }} 47 | STACS_BUILD=${{ env.STACS_VERSION }} 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | # Tracker 141 | TODO.md 142 | 143 | # Profiling information. 144 | *.prof 145 | result.json 146 | stacs-rules/ 147 | 148 | # macOS files. 149 | .DS_Store 150 | 151 | # Wheel build. 152 | wheelhouse/ 153 | 154 | # Ignore compiled shared objects. 155 | *.so 156 | -------------------------------------------------------------------------------- /.vscode/c_cpp_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "Linux", 5 | "includePath": [ 6 | "${workspaceFolder}/**", 7 | "/usr/include/python3.9" 8 | ], 9 | "defines": [], 10 | "compilerPath": "/usr/bin/gcc", 11 | "cStandard": "gnu17", 12 | "cppStandard": "gnu++14", 13 | "intelliSenseMode": "linux-gcc-x64" 14 | } 15 | ], 16 | "version": 4 17 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintEnabled": false, 3 | "python.linting.flake8Enabled": true, 4 | "python.linting.enabled": true, 5 | "python.formatting.provider": "black", 6 | "editor.formatOnSave": true, 7 | "editor.codeActionsOnSave": { 8 | "source.organizeImports": true 9 | }, 10 | "editor.rulers": [ 11 | 79, 12 | 88 13 | ], 14 | "C_Cpp.clang_format_fallbackStyle": "{ BasedOnStyle: Google, IndentWidth: 4, ColumnLimit: 0}" 15 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-alpine 2 | 3 | # Allow build-time specification of version. 4 | ARG VERSION 5 | ARG STACS_BUILD 6 | 7 | # Allow runtime tuning. 8 | ENV STACS_SKIP_UNPROCESSABLE=0 9 | ENV STACS_THREADS=10 10 | ENV STACS_DEBUG=0 11 | ENV STACS_OUTPUT_PRETTY=0 12 | 13 | # Keep things friendly. 14 | LABEL org.opencontainers.image.title="STACS" 15 | LABEL org.opencontainers.image.description="Static Token And Credential Scanner" 16 | LABEL org.opencontainers.image.url="https://www.github.com/stacscan/stacs" 17 | LABEL org.opencontainers.image.version=$VERSION 18 | 19 | # Install STACS into the container. 20 | WORKDIR /opt/stacs 21 | COPY wrapper/stacs-scan /usr/bin 22 | 23 | RUN apk add --no-cache git gcc musl-dev zstd && \ 24 | pip install --no-cache-dir stacs==$STACS_BUILD 25 | 26 | # Clone the latest STACS rules into the rules directory to enable out of the box use. 27 | # This can be mounted over using a volume mount to allow more specific rules to be 28 | # loaded. The same is true for "ignore-lists". 
Finally, there is a "cache" directory 29 | # configured as a mount to allow scans which need a lot of disk space to mount a scratch 30 | # volume so that Docker doesn't run out of disk :) 31 | RUN mkdir -p /mnt/stacs/input /mnt/stacs/rules /mnt/stacs/ignore /mnt/stacs/cache && \ 32 | git clone https://www.github.com/stacscan/stacs-rules /mnt/stacs/rules 33 | 34 | # Define a volume to allow mounting a local directory to scan. 35 | VOLUME /mnt/stacs/input 36 | VOLUME /mnt/stacs/rules 37 | VOLUME /mnt/stacs/ignore 38 | VOLUME /mnt/stacs/cache 39 | 40 | # Clean up. 41 | RUN apk del --purge git musl-dev gcc 42 | 43 | # Default to running stacs with the volume mounts. 44 | ENTRYPOINT ["stacs-scan"] 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, Peter Adkins 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Shield](https://img.shields.io/github/actions/workflow/status/stacscan/stacs/check.yml?label=Tests&style=flat-square)](https://github.com/stacscan/stacs/actions?workflow=Check) 2 | [![Shield](https://img.shields.io/github/actions/workflow/status/stacscan/stacs/publish.yml?label=Deploy&style=flat-square)](https://github.com/stacscan/stacs/actions?workflow=Publish) 3 | [![Shield](https://img.shields.io/docker/pulls/stacscan/stacs?style=flat-square)](https://hub.docker.com/r/stacscan/stacs) 4 | [![Shield](https://img.shields.io/docker/image-size/stacscan/stacs?style=flat-square)](https://hub.docker.com/r/stacscan/stacs/tags?page=1&ordering=last_updated) 5 | [![Shield](https://img.shields.io/twitter/follow/stacscan?style=flat-square)](https://twitter.com/stacscan) 6 |
<p align="center"><img src="docs/images/STACS-Logo-RGB.small.png" alt="STACS Logo"><br />Static Token And Credential Scanner</p>
15 | 16 | ### What is it? 17 | 18 | STACS is a [YARA](https://virustotal.github.io/yara/) powered static credential scanner 19 | which supports binary file formats, analysis of nested archives, composable rulesets 20 | and ignore lists, and SARIF reporting. 21 | 22 | ### What does STACS support? 23 | 24 | Currently, STACS supports recursive unpacking of: 25 | 26 | * 7z, ar, bz2, cab, cpio, gz, iso, rar, rpm, tar, xar, xz, zip, dmg 27 | 28 | As STACS works on detected file types, proprietary file formats and other 29 | file types which are based on these formats are automatically supported. This includes Docker 30 | images, Android APKs, Java JAR files, RPMs, Debian packages (`.deb`), macOS packages 31 | (`.pkg`), and more! 32 | 33 | ### Who should use STACS? 34 | 35 | STACS is designed for use by any team that releases binary artifacts. STACS provides 36 | developers the ability to automatically check for accidental inclusion of static 37 | credentials and key material in their releases. 38 | 39 | However, this doesn't mean STACS can't help with SaaS applications, enterprise 40 | software, or even source code! 41 | 42 | As an example, STACS can be used to find static credentials in Docker images uploaded 43 | to public and private container registries. It can also be used to find credentials 44 | accidentally compiled into executables, packages for mobile devices, and "enterprise 45 | archives" - such as those used by Java application servers. 46 | 47 | ### How does it work? 48 | 49 | STACS detects static credentials using "rule packs" provided to STACS when run. These 50 | rule packs define a set of YARA rules to run against files provided to STACS. When a 51 | match against a rule is found, a "finding" is generated. These findings represent 52 | potential credentials inside a file, and are reported for a developer to remediate 53 | or "ignore". 54 | 55 | If a finding is found to be a false positive - that is, a match on something other 56 | than a real credential - the developer can generate a set of "ignore lists" to ensure 57 | that these matches don't appear in future reports. 58 | 59 | The real power of STACS comes from the automatic detection and unpacking of nested 60 | archives, and composable ignore lists and rule packs. 61 | 62 | #### Ignore lists? 63 | 64 | In order to allow flexible and collaborative usage, STACS supports composable ignore 65 | lists. This allows an ignore list to include other ignore lists, enabling 66 | composition of a "tree of ignores" based on organisational guidelines. These ignore 67 | lists are especially useful in organisations where many of the same frameworks or 68 | products are used. If a team has already marked a finding as a false positive, other 69 | teams get the benefit of not having to triage the same finding. 70 | 71 | #### Rule packs? 72 | 73 | In the same manner as ignore lists, rule packs are also composable. This enables an 74 | organisation to define a baseline set of rules for use by all teams, while still 75 | allowing teams to maintain rulesets specific to their products. 76 | 77 | ### How do I use it? 78 | 79 | The easiest way to use STACS is via the Docker images published to Docker Hub. 80 | However, STACS can also be installed directly from Python's PyPI, or by cloning this 81 | repository. See the relevant sections below to get started! 82 | 83 | A cloud based service is coming soon which allows integration directly into build 84 | and release pipelines to enable detection of static credentials before release!
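For teams who want to embed STACS into their own tooling, the building blocks that the `stacs` CLI wires together can also be driven from Python. The sketch below mirrors the flow in `stacs/scan/entrypoint/cli.py` (load a rule pack, expand targets into a cache directory, run the scanners, apply ignore lists, render SARIF); it is an illustration of the internals rather than a stable, documented API, and the rule pack, ignore list, and target paths shown are placeholders:

```
import os
import tempfile

import stacs.scan

# Load a rule pack and an (optional) ignore list.
pack = stacs.scan.model.pack.from_file("~/.stacs/pack.json")
ignored = stacs.scan.model.ignore_list.from_file("my-ignore-list.json")

# Expand the target path, unpacking any archives into a scratch cache directory.
target = os.path.abspath("dist/")
cache = tempfile.mkdtemp()
targets = stacs.scan.loader.filepath.finder(target, cache, skip_on_corrupt=True, workers=10)

# Run every registered scanner over the targets, then apply the ignore list.
findings = []
for scanner in stacs.scan.scanner.__all__:
    findings.extend(getattr(stacs.scan.scanner, scanner).run(targets, pack, workers=10))
findings = stacs.scan.filter.ignore_list.process(findings, ignored)

# Render the findings as SARIF (or use stacs.scan.output.pretty for console output).
print(stacs.scan.output.sarif.render(target, findings, pack))
```

The Docker image and the `stacs` CLI described below remain the supported ways to run a scan; the snippet above is only intended to show how the loader, scanners, filters, and output modules fit together.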
85 | 86 | #### Docker 87 | 88 | Using the published images, STACS can be used to scan artifacts right away! The STACS 89 | Docker image provides a number of volume mounts, allowing the files to be scanned to be 90 | mounted directly into the scan container. 91 | 92 | As an example, to scan everything in the current folder, the following command can be 93 | run (Docker must be installed). 94 | 95 | ``` 96 | docker run \ 97 | --rm \ 98 | -v "$(pwd):/mnt/stacs/input:ro" \ 99 | stacscan/stacs:latest 100 | ``` 101 | 102 | If you would like to receive "pretty", human-readable output, the following command should be 103 | used: 104 | 105 | ``` 106 | docker run \ 107 | --rm \ 108 | -e STACS_OUTPUT_PRETTY=1 \ 109 | -v "$(pwd):/mnt/stacs/input:ro" \ 110 | stacscan/stacs:latest 111 | ``` 112 | 113 | By default, STACS will output any findings in SARIF format directly to STDOUT and, in 114 | order to keep things orderly, all log messages will be sent to STDERR. For more advanced 115 | use cases, a number of other volume mounts are provided. These allow the user to control 116 | the rule packs, ignore lists, and cache directory to use. 117 | 118 | #### PyPI 119 | 120 | STACS can also be installed directly from Python's PyPI. This provides a `stacs` command 121 | which can then be used by developers to scan projects directly in their local 122 | development environments. 123 | 124 | STACS can be installed directly from PyPI using: 125 | 126 | ``` 127 | pip install stacs 128 | ``` 129 | 130 | **Please Note:** The PyPI release of STACS does not come with any rules. These will also 131 | need to be cloned from the [community rules repository](https://github.com/stacscan/stacs-rules) 132 | for STACS to work! 133 | 134 | ### FAQ 135 | 136 | #### Is there a hosted version of STACS? 137 | 138 | Not yet. However, there are plans for a hosted version of STACS which can be easily 139 | integrated into existing build systems, and which contains additional prebuilt rule 140 | packs and ignore lists. 141 | 142 | #### What do I do about false positives? 143 | 144 | Unfortunately, false positives are an inevitable side effect of detecting 145 | static credentials. If rules are too granular then rule maintenance becomes a burden 146 | and STACS may miss credentials. If rules are too coarse then STACS may generate too 147 | many false positives! 148 | 149 | To assist, STACS provides a number of tools to reduce the number 150 | of false positives which make it into final reports. 151 | 152 | Primarily, STACS provides a mechanism which allows users to define composable ignore 153 | lists which allow a set of findings to be "ignored". These rules can be as coarse as 154 | ignoring all files based on a pattern, or as granular as a specific finding on a 155 | particular line of a file. 156 | 157 | This information is automatically propagated through into reports, so "ignored" findings 158 | will be marked as "suppressed" in SARIF output while also including the reason for the 159 | ignore in the output for tracking. 160 | 161 | #### How do I view the results? 162 | 163 | If using "pretty" output (`--pretty` / `STACS_OUTPUT_PRETTY`), results will be printed 164 | in a human readable format to the console.
165 | 166 | ![Human Output](https://raw.githubusercontent.com/stacscan/stacs/main/docs/images/Human-Output-Example.png) 167 | 168 | If using SARIF, there are a number of viewers available which make this data easier to 169 | read, such as [this great web based viewer from](https://microsoft.github.io/sarif-web-component/) 170 | Microsoft. An example of the findings from a Docker container image has been included 171 | below: 172 | 173 | ![Microsoft SARIF Viewer Output](https://raw.githubusercontent.com/stacscan/stacs/main/docs/images/SARIF-Viewer-Example.png) 174 | 175 | #### The performance is really, really bad when running in Docker on macOS! 176 | 177 | Unfortunately, this appears to be due to a limitation of Docker Desktop for Mac. I/O 178 | for bind mounts [is really, really slow](https://github.com/docker/for-mac/issues/3677). 179 | -------------------------------------------------------------------------------- /docs/images/Human-Output-Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/docs/images/Human-Output-Example.png -------------------------------------------------------------------------------- /docs/images/SARIF-Viewer-Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/docs/images/SARIF-Viewer-Example.png -------------------------------------------------------------------------------- /docs/images/STACS-Logo-RGB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/docs/images/STACS-Logo-RGB.png -------------------------------------------------------------------------------- /docs/images/STACS-Logo-RGB.small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/docs/images/STACS-Logo-RGB.small.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm", "pybind11"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "stacs" 7 | readme = "README.md" 8 | description = "Static Token And Credential Scanner." 
9 | requires-python = ">=3.8" 10 | dynamic = ["version"] 11 | authors = [{name = "Peter Adkins"}] 12 | license = {text = "BSD-3-Clause"} 13 | classifiers = [ 14 | "Programming Language :: Python :: 3.8", 15 | "Natural Language :: English", 16 | ] 17 | dependencies = [ 18 | "click>=8.1.0,<9.0", 19 | "yara-python==4.2.3", 20 | "pydantic>=1.10.0,<2.0", 21 | "colorama>=0.4.0,<1.0", 22 | "zstandard>=0.18.0,<1.0", 23 | ] 24 | 25 | [project.optional-dependencies] 26 | tests = [ 27 | "black", 28 | "coverage", 29 | "ruff", 30 | "types-setuptools", 31 | "mypy", 32 | "pip-tools", 33 | "mock", 34 | "pytest", 35 | "pytest-cov", 36 | "responses", 37 | "tox", 38 | "ipython", 39 | ] 40 | 41 | [tool.setuptools.dynamic] 42 | version = {attr = "stacs.scan.__about__.__version__"} 43 | 44 | [tool.setuptools.packages.find] 45 | where = ["."] 46 | include = ["stacs.*"] 47 | 48 | [project.scripts] 49 | stacs = "stacs.scan.entrypoint.cli:main" 50 | 51 | [tool.ruff] 52 | line-length = 88 53 | extend-select = [ 54 | "B", # flake8-bugbear 55 | "I", # isort 56 | ] 57 | ignore = [ 58 | "B904", 59 | "I001", 60 | "B005", 61 | ] 62 | 63 | [tool.mypy] 64 | files = [ 65 | "./stacs/**/*.py", 66 | "./tests/**/*.py" 67 | ] 68 | allow_redefinition = false 69 | check_untyped_defs = true 70 | disallow_any_generics = true 71 | disallow_untyped_calls = false 72 | ignore_errors = false 73 | ignore_missing_imports = true 74 | implicit_reexport = false 75 | local_partial_types = true 76 | strict_optional = true 77 | strict_equality = true 78 | no_implicit_optional = true 79 | warn_no_return = true 80 | warn_unused_ignores = true 81 | warn_redundant_casts = true 82 | warn_unused_configs = true 83 | warn_unreachable = true 84 | 85 | [tool.pytest.ini_options] 86 | junit_family = "xunit2" 87 | norecursedirs = ".*" 88 | self-contained-html = true 89 | testpaths = [ 90 | "tests" 91 | ] 92 | addopts = """ 93 | --strict 94 | --tb=auto 95 | --cov=stacs 96 | --cov-report=term-missing:skip-covered 97 | --cov-branch 98 | -p no:doctest 99 | -p no:warnings 100 | -s 101 | """ 102 | 103 | [tool.tox] 104 | legacy_tox_ini = """ 105 | [tox] 106 | envlist = linters,py3 107 | 108 | [testenv] 109 | pip_version = pip 110 | extras = tests 111 | commands = pytest -c pyproject.toml 112 | srcs = stacs 113 | 114 | [testenv:linters] 115 | basepython = python3 116 | usedevelop = true 117 | commands = 118 | {[testenv:ruff]commands} 119 | 120 | [testenv:ruff] 121 | basepython = python3 122 | skip_install = true 123 | commands = 124 | ruff check {[testenv]srcs} 125 | 126 | [testenv:mypy] 127 | basepython3 = python3 128 | skip_install = true 129 | commands = 130 | - mypy --config-file pyproject.toml {[testenv]srcs} 131 | """ 132 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup required for pybind11 built native code only.""" 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | from typing import List 7 | 8 | from pybind11.setup_helpers import Pybind11Extension 9 | from setuptools import setup 10 | 11 | ext_modules = [ 12 | Pybind11Extension( 13 | "stacs.native.archive", 14 | ["stacs/native/archive/src/archive.cpp"], 15 | libraries=["archive"], 16 | ), 17 | ] 18 | 19 | 20 | def run(command: List[str]): 21 | """Run a command, returning the output as a string or an exception on failure.""" 22 | result = subprocess.run(command, capture_output=True, check=True) 23 | return str(result.stdout, "utf-8").strip() 24 | 25 | 26 | # 
macOS requires a bit of special handling to ensure that the - likely - brew installed 27 | # libarchive is discoverable. The macOS built-in libarchive is no good, as it's too 28 | # old. 29 | if platform.system() == "Darwin": 30 | libarchive = run(["brew", "--cellar", "libarchive"]) 31 | libarchive_headers = run(["find", libarchive, "-name", "include", "-type", "d"]) 32 | libarchive_pkgconfig = run(["find", libarchive, "-name", "pkgconfig", "-type", "d"]) 33 | 34 | # Setup the environment for the build. 35 | os.environ["LDFLAGS"] = f"-L{libarchive_headers}" 36 | os.environ["PKG_CONFIG"] = libarchive_pkgconfig 37 | os.environ["CPPFLAGS"] = " ".join( 38 | [ 39 | os.environ.get("CPPFLAGS", ""), 40 | "-std=c++11", 41 | f"-I{libarchive_headers}", 42 | ] 43 | ) 44 | 45 | setup(ext_modules=ext_modules, packages=[]) 46 | -------------------------------------------------------------------------------- /stacs/__init__.py: -------------------------------------------------------------------------------- 1 | """STACS - Static Token And Credential Scanner. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | __import__("pkg_resources").declare_namespace(__name__) 7 | -------------------------------------------------------------------------------- /stacs/native/archive/src/archive.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file archive.cpp 3 | * @author Peter Adkins 4 | * @date 2022-07-02 5 | */ 6 | 7 | #include 8 | 9 | #include "archiveentry.cpp" 10 | #include "archivereader.cpp" 11 | 12 | namespace py = pybind11; 13 | 14 | PYBIND11_MODULE(archive, module) { 15 | module.doc() = "STACS Native Extensions for Archives"; 16 | module.attr("__name__") = "stacs.native.archive"; 17 | 18 | py::class_(module, "ArchiveReader") 19 | .def(py::init()) 20 | .def_property_readonly("filename", &ArchiveReader::getFilename) 21 | .def("__enter__", &ArchiveReader::enter) 22 | .def("__exit__", &ArchiveReader::exit) 23 | .def("__iter__", &ArchiveReader::iter) 24 | .def("__next__", &ArchiveReader::next) 25 | .def("read", &ArchiveReader::read) 26 | .doc() = "An interface to read archive contents (via libarchive)"; 27 | 28 | py::class_(module, "ArchiveEntry") 29 | .def_property_readonly("filename", &ArchiveEntry::getFilename) 30 | .def_property_readonly("isdir", &ArchiveEntry::isDirectory) 31 | .def_property_readonly("size", &ArchiveEntry::getSize) 32 | .doc() = "Represents a member of an Archive"; 33 | 34 | py::register_exception(module, "ArchiveError"); 35 | } 36 | -------------------------------------------------------------------------------- /stacs/native/archive/src/archiveentry.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file archivereader.cpp 3 | * @author Peter Adkins 4 | * @date 2022-07-02 5 | */ 6 | 7 | #include "archiveentry.hpp" 8 | 9 | #include 10 | 11 | #include 12 | 13 | ArchiveEntry::ArchiveEntry(struct archive_entry *entry) { 14 | this->entry = entry; 15 | } 16 | 17 | ArchiveEntry::~ArchiveEntry() { 18 | } 19 | 20 | /** 21 | * Gets the filename of the archive member. 22 | * 23 | * @return std::string 24 | */ 25 | std::string ArchiveEntry::getFilename() { 26 | return archive_entry_pathname_utf8(this->entry); 27 | } 28 | 29 | /** 30 | * Gets the file size of the archive member. 31 | * 32 | * @return int64_t 33 | */ 34 | int64_t ArchiveEntry::getSize() { 35 | return archive_entry_size(this->entry); 36 | } 37 | 38 | /** 39 | * Checks whether the current archive member is a directory. 
40 | * 41 | * @return bool 42 | */ 43 | bool ArchiveEntry::isDirectory() { 44 | if (S_ISDIR(archive_entry_mode(this->entry)) != 0) { 45 | return true; 46 | } else { 47 | return false; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /stacs/native/archive/src/archiveentry.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file archivereader.cpp 3 | * @author Peter Adkins 4 | * @date 2022-07-02 5 | */ 6 | 7 | #pragma once 8 | 9 | extern "C" { 10 | #include 11 | #include 12 | } 13 | 14 | #include 15 | 16 | class ArchiveEntry { 17 | public: 18 | ArchiveEntry(struct archive_entry *entry); 19 | ~ArchiveEntry(); 20 | 21 | std::string getFilename(); 22 | int64_t getSize(); 23 | bool isDirectory(); 24 | 25 | private: 26 | struct archive_entry *entry; 27 | }; 28 | -------------------------------------------------------------------------------- /stacs/native/archive/src/archivereader.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file archivereader.cpp 3 | * @author Peter Adkins 4 | * @date 2022-07-02 5 | */ 6 | 7 | #include "archivereader.hpp" 8 | 9 | #include "archiveentry.hpp" 10 | 11 | extern "C" { 12 | #include 13 | #include 14 | } 15 | 16 | const char *ArchiveError::what() const noexcept { 17 | return "Unable to open archive for reading\n"; 18 | } 19 | 20 | ArchiveReader::ArchiveReader(const std::string &filename) : filename(filename) { 21 | } 22 | 23 | ArchiveReader::~ArchiveReader() { 24 | } 25 | 26 | ArchiveReader *ArchiveReader::iter() { 27 | return this; 28 | } 29 | 30 | /** 31 | * Gets the filename of the currently open file. 32 | * 33 | * @return std::string 34 | */ 35 | std::string ArchiveReader::getFilename() { 36 | return this->filename; 37 | } 38 | 39 | /** 40 | * Reads the currently selected archive member into a buffer, returning the 41 | * number of bytes read. 0 will be returned when no more data is available. 42 | * 43 | * @return int 44 | */ 45 | pybind11::bytes ArchiveReader::read() { 46 | std::vector chunk; 47 | chunk.resize(CHUNK_SIZE); 48 | 49 | int result = archive_read_data(this->archive, 50 | chunk.data(), 51 | chunk.size()); 52 | 53 | if (result < 0) { 54 | throw ArchiveError(); 55 | } 56 | 57 | return pybind11::bytes(chunk.data(), result); 58 | } 59 | 60 | /** 61 | * Find and return the next member in the archive. 62 | * 63 | * @return ArchiveEntry 64 | */ 65 | ArchiveEntry ArchiveReader::next() { 66 | int result = archive_read_next_header(this->archive, &this->entry); 67 | 68 | if (result == ARCHIVE_OK) { 69 | return ArchiveEntry(this->entry); 70 | } 71 | if (result == ARCHIVE_EOF) { 72 | throw pybind11::stop_iteration(); 73 | } 74 | 75 | throw ArchiveError(); 76 | } 77 | 78 | /** 79 | * Loads an archive on Python Context Manager enter. 80 | * 81 | * @return ArchiveReader* 82 | */ 83 | ArchiveReader *ArchiveReader::enter() { 84 | this->archive = archive_read_new(); 85 | 86 | // Enable all libarchive supported filters and formats. 87 | archive_read_support_filter_all(this->archive); 88 | archive_read_support_format_all(this->archive); 89 | 90 | // Attempt to open the archive. 91 | int result = archive_read_open_filename(this->archive, 92 | this->filename.c_str(), 93 | 10240); 94 | 95 | if (result != ARCHIVE_OK) { 96 | throw ArchiveError(); 97 | } 98 | 99 | return this; 100 | } 101 | 102 | /** 103 | * Cleans up the open archive on Python Context Manager exit. 
104 | * 105 | * @return true 106 | */ 107 | bool ArchiveReader::exit(pybind11::object exc_type, 108 | pybind11::object exc_value, 109 | pybind11::object exc_traceback) { 110 | int result = archive_read_free(this->archive); 111 | 112 | if (result == ARCHIVE_OK) { 113 | return true; 114 | } 115 | 116 | return false; 117 | } 118 | -------------------------------------------------------------------------------- /stacs/native/archive/src/archivereader.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file archivereader.hpp 3 | * @author Peter Adkins 4 | * @date 2022-07-02 5 | */ 6 | 7 | #pragma once 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | const int CHUNK_SIZE = 10240; 14 | 15 | class ArchiveEntry; 16 | 17 | class ArchiveReader { 18 | public: 19 | ArchiveReader(const std::string &filename); 20 | ~ArchiveReader(); 21 | 22 | ArchiveReader *enter(); 23 | bool exit(pybind11::object exc_type, 24 | pybind11::object exc_value, 25 | pybind11::object exc_traceback); 26 | 27 | pybind11::bytes read(); 28 | ArchiveEntry next(); 29 | ArchiveReader *iter(); 30 | std::string getFilename(); 31 | 32 | private: 33 | std::vector chunk; 34 | std::string filename; 35 | struct archive *archive; 36 | struct archive_entry *entry; 37 | }; 38 | 39 | struct ArchiveError : std::exception { 40 | const char *what() const noexcept; 41 | }; 42 | -------------------------------------------------------------------------------- /stacs/scan/__about__.py: -------------------------------------------------------------------------------- 1 | """STACS - Static Token And Credential Scanner. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | __title__ = "stacs" 7 | __summary__ = "Static Token And Credential Scanner." 8 | __version__ = "0.5.1" 9 | __author__ = "Peter Adkins" 10 | __uri__ = "https://www.github.com/stacscan/stacs/" 11 | __license__ = "BSD-3-Clause" 12 | -------------------------------------------------------------------------------- /stacs/scan/__init__.py: -------------------------------------------------------------------------------- 1 | """STACS - Static Token And Credential Scanner. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan import __about__ # noqa:F401 7 | from stacs.scan import constants # noqa:F401 8 | from stacs.scan import filter # noqa:F401 9 | from stacs.scan import helper # noqa:F401 10 | from stacs.scan import loader # noqa:F401 11 | from stacs.scan import model # noqa:F401 12 | from stacs.scan import output # noqa:F401 13 | from stacs.scan import scanner # noqa:F401 14 | -------------------------------------------------------------------------------- /stacs/scan/constants.py: -------------------------------------------------------------------------------- 1 | """Define constants commonly used throughout STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | # The size of chunks to use when reading files. 7 | CHUNK_SIZE = 65536 8 | 9 | # The size, in bytes, of the sample window. 10 | WINDOW_SIZE = 20 11 | 12 | # Define the default cache directory, used to unpack archives into. 13 | CACHE_DIRECTORY = "/tmp" 14 | 15 | # Define the character to use when constructed paths to findings which are inside of 16 | # archives. 17 | ARCHIVE_FILE_SEPARATOR = "!" 18 | 19 | # Define an exit code to use when there are unsuppressed findings. 20 | EXIT_CODE_UNSUPPRESSED = 100 21 | 22 | # External licenses will be displayed during STACS banner. 
23 | EXTERNAL_LICENSES = { 24 | "libarchive": [ 25 | "https://github.com/libarchive/libarchive/blob/master/COPYING", 26 | ], 27 | "yara": [ 28 | "https://github.com/VirusTotal/yara-python/blob/master/LICENSE", 29 | ], 30 | } 31 | -------------------------------------------------------------------------------- /stacs/scan/entrypoint/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines STACS entrypoints. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan.entrypoint import cli # noqa:F401 7 | -------------------------------------------------------------------------------- /stacs/scan/entrypoint/cli.py: -------------------------------------------------------------------------------- 1 | """Defines the primary STACS CLI entrypoint. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import logging 7 | import os 8 | import shutil 9 | import sys 10 | import time 11 | from types import TracebackType 12 | from typing import Callable, List 13 | 14 | import click 15 | import stacs 16 | 17 | 18 | def unlink_error(function: Callable, path: str, exc_info: TracebackType): 19 | """Provides a mechanism to better handle failures to delete files after a run. 20 | 21 | Currently, this just logs out. In future we should look to fix the permissions on 22 | the path / parent and call func(path) to attempt the deletion again. However, we'll 23 | need to ensure that path is actually part of the cache directory. So for now, we 24 | log. 25 | """ 26 | logger = logging.getLogger("stacs") 27 | logger.warning(f"Unable to remove {path}") 28 | 29 | 30 | @click.command() 31 | @click.version_option() 32 | @click.option( 33 | "--debug", 34 | is_flag=True, 35 | help="Increase verbosity of logs for debugging", 36 | ) 37 | @click.option( 38 | "--pretty", 39 | help="Display outputs in a human-readable tree, rather than SARIF.", 40 | is_flag=True, 41 | ) 42 | @click.option( 43 | "--threads", 44 | help="The number of threads to use when processing files", 45 | default=10, 46 | ) 47 | @click.option( 48 | "--rule-pack", 49 | help="The path to the rule pack to load.", 50 | default="~/.stacs/pack.json", 51 | ) 52 | @click.option( 53 | "--ignore-list", 54 | help="The path to the ignore list to load (if required).", 55 | ) 56 | @click.option( 57 | "--skip-unprocessable", 58 | help="Skip unprocessable / corrupt archives with a warning.", 59 | is_flag=True, 60 | ) 61 | @click.option( 62 | "--cache-directory", 63 | help="The path to use as a cache - used when unpacking archives.", 64 | default=stacs.scan.constants.CACHE_DIRECTORY, 65 | ) 66 | @click.argument("paths", nargs=-1, required=True) 67 | def main( 68 | debug: bool, 69 | pretty: bool, 70 | threads: int, 71 | rule_pack: str, 72 | ignore_list: str, 73 | skip_unprocessable: bool, 74 | cache_directory: str, 75 | paths: List[str], 76 | ) -> None: 77 | """STACS - Static Token And Credential Scanner.""" 78 | logging.basicConfig( 79 | level=logging.DEBUG if debug else logging.INFO, 80 | format="%(asctime)s - %(process)d - [%(levelname)s] %(message)s", 81 | ) 82 | logger = logging.getLogger("stacs") 83 | logger.info(f"STACS running with {threads} threads") 84 | 85 | # Licenses. 86 | for project, urls in stacs.scan.constants.EXTERNAL_LICENSES.items(): 87 | logger.info(f"STACS uses {project} (licenses may be found at {' '.join(urls)})") 88 | 89 | # Load the rule pack. 
90 | logger.info(f"Attempting to load rule pack from {rule_pack}") 91 | try: 92 | pack = stacs.scan.model.pack.from_file(rule_pack) 93 | except stacs.scan.exceptions.STACSException as err: 94 | logger.error(f"Unable to load rule pack: {err}") 95 | sys.exit(-1) 96 | 97 | # Load the ignore list. 98 | ignored = [] 99 | if ignore_list: 100 | logger.info(f"Attempting to load ignore list from {ignore_list}") 101 | try: 102 | ignored = stacs.scan.model.ignore_list.from_file(ignore_list) 103 | logger.debug(f"Loaded {len(ignored.ignore)} suppressions from ignore list.") 104 | except stacs.scan.exceptions.STACSException as err: 105 | logger.error(f"Unable to load ignore list: {err}") 106 | sys.exit(-1) 107 | 108 | # Append a timestamp to the cache directory to reduce the chance of collisions. 109 | cache_directory = os.path.join(cache_directory, str(int(time.time_ns() / 1000))) 110 | try: 111 | os.mkdir(cache_directory) 112 | logger.info(f"Using cache directory at {cache_directory}") 113 | except OSError as err: 114 | logger.error(f"Unable to create cache directory at {cache_directory}: {err}") 115 | sys.exit(-2) 116 | 117 | # Generate a list of candidate files to scan. 118 | targets = [] 119 | 120 | for path in paths: 121 | path = os.path.abspath(os.path.expanduser(path)) 122 | logger.info(f"Attempting to get a list of files to scan from {path}") 123 | try: 124 | targets.extend( 125 | stacs.scan.loader.filepath.finder( 126 | path, 127 | cache_directory, 128 | skip_on_corrupt=skip_unprocessable, 129 | workers=threads, 130 | ) 131 | ) 132 | except stacs.scan.exceptions.STACSException as err: 133 | logger.error(f"Unable to generate file list: {err}") 134 | sys.exit(-2) 135 | 136 | # Submit files for analysis. 137 | logger.info(f"Found {len(targets)} files for analysis") 138 | 139 | findings = [] 140 | for scanner in stacs.scan.scanner.__all__: 141 | try: 142 | findings.extend( 143 | getattr(stacs.scan.scanner, scanner).run(targets, pack, workers=threads) 144 | ) 145 | except stacs.scan.exceptions.InvalidFormatException as err: 146 | logger.error(f"Unable to load a rule in scanner {scanner}: {err}") 147 | continue 148 | 149 | # Filter findings by allow list. 150 | if ignored: 151 | findings = stacs.scan.filter.ignore_list.process(findings, ignored) 152 | 153 | # Clean-up cache directory. 154 | shutil.rmtree(cache_directory, onerror=unlink_error) 155 | 156 | # Determine the correct exit status based on whether there were unsuppressed 157 | # findings. 158 | exit_code = 0 159 | 160 | for finding in findings: 161 | if not finding.ignore: 162 | exit_code = stacs.scan.constants.EXIT_CODE_UNSUPPRESSED 163 | 164 | # Pretty print, if requested. 165 | if pretty: 166 | logger.info("Generating 'pretty' output from findings") 167 | stacs.scan.output.pretty.render(findings, pack) 168 | sys.exit(exit_code) 169 | 170 | # Default to SARIF output to STDOUT. 171 | logger.info("Generating SARIF from findings") 172 | try: 173 | sarif = stacs.scan.output.sarif.render(path, findings, pack) 174 | except stacs.scan.exceptions.STACSException as err: 175 | logger.error(f"Unable to generate SARIF: {err}") 176 | sys.exit(-3) 177 | 178 | # TODO: Add file output as an option. 179 | logger.info(f"Found {len(findings)} findings") 180 | print(sarif) 181 | -------------------------------------------------------------------------------- /stacs/scan/exceptions.py: -------------------------------------------------------------------------------- 1 | """STACS Exceptions. 
2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | 7 | class STACSException(Exception): 8 | """The most generic form of exception raised by STACS.""" 9 | 10 | 11 | class FileAccessException(STACSException): 12 | """Indicates an error occured while attempting to access a file.""" 13 | 14 | 15 | class InvalidFileException(STACSException): 16 | """Indicates the format of a file did not match what was expected.""" 17 | 18 | 19 | class InvalidFormatException(STACSException): 20 | """Indicates that the format of a rule did not match what was expected.""" 21 | 22 | 23 | class IgnoreListException(STACSException): 24 | """Indicates an invalid ignore list was provided.""" 25 | 26 | 27 | class NotImplementedException(STACSException): 28 | """Indicates that the requested method has not been implemented.""" 29 | 30 | 31 | class NoParentException(STACSException): 32 | """Indicates that a finding does not have a parent.""" 33 | -------------------------------------------------------------------------------- /stacs/scan/filter/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines filters supported by STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan.filter import ignore_list # noqa: F401 7 | -------------------------------------------------------------------------------- /stacs/scan/filter/ignore_list.py: -------------------------------------------------------------------------------- 1 | """Defines a filter which sets the ignore flag on entries present in an ignore list. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import re 7 | from typing import List 8 | 9 | from stacs.scan.exceptions import IgnoreListException 10 | from stacs.scan.model import finding, ignore_list 11 | 12 | 13 | def by_pattern(finding: finding.Entry, ignore: ignore_list.Entry) -> bool: 14 | """Process a regex ignore list entry.""" 15 | # Short circuit if no pattern is set. 16 | if not ignore.pattern: 17 | return False 18 | 19 | # If there's a match on the path, check whether the ignore is for the same module. 20 | if re.search(ignore.pattern, finding.path): 21 | if ignore.module != finding.source.module: 22 | return False 23 | 24 | # Then check whether the ignore is for the particular reference. 25 | if ignore.references: 26 | if finding.source.reference in ignore.references: 27 | return True 28 | 29 | return False 30 | 31 | # Or check whether the ignore is for the same offest. 32 | if ignore.offset is not None: 33 | if finding.location.offset == ignore.offset: 34 | return True 35 | return False 36 | 37 | # In this case this is a fairly permissive ignore. 38 | return True 39 | 40 | return False 41 | 42 | 43 | def by_path(finding: finding.Entry, ignore: ignore_list.Entry) -> bool: 44 | """Process a path based ignore list entry.""" 45 | # Short circuit if no path is set. 46 | if not ignore.path: 47 | return False 48 | 49 | # If there's a match on the hash, check whether the ignore is for the same module. 50 | if ignore.path == finding.path: 51 | if finding.source.module != ignore.module: 52 | return False 53 | 54 | # Then check whether the ignore is for the particular reference. 55 | if ignore.references: 56 | if finding.source.reference in ignore.references: 57 | return True 58 | return False 59 | 60 | # Or check whether the ignore is for the same offest. 
61 | if ignore.offset is not None: 62 | if finding.location.offset == ignore.offset: 63 | return True 64 | return False 65 | 66 | # In this case this is a fairly permissive ignore. 67 | return True 68 | 69 | return False 70 | 71 | 72 | def by_hash(finding: finding.Entry, ignore: ignore_list.Entry) -> bool: 73 | """Process a hash based ignore list entry.""" 74 | # Short circuit if no hash is set. 75 | if not ignore.md5: 76 | return False 77 | 78 | # If there's a match on the hash, check whether the ignore is for the same module. 79 | if ignore.md5 == finding.md5: 80 | if finding.source.module != ignore.module: 81 | return False 82 | 83 | # Then check whether the ignore is for the particular reference. 84 | if ignore.references: 85 | if finding.source.reference in ignore.references: 86 | return True 87 | return False 88 | 89 | # Or check whether the ignore is for the same offest. 90 | if ignore.offset is not None: 91 | if finding.location.offset == ignore.offset: 92 | return True 93 | return False 94 | 95 | # In this case this is a fairly permissive ignore. 96 | return True 97 | 98 | return False 99 | 100 | 101 | def process( 102 | findings: List[finding.Entry], 103 | ignore_list: ignore_list.Format, 104 | ) -> List[finding.Entry]: 105 | """Processes an ignore list and marks the relevant findings as ignored.""" 106 | filtered_findings = [] 107 | 108 | for entry in findings: 109 | for ignore in ignore_list.ignore: 110 | try: 111 | if by_path(entry, ignore): 112 | ignore = finding.Ignore( 113 | ignored=True, 114 | reason=ignore.reason, 115 | ) 116 | entry.ignore = ignore 117 | break 118 | 119 | if by_pattern(entry, ignore): 120 | ignore = finding.Ignore( 121 | ignored=True, 122 | reason=ignore.reason, 123 | ) 124 | entry.ignore = ignore 125 | break 126 | 127 | if by_hash(entry, ignore): 128 | ignore = finding.Ignore( 129 | ignored=True, 130 | reason=ignore.reason, 131 | ) 132 | entry.ignore = ignore 133 | break 134 | except re.error as err: 135 | raise IgnoreListException( 136 | f"Error in ignore list entry '{ignore.reason}': {err}" 137 | ) 138 | 139 | # Add the finding to our results, whether updated or not. 140 | filtered_findings.append(entry) 141 | 142 | return filtered_findings 143 | -------------------------------------------------------------------------------- /stacs/scan/helper.py: -------------------------------------------------------------------------------- 1 | """Define helpers commonly used throughout STACS. 
2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | from typing import List 6 | 7 | import colorama 8 | from stacs.scan.constants import ARCHIVE_FILE_SEPARATOR 9 | from stacs.scan.exceptions import NoParentException 10 | 11 | 12 | def generate_virtual_path( 13 | finding: "Finding", # noqa: F821 14 | artifacts: "List[Artifact]", # noqa: F821 15 | ): 16 | """Generate a virtual path for an input file.""" 17 | virtual_path = finding.filepath 18 | 19 | try: 20 | parent = artifacts[finding.artifact].parent 21 | 22 | while True: 23 | name = artifacts[parent].filepath 24 | virtual_path = f"{name}{ARCHIVE_FILE_SEPARATOR}{virtual_path}" 25 | 26 | parent = artifacts[parent].parent 27 | except NoParentException: 28 | return virtual_path 29 | 30 | 31 | def printi(string, indent: int = 4, prefix: str = None): 32 | """Super janky wrapper to print something indented.""" 33 | for line in string.splitlines(): 34 | if prefix: 35 | print(f"{prefix}", end="") 36 | 37 | print(f"{' ' * indent}" + line) 38 | 39 | 40 | def banner(version: str) -> str: 41 | """Returns a STACS console banner.""" 42 | banner = colorama.Fore.BLUE 43 | banner += rf""" 44 | ______________ ___________ 45 | / ___/_ __/ | / ____/ ___/ 46 | \__ \ / / / /| |/ / \__ \ 47 | ___/ // / / ___ / /___ ___/ / 48 | /____//_/ /_/ |_\____//____/ 49 | 50 | STACS version {version} 51 | """ 52 | return banner 53 | -------------------------------------------------------------------------------- /stacs/scan/loader/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines loaders used by STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan.loader import archive # noqa:F401 7 | from stacs.scan.loader import filepath # noqa:F401 8 | from stacs.scan.loader import manifest # noqa:F401 9 | -------------------------------------------------------------------------------- /stacs/scan/loader/archive.py: -------------------------------------------------------------------------------- 1 | """Defines handlers for unpacking of archives. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import bz2 7 | import gzip 8 | import hashlib 9 | import logging 10 | import lzma 11 | import os 12 | import shutil 13 | import tarfile 14 | import zipfile 15 | import zlib 16 | from typing import List, Tuple 17 | 18 | import zstandard 19 | from stacs.native import archive 20 | from stacs.scan.constants import CHUNK_SIZE 21 | from stacs.scan.exceptions import FileAccessException, InvalidFileException 22 | from stacs.scan.loader.format import dmg, xar 23 | 24 | 25 | def path_hash(filepath: str) -> str: 26 | """Returns a hash of the filepath, for use with unique directory creation.""" 27 | return hashlib.md5(bytes(filepath, "utf-8")).hexdigest() 28 | 29 | 30 | def zip_handler(filepath: str, directory: str) -> None: 31 | """Attempts to extract the provided zip archive.""" 32 | log = logging.getLogger(__name__) 33 | 34 | try: 35 | os.mkdir(directory, mode=0o700) 36 | except OSError as err: 37 | raise FileAccessException( 38 | f"Unable to create unpack directory at {directory}: {err}" 39 | ) 40 | 41 | # Attempt to unpack the zipfile to the new unpack directory. 42 | try: 43 | with zipfile.ZipFile(filepath, "r") as reader: 44 | try: 45 | reader.extractall(directory) 46 | except RuntimeError as err: 47 | # Encrypted zips (why is this not a custom exception?!) 
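Every unpack handler in this module follows the same contract: it receives the archive path and a dedicated unpack directory, creates that directory itself (mode 0o700), and raises FileAccessException or InvalidFileException on failure. A minimal sketch of driving a handler directly, with placeholder paths:

source = "/tmp/example/bundle.zip"  # Hypothetical archive to unpack.
cache = "/tmp/stacs-cache"          # Hypothetical unpack cache root.
os.makedirs(cache, exist_ok=True)

# Each archive unpacks into a directory keyed on the hash of its own path (via
# path_hash above), which keeps nested and repeated unpacks from colliding.
destination = os.path.join(cache, path_hash(source))

zip_handler(source, destination)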
48 | if "encrypted" in str(err): 49 | log.warn( 50 | f"Cannot process file in archive at {filepath}, skipping: {err}" 51 | ) 52 | except NotADirectoryError as err: 53 | # Broken filepaths inside of ZIP. 54 | log.warn( 55 | f"Cannot process file in archive at {filepath}, skipping: {err}" 56 | ) 57 | except (OSError, IndexError) as err: 58 | # Several conditions, but usually a corrupt / bad input zip. 59 | log.warn( 60 | f"Cannot process file in archive at {filepath}, skipping: {err}" 61 | ) 62 | except (zipfile.BadZipFile, OSError) as err: 63 | raise InvalidFileException( 64 | f"Unable to extract archive {filepath} to {directory}: {err}" 65 | ) 66 | 67 | 68 | def tar_handler(filepath: str, directory: str) -> None: 69 | """Attempts to extract the provided tarball.""" 70 | try: 71 | os.mkdir(directory, mode=0o700) 72 | except OSError as err: 73 | raise FileAccessException( 74 | f"Unable to create unpack directory at {directory}: {err}" 75 | ) 76 | 77 | # Attempt to unpack the tarball to the new unpack directory. 78 | try: 79 | with tarfile.open(filepath, "r") as reader: 80 | reader.extractall(directory) 81 | except (PermissionError, tarfile.TarError) as err: 82 | raise InvalidFileException( 83 | f"Unable to extract archive {filepath} to {directory}: {err}" 84 | ) 85 | 86 | 87 | def gzip_handler(filepath: str, directory: str) -> None: 88 | """Attempts to extract the provided gzip archive.""" 89 | output = ".".join(os.path.basename(filepath).split(".")[:-1]) 90 | 91 | # No dots? Just use the name as is. 92 | if len(output) < 1: 93 | output = os.path.basename(filepath) 94 | 95 | # Although gzip files cannot contain more than one file, we'll still spool into 96 | # a subdirectory under the cache for consistency. 97 | try: 98 | os.mkdir(directory, mode=0o700) 99 | except OSError as err: 100 | raise FileAccessException( 101 | f"Unable to create unpack directory at {directory}: {err}" 102 | ) 103 | 104 | # TODO: This can likely be optimized for tgz files, as currently the file will be 105 | # first processed and gunzipped, and then reprocessed to be extracted. 106 | try: 107 | with gzip.open(filepath, "rb") as fin: 108 | with open(os.path.join(directory, output), "wb") as fout: 109 | shutil.copyfileobj(fin, fout, CHUNK_SIZE) 110 | except gzip.BadGzipFile as err: 111 | raise InvalidFileException( 112 | f"Unable to extract archive {filepath} to {output}: {err}" 113 | ) 114 | 115 | 116 | def bzip2_handler(filepath: str, directory: str) -> None: 117 | """Attempts to extract the provided bzip2 archive.""" 118 | output = ".".join(os.path.basename(filepath).split(".")[:-1]) 119 | 120 | # No dots? Just use the name as is. 121 | if len(output) < 1: 122 | output = os.path.basename(filepath) 123 | 124 | # Like gzip, bzip2 cannot support more than a single file. Again, we'll spool into 125 | # a subdirectory for consistency. 126 | try: 127 | os.mkdir(directory, mode=0o700) 128 | except OSError as err: 129 | raise FileAccessException( 130 | f"Unable to create unpack directory at {directory}: {err}" 131 | ) 132 | 133 | # TODO: This can likely be optimized for tbz files, as currently the file will be 134 | # first processed and gunzipped, and then reprocessed to be extracted. 
135 | try: 136 | with bz2.open(filepath, "rb") as fin: 137 | with open(os.path.join(directory, output), "wb") as fout: 138 | shutil.copyfileobj(fin, fout, CHUNK_SIZE) 139 | except (OSError, ValueError) as err: 140 | raise InvalidFileException( 141 | f"Unable to extract archive {filepath} to {output}: {err}" 142 | ) 143 | 144 | 145 | def zstd_handler(filepath: str, directory: str) -> None: 146 | """Attempts to extract the provided zstd archive.""" 147 | output = ".".join(os.path.basename(filepath).split(".")[:-1]) 148 | 149 | # No dots? Just use the name as is. 150 | if len(output) < 1: 151 | output = os.path.basename(filepath) 152 | 153 | # zstd does not appear to provide a native mechanism to compress multiple files, 154 | # and recommend 'to combine zstd with tar'. 155 | try: 156 | os.mkdir(directory, mode=0o700) 157 | except OSError as err: 158 | raise FileAccessException( 159 | f"Unable to create unpack directory at {directory}: {err}" 160 | ) 161 | 162 | try: 163 | decompressor = zstandard.ZstdDecompressor() 164 | 165 | with open(filepath, "rb") as fin: 166 | with open(os.path.join(directory, output), "wb") as fout: 167 | decompressor.copy_stream(fin, fout, read_size=CHUNK_SIZE) 168 | except (OSError, ValueError, zstandard.ZstdError) as err: 169 | raise InvalidFileException( 170 | f"Unable to extract archive {filepath} to {output}: {err}" 171 | ) 172 | 173 | 174 | def lzma_handler(filepath: str, directory: str) -> None: 175 | """Attempts to extract the provided xz / lzma archive.""" 176 | output = ".".join(os.path.basename(filepath).split(".")[:-1]) 177 | 178 | # No dots? Just use the name as is. 179 | if len(output) < 1: 180 | output = os.path.basename(filepath) 181 | 182 | # Although xz files cannot contain more than one file, we'll still spool into 183 | # a subdirectory under the cache for consistency. 184 | try: 185 | os.mkdir(directory, mode=0o700) 186 | except OSError as err: 187 | raise FileAccessException( 188 | f"Unable to create unpack directory at {directory}: {err}" 189 | ) 190 | 191 | try: 192 | with lzma.open(filepath, "rb") as fin: 193 | with open(os.path.join(directory, output), "wb") as fout: 194 | shutil.copyfileobj(fin, fout, CHUNK_SIZE) 195 | except lzma.LZMAError as err: 196 | raise InvalidFileException( 197 | f"Unable to extract archive {filepath} to {output}: {err}" 198 | ) 199 | 200 | 201 | def zlib_handler(filepath: str, directory: str) -> None: 202 | """Attempts to extract the provided zlib archive.""" 203 | output = ".".join(os.path.basename(filepath).split(".")[:-1]) 204 | 205 | # No dots? Just use the name as is. 
206 | if len(output) < 1: 207 | output = os.path.basename(filepath) 208 | 209 | try: 210 | os.mkdir(directory, mode=0o700) 211 | except OSError as err: 212 | raise FileAccessException( 213 | f"Unable to create unpack directory at {directory}: {err}" 214 | ) 215 | 216 | try: 217 | decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS) 218 | 219 | with open(filepath, "rb") as fin: 220 | with open(os.path.join(directory, output), "wb") as fout: 221 | while compressed := fin.read(CHUNK_SIZE): 222 | fout.write(decompressor.decompress(compressed)) 223 | except zlib.error as err: 224 | raise InvalidFileException( 225 | f"Unable to extract archive {filepath} to {output}: {err}" 226 | ) 227 | 228 | 229 | def xar_handler(filepath: str, directory: str) -> None: 230 | """Attempts to extract the provided XAR archive.""" 231 | try: 232 | os.mkdir(directory, mode=0o700) 233 | except OSError as err: 234 | raise FileAccessException( 235 | f"Unable to create unpack directory at {directory}: {err}" 236 | ) 237 | 238 | # Attempt to unpack the archive. 239 | try: 240 | archive = xar.XAR(filepath) 241 | archive.extract(directory) 242 | except FileAccessException as err: 243 | raise FileAccessException( 244 | f"Unable to extract archive {filepath} to {directory}: {err}" 245 | ) 246 | except InvalidFileException as err: 247 | raise InvalidFileException( 248 | f"Unable to extract archive {filepath} to {directory}: {err}" 249 | ) 250 | 251 | 252 | def dmg_handler(filepath: str, directory: str) -> None: 253 | """Attempts to extract the provided DMG archive.""" 254 | try: 255 | os.mkdir(directory, mode=0o700) 256 | except OSError as err: 257 | raise FileAccessException( 258 | f"Unable to create unpack directory at {directory}: {err}" 259 | ) 260 | 261 | # Attempt to unpack the archive. 262 | try: 263 | archive = dmg.DMG(filepath) 264 | archive.extract(directory) 265 | except FileAccessException as err: 266 | raise FileAccessException( 267 | f"Unable to extract archive {filepath} to {directory}: {err}" 268 | ) 269 | except InvalidFileException as err: 270 | raise InvalidFileException( 271 | f"Unable to extract archive {filepath} to {directory}: {err}" 272 | ) 273 | 274 | 275 | def libarchive_handler(filepath: str, directory: str) -> None: 276 | """Attempts to extract the provided archive with libarchive.""" 277 | try: 278 | os.mkdir(directory, mode=0o700) 279 | except OSError as err: 280 | raise FileAccessException( 281 | f"Unable to create unpack directory at {directory}: {err}" 282 | ) 283 | 284 | # Attempt to unpack the archive to the new unpack directory. 285 | try: 286 | with archive.ArchiveReader(filepath) as reader: 287 | for entry in reader: 288 | member = entry.filename 289 | member = member.lstrip("../") 290 | member = member.lstrip("./") 291 | 292 | if entry.filename == ".": 293 | continue 294 | 295 | destination = os.path.join(directory, member) 296 | parent = os.path.dirname(destination) 297 | 298 | # Handle odd cases where a file was created where a directory needs to 299 | # be. 300 | if os.path.exists(parent) and os.path.isfile(parent): 301 | os.unlink(parent) 302 | 303 | if os.path.isdir(destination): 304 | continue 305 | 306 | # Create parent directories, as required. 307 | if not os.path.isdir(parent): 308 | os.makedirs(parent) 309 | 310 | # If the entry is a directory, create it and move on. 
311 | if entry.isdir: 312 | os.makedirs(destination, exist_ok=True) 313 | continue 314 | 315 | with open(destination, "wb") as fout: 316 | while True: 317 | chunk = reader.read() 318 | if len(chunk) > 0: 319 | fout.write(chunk) 320 | continue 321 | break 322 | except archive.ArchiveError as err: 323 | raise InvalidFileException( 324 | f"Unable to extract archive {filepath} to {directory}: {err}" 325 | ) 326 | 327 | 328 | def get_mimetype(chunk: bytes, start: bool) -> List[Tuple[int, str]]: 329 | """Attempts to locate the appropriate handler for a given file. 330 | 331 | This may fail if the required "magic" is at an offset greater than the CHUNK_SIZE. 332 | However, currently this is not an issue, but may need to be revisited later as more 333 | archive types are supported. 334 | 335 | The start flag is used to indicate whether the current chunk is from the start of 336 | the file, or the end of the file. Today we only support checking the first and last 337 | chunk. 338 | 339 | Returns a list of weights and MIME types as a tuple. This weight is specified by 340 | handlers and is used to allow "container" formats, which may contain multiple other 341 | files of various matching types, to "win" the match - due to a higher weight. 342 | """ 343 | for name, options in MIME_TYPE_HANDLERS.items(): 344 | offset = options["offset"] 345 | magic = options["magic"] 346 | 347 | # If looking at the last chunk, only use negative offsets. This is to prevent 348 | # false positives as position 0 in the last chunk is actually N bytes into the 349 | # file. This is especially problematic for formats with short magic numbers, 350 | # such as zlib. 351 | if not start and offset >= 0: 352 | continue 353 | 354 | # TODO: How to handle multiple matches in the same chunk? Is this this likely? 355 | for format in magic: 356 | if chunk[offset : (offset + len(format))] == format: # noqa: E203 357 | return (options["weight"], name) 358 | 359 | return (0, None) 360 | 361 | 362 | # Define all supported archives and their handlers. As we currently only support a small 363 | # list of types we can just define file magic directly here, rather than use an external 364 | # library. This removes the need for dependencies which may have other system 365 | # dependencies - such as libmagic. It should also provide a small a speed up during 366 | # unpacking, as we're only looking for a small number of types. 
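As a rough sketch of how the table below is consulted: feeding the first chunk of a gzip file to get_mimetype() matches on the 0x1F 0x8B magic and returns the weight alongside the MIME type, which the loader then maps to an unpack handler.

# First bytes of a (hypothetical) gzip file: the two magic bytes, padded out.
chunk = b"\x1f\x8b" + b"\x00" * 510

weight, mime = get_mimetype(chunk, start=True)
# weight == 1, mime == "application/gzip"; the unpack routine is then looked up
# via MIME_TYPE_HANDLERS[mime]["handler"].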
367 | MIME_TYPE_HANDLERS = { 368 | "application/x-tar": { 369 | "weight": 1, 370 | "offset": 257, 371 | "magic": [ 372 | bytearray([0x75, 0x73, 0x74, 0x61, 0x72]), 373 | ], 374 | "handler": tar_handler, 375 | }, 376 | "application/gzip": { 377 | "weight": 1, 378 | "offset": 0, 379 | "magic": [ 380 | bytearray([0x1F, 0x8B]), 381 | ], 382 | "handler": gzip_handler, 383 | }, 384 | "application/x-bzip2": { 385 | "weight": 1, 386 | "offset": 0, 387 | "magic": [ 388 | bytearray([0x42, 0x5A, 0x68]), 389 | ], 390 | "handler": bzip2_handler, 391 | }, 392 | "application/zip": { 393 | "weight": 1, 394 | "offset": 0, 395 | "magic": [ 396 | bytearray([0x50, 0x4B, 0x03, 0x04]), 397 | bytearray([0x50, 0x4B, 0x05, 0x06]), 398 | bytearray([0x50, 0x4B, 0x07, 0x08]), 399 | ], 400 | "handler": zip_handler, 401 | }, 402 | "application/zlib": { 403 | "weight": 1, 404 | "offset": 0, 405 | "magic": [ 406 | bytearray([0x78, 0x01]), 407 | bytearray([0x78, 0x5E]), 408 | bytearray([0x78, 0x9C]), 409 | bytearray([0x78, 0xDA]), 410 | ], 411 | "handler": zlib_handler, 412 | }, 413 | "application/x-xz": { 414 | "weight": 1, 415 | "offset": 0, 416 | "magic": [ 417 | bytearray([0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00]), 418 | ], 419 | "handler": lzma_handler, 420 | }, 421 | "application/x-rpm": { 422 | "weight": 1, 423 | "offset": 0, 424 | "magic": [ 425 | bytearray([0xED, 0xAB, 0xEE, 0xDB]), 426 | ], 427 | "handler": libarchive_handler, 428 | }, 429 | "application/x-iso9660-image": { 430 | "weight": 1, 431 | "offset": 0x8001, 432 | "magic": [ 433 | bytearray([0x43, 0x44, 0x30, 0x30, 0x31]), 434 | ], 435 | "handler": libarchive_handler, 436 | }, 437 | "application/x-7z-compressed": { 438 | "weight": 1, 439 | "offset": 0, 440 | "magic": [ 441 | bytearray([0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C]), 442 | ], 443 | "handler": libarchive_handler, 444 | }, 445 | "application/x-cpio": { 446 | "weight": 1, 447 | "offset": 0, 448 | "magic": [ 449 | bytearray([0xC7, 0x71]), # 070707 in octal (Little Endian). 450 | bytearray([0x71, 0xC7]), # 070707 in octal (Big Endian). 451 | bytearray([0x30, 0x37, 0x30, 0x37, 0x30, 0x31]), # "070701" 452 | bytearray([0x30, 0x37, 0x30, 0x37, 0x30, 0x32]), # "070702" 453 | bytearray([0x30, 0x37, 0x30, 0x37, 0x30, 0x37]), # "070707" 454 | ], 455 | "handler": libarchive_handler, 456 | }, 457 | "application/x-xar": { 458 | "weight": 1, 459 | "offset": 0, 460 | "magic": [ 461 | bytearray([0x78, 0x61, 0x72, 0x21]), 462 | ], 463 | "handler": xar_handler, 464 | }, 465 | "application/vnd.ms-cab-compressed": { 466 | "weight": 1, 467 | "offset": 0, 468 | "magic": [ 469 | bytearray([0x4D, 0x53, 0x43, 0x46]), 470 | ], 471 | "handler": libarchive_handler, 472 | }, 473 | "application/x-archive": { 474 | "weight": 1, 475 | "offset": 0, 476 | "magic": [ 477 | bytearray([0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E]), 478 | ], 479 | "handler": libarchive_handler, 480 | }, 481 | "application/vnd.rar": { 482 | "weight": 1, 483 | "offset": 0, 484 | "magic": [ 485 | bytearray([0x52, 0x61, 0x72, 0x21, 0x1A, 0x07]), 486 | ], 487 | "handler": libarchive_handler, 488 | }, 489 | "application/zstd": { 490 | "weight": 1, 491 | "offset": 0, 492 | "magic": [ 493 | bytearray([0x28, 0xB5, 0x2F, 0xFD]), 494 | ], 495 | "handler": zstd_handler, 496 | }, 497 | "application/x-apple-diskimage": { 498 | "weight": 2, # "container" formats are weighted higher. 
499 | "offset": -512, 500 | "magic": [ 501 | bytearray([0x6B, 0x6F, 0x6C, 0x79]), 502 | ], 503 | "handler": dmg_handler, 504 | }, 505 | } 506 | -------------------------------------------------------------------------------- /stacs/scan/loader/filepath.py: -------------------------------------------------------------------------------- 1 | """Defines a file path loader for STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import hashlib 7 | import logging 8 | import os 9 | import re 10 | import shutil 11 | from concurrent.futures import ThreadPoolExecutor, as_completed 12 | from typing import List 13 | 14 | from stacs.scan.constants import ARCHIVE_FILE_SEPARATOR, CHUNK_SIZE 15 | from stacs.scan.exceptions import FileAccessException, InvalidFileException 16 | from stacs.scan.loader import archive 17 | from stacs.scan.model.manifest import Entry 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def metadata(filepath: str, overlay: str = None, parent: str = None) -> Entry: 23 | """Generates a hash and determines the mimetype of the input file.""" 24 | md5 = hashlib.md5() 25 | mime = None 26 | winner = 0 27 | 28 | # Read the file in chunks. 29 | try: 30 | stat = os.stat(filepath) 31 | 32 | with open(filepath, "rb") as fin: 33 | while chunk := fin.read(CHUNK_SIZE): 34 | md5.update(chunk) 35 | 36 | # Attempt to determine the mime-type using the first and last chunk. 37 | # Note: This may need to change further in future. 38 | if (not mime and fin.tell() <= CHUNK_SIZE) or len(chunk) < CHUNK_SIZE: 39 | start = False if len(chunk) < CHUNK_SIZE else True 40 | (score, candidate) = archive.get_mimetype(chunk, start) 41 | 42 | # Swap the winner if the score is higher. 43 | if score > winner: 44 | mime = candidate 45 | winner = score 46 | except OSError as err: 47 | raise FileAccessException(f"Unable to open file at {filepath}: {err}") 48 | 49 | return Entry( 50 | path=filepath, 51 | md5=md5.hexdigest(), 52 | mime=mime, 53 | overlay=overlay, 54 | parent=parent, 55 | size=stat.st_size, 56 | ) 57 | 58 | 59 | def walker(path: str, skip_on_eacces: bool) -> List[str]: 60 | """Recursively walk a file path, returning a list of all files.""" 61 | entries = [] 62 | 63 | # TODO: Would moving walker to a generator yield a performance increase, or lead to 64 | # higher disk contention due to the hasher running at the same time? 65 | try: 66 | with os.scandir(path) as scan: 67 | for handle in scan: 68 | try: 69 | # Recurse on directories, but not symlinks. 70 | if handle.is_dir() and not handle.is_symlink(): 71 | entries.extend(walker(handle.path, skip_on_eacces)) 72 | 73 | # Track files, but not symlinks. 74 | if handle.is_file() and not handle.is_symlink(): 75 | entries.append(handle.path) 76 | except PermissionError: 77 | if not skip_on_eacces: 78 | raise 79 | except OSError: 80 | # This is usually due to too many levels of symlinks. However, other 81 | # cases are likely with a large enough input. 
82 | continue 83 | except NotADirectoryError: 84 | entries.append(path) 85 | 86 | return list(set(entries)) 87 | 88 | 89 | def qualify(path: str) -> str: 90 | """Add the scheme to a file path, if required.""" 91 | if path.startswith("/"): 92 | return f"file://{path}" 93 | else: 94 | return path 95 | 96 | 97 | def finder( 98 | path: str, 99 | cache: str, 100 | workers: int = 10, 101 | skip_on_eacces: bool = True, 102 | skip_on_corrupt: bool = False, 103 | ) -> List[Entry]: 104 | """Processes the input path, returning a list of all files and their hashes.""" 105 | entries = [] 106 | futures = dict() 107 | 108 | # Run the metadata enumerator in a thread pool as we're likely to be I/O bound. 109 | with ThreadPoolExecutor(max_workers=workers) as pool: 110 | futures = { 111 | pool.submit(metadata, file): file for file in walker(path, skip_on_eacces) 112 | } 113 | 114 | # A loop and counter is used here to ensure that additional work which may be 115 | # submitted during the 'final loop' isn't accidentally ignored. 116 | while True: 117 | complete = 0 118 | for future in as_completed(futures): 119 | complete += 1 120 | 121 | try: 122 | result = future.result() 123 | except FileAccessException: 124 | if not skip_on_eacces: 125 | raise 126 | 127 | # Track the result and then remove the future from the initial futures 128 | # list so that these results aren't returned again next iteration. 129 | entries.append(result) 130 | del futures[future] 131 | 132 | # Check it the file was found to be an archive, and if so, unpack it. 133 | handler = archive.MIME_TYPE_HANDLERS.get(result.mime, {}).get("handler") 134 | if not handler: 135 | continue 136 | 137 | # Remove any existing previously unpacked files, then unpack the archive 138 | # and submit extracted files back into the queue. This is to allow for 139 | # easy recursive unpacking of nested archives. 140 | destination = os.path.join(cache, archive.path_hash(result.path)) 141 | shutil.rmtree(destination, ignore_errors=True) 142 | 143 | try: 144 | handler(result.path, destination) 145 | except InvalidFileException as err: 146 | # Only skip with a warning if explicitly configured to do so. 147 | if skip_on_corrupt: 148 | logger.warning( 149 | f"Skipping file at {result.path} due to error when " 150 | f"processing: {err}" 151 | ) 152 | else: 153 | raise 154 | 155 | for file in walker(destination, skip_on_eacces): 156 | # The overlay path is a 'virtual' path that is constructed based on 157 | # the archive the file appears inside of, and the path of the file 158 | # inside of the archive. However, as archives may be nested, we need 159 | # to check whether we already have an overlay and, if set, use that 160 | # value instead. 161 | if result.overlay: 162 | parent = result.overlay 163 | else: 164 | parent = result.path 165 | 166 | logger.debug(f"Processing {file}, extracted from archive {parent}") 167 | overlay = ( 168 | f"{parent}" 169 | f"{ARCHIVE_FILE_SEPARATOR}" 170 | f"{re.sub(rf'^{destination}/?', '', file)}" 171 | ) 172 | 173 | # Submit back to the pool for processing. 174 | submission = pool.submit( 175 | metadata, file, overlay=overlay, parent=result.md5 176 | ) 177 | futures[submission] = file 178 | 179 | if complete == 0: 180 | break 181 | 182 | return entries 183 | -------------------------------------------------------------------------------- /stacs/scan/loader/format/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines file format handlers used by STACS. 
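Tying the file path loader above together: a caller hands finder() a directory to scan and a cache directory for unpacked archives, and receives manifest entries for every file found, including files extracted from nested archives. A minimal sketch (paths and worker count are illustrative):

from stacs.scan.loader import filepath

entries = filepath.finder(
    "/path/to/scan",           # Directory (or single file) to walk.
    cache="/tmp/stacs-cache",  # Where archives are unpacked to.
    workers=4,
)

for entry in entries:
    print(entry.md5, entry.mime or "-", entry.overlay or entry.path)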
2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan.loader.format import dmg, xar # noqa: F401 7 | -------------------------------------------------------------------------------- /stacs/scan/loader/format/dmg.py: -------------------------------------------------------------------------------- 1 | """Provides an Apple Disk Image (DMG) parser and extractor. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import bz2 7 | import lzma 8 | import os 9 | import plistlib 10 | import struct 11 | import zlib 12 | from collections import namedtuple 13 | from typing import List 14 | 15 | from pydantic import BaseModel, Extra, Field 16 | from stacs.scan.exceptions import FileAccessException, InvalidFileException 17 | 18 | # Structures names and geometry are via "Demystifying the DMG File Format" 19 | # by Jonathan Levin (http://newosxbook.com/). 20 | DMG_HEADER_MAGIC = b"koly" 21 | DMG_HEADER = ">4sIIIQQQQQII16sII128sQQ120sII128sIQIII" 22 | DMG_HEADER_MAGIC_SZ = len(DMG_HEADER_MAGIC) 23 | DMG_HEADER_SZ = struct.calcsize(DMG_HEADER) 24 | 25 | DMG_BLOCK_TABLE_MAGIC = b"mish" 26 | DMG_BLOCK_TABLE = ">4sIQQQIIIIIIIIII128sI" 27 | DMG_BLOCK_TABLE_MAGIC_SZ = len(DMG_BLOCK_TABLE_MAGIC) 28 | DMG_BLOCK_TABLE_SZ = struct.calcsize(DMG_BLOCK_TABLE) 29 | 30 | DMG_BLOCK_CHUNK = ">I4sQQQQ" 31 | DMG_BLOCK_CHUNK_SZ = struct.calcsize(DMG_BLOCK_CHUNK) 32 | 33 | DMGHeader = namedtuple( 34 | "DMGHeader", 35 | [ 36 | "signature", 37 | "version", 38 | "header_size", 39 | "flags", 40 | "running_data_fork_offset", 41 | "data_fork_offset", 42 | "data_fork_length", 43 | "rsrc_fork_offset", 44 | "rsrc_fork_length", 45 | "segment_number", 46 | "segment_count", 47 | "segment_id", 48 | "data_checksum_type", 49 | "data_checksum_size", 50 | "data_checksum", 51 | "xml_offset", 52 | "xml_length", 53 | "reserved_1", 54 | "checksum_Type", 55 | "checksum_Size", 56 | "checksum", 57 | "image_variant", 58 | "sector_count", 59 | "reserved_2", 60 | "reserved_3", 61 | "reserved_4", 62 | ], 63 | ) 64 | DMGBlockTable = namedtuple( 65 | "DMGBlockTable", 66 | [ 67 | "signature", 68 | "version", 69 | "sector_number", 70 | "sector_count", 71 | "data_offset", 72 | "buffers_needed", 73 | "block_descriptors", 74 | "reserved_1", 75 | "reserved_2", 76 | "reserved_3", 77 | "reserved_4", 78 | "reserved_5", 79 | "reserved_6", 80 | "checksum_ype", 81 | "checksum_ize", 82 | "checksum", 83 | "chunk_count", 84 | ], 85 | ) 86 | DMGBlockChunk = namedtuple( 87 | "DMGBlockChunk", 88 | [ 89 | "type", 90 | "comment", 91 | "sector_number", 92 | "sector_count", 93 | "compressed_offset", 94 | "compressed_length", 95 | ], 96 | ) 97 | 98 | 99 | class DMGBlock(BaseModel, extra=Extra.forbid): 100 | """Expresses a DMG block entry and its chunks.""" 101 | 102 | name: str 103 | chunks: List[DMGBlockChunk] = Field([]) 104 | 105 | 106 | class DMG: 107 | """Provides an Apple Disk Image (DMG) parser and extractor.""" 108 | 109 | def __init__(self, filepath: str): 110 | self.archive = filepath 111 | 112 | try: 113 | with open(self.archive, "rb") as fin: 114 | # DMG metadata is at the end of the file. 115 | fin.seek(-DMG_HEADER_SZ, 2) 116 | 117 | # Ensure the provided file is actually a DMG. 118 | if fin.read(DMG_HEADER_MAGIC_SZ) != DMG_HEADER_MAGIC: 119 | raise InvalidFileException("File does not appear to be a DMG") 120 | 121 | # Rewind and attempt to read in header. 
122 | fin.seek(-DMG_HEADER_MAGIC_SZ, 1)
123 | self._header = DMGHeader._make(
124 | struct.unpack(DMG_HEADER, fin.read(DMG_HEADER_SZ))
125 | )
126 | 
127 | # Read the XML property list.
128 | fin.seek(self._header.xml_offset, 0)
129 | self._plist = plistlib.loads(fin.read(self._header.xml_length))
130 | except OSError as err:
131 | raise FileAccessException(f"Unable to read archive: {err}")
132 | 
133 | def _parse_blocks(self) -> List[DMGBlock]:
134 | """Recursively parse blocks and their associated chunks."""
135 | candidates = []
136 | 
137 | # Read the BLKX entries from the resource-fork section of the plist.
138 | for entry in self._plist.get("resource-fork", {}).get("blkx", []):
139 | data = entry.get("Data")
140 | name = entry.get("Name")
141 | 
142 | block = DMGBlock(name=name)
143 | table = DMGBlockTable._make(
144 | struct.unpack(DMG_BLOCK_TABLE, data[0:DMG_BLOCK_TABLE_SZ])
145 | )
146 | 
147 | # Extract all blocks and their associated chunks from the encoded "Data"
148 | # inside of the extracted plist.
149 | start = DMG_BLOCK_TABLE_SZ
150 | 
151 | for _ in range(0, table.chunk_count):
152 | end = start + DMG_BLOCK_CHUNK_SZ
153 | block.chunks.append(
154 | DMGBlockChunk._make(struct.unpack(DMG_BLOCK_CHUNK, data[start:end]))
155 | )
156 | start = end
157 | 
158 | candidates.append(block)
159 | 
160 | return candidates
161 | 
162 | def extract(self, destination):
163 | """Extract all blocks from the DMG to the optional destination directory."""
164 | parent = os.path.basename(self.archive)
165 | 
166 | try:
167 | os.makedirs(destination, exist_ok=True)
168 | except OSError as err:
169 | raise FileAccessException(
170 | f"Unable to create directory during extraction: {err}"
171 | )
172 | 
173 | # Process each chunk inside of each block. A DMG has multiple blocks, and a
174 | # block has N chunks.
175 | for idx, block in enumerate(self._parse_blocks()):
176 | output = os.path.join(destination, f"{parent}.{idx}.blob")
177 | 
178 | for chunk in block.chunks:
179 | # Skip Ignored, Comment, and Last blocks (respectively).
180 | if chunk.type in [0x00000002, 0x7FFFFFFE, 0xFFFFFFFF]:
181 | continue
182 | 
183 | try:
184 | with open(self.archive, "rb") as fin, open(output, "ab") as fout:
185 | fin.seek(chunk.compressed_offset)
186 | 
187 | # 0x80000005 - Zlib.
188 | if chunk.type == 0x80000005:
189 | fout.write(
190 | zlib.decompress(fin.read(chunk.compressed_length))
191 | )
192 | 
193 | # 0x80000006 - BZ2.
194 | if chunk.type == 0x80000006:
195 | fout.write(
196 | bz2.decompress(fin.read(chunk.compressed_length))
197 | )
198 | 
199 | # 0x80000008 - LZMA.
200 | if chunk.type == 0x80000008:
201 | fout.write(
202 | lzma.decompress(fin.read(chunk.compressed_length))
203 | )
204 | 
205 | # 0x00000000 - Zero Fill.
206 | if chunk.type == 0x00000000:
207 | fout.write(b"\x00" * chunk.compressed_length)
208 | continue
209 | except (OSError, lzma.LZMAError, ValueError) as err:
210 | raise InvalidFileException(err)
211 | 
--------------------------------------------------------------------------------
/stacs/scan/loader/format/xar.py:
--------------------------------------------------------------------------------
1 | """Provides an eXtensible ARchive parser and extractor.
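As a usage sketch for the XAR parser defined below (the package path is a placeholder): the table-of-contents is parsed up front, so entries can be listed without extracting anything.

package = XAR("/tmp/example/tools.pkg")  # Hypothetical XAR / .pkg archive.

for entry in package.entries():
    print(entry.path, entry.size, entry.encoding)

# Extraction mirrors the other format handlers: everything is written beneath
# the provided destination directory.
package.extract("/tmp/stacs-cache/tools")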
2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import os 7 | import struct 8 | import xml.etree.ElementTree as ET 9 | import zlib 10 | from collections import namedtuple 11 | from typing import List 12 | 13 | from stacs.scan.constants import CHUNK_SIZE 14 | from stacs.scan.exceptions import FileAccessException, InvalidFileException 15 | 16 | XAR_MAGIC = b"xar!" 17 | XAR_HEADER = ">4sHHQQI" 18 | XAR_HEADER_SZ = struct.calcsize(XAR_HEADER) 19 | 20 | # via xar/include/xar.h.in 21 | XARHeader = namedtuple( 22 | "XARHeader", 23 | [ 24 | "magic", 25 | "size", 26 | "version", 27 | "toc_length_compressed", 28 | "toc_length_uncompressed", 29 | "cksum_alg", 30 | ], 31 | ) 32 | 33 | XAREntry = namedtuple( 34 | "XAREntry", 35 | [ 36 | "length", 37 | "offset", 38 | "size", 39 | "encoding", 40 | "archived_cksum_kind", 41 | "archived_cksum", 42 | "path", 43 | "name", 44 | "kind", 45 | ], 46 | ) 47 | 48 | 49 | class XAR: 50 | """Provides an eXtensible ARchive Format parser and extrator.""" 51 | 52 | def __init__(self, filepath: str): 53 | self.archive = filepath 54 | 55 | try: 56 | with open(self.archive, "rb") as fin: 57 | # Ensure the provided file is actually a XAR. 58 | if fin.read(4) != XAR_MAGIC: 59 | raise InvalidFileException("File does not appear to be a XAR") 60 | 61 | # Rewind and attempt to read in header. 62 | fin.seek(0) 63 | self._header = XARHeader._make( 64 | struct.unpack(XAR_HEADER, fin.read(XAR_HEADER_SZ)) 65 | ) 66 | 67 | # Read and decompress the table-of-contents. 68 | fin.seek(self._header.size) 69 | 70 | self._toc = ET.fromstring( 71 | str( 72 | zlib.decompress(fin.read(self._header.toc_length_uncompressed)), 73 | "utf-8", 74 | ) 75 | ) 76 | except zlib.error as err: 77 | raise InvalidFileException(f"Unable to read table-of-contents: {err}") 78 | except OSError as err: 79 | raise FileAccessException(f"Unable to read archive: {err}") 80 | 81 | def _parse_entries(self, root, directory="") -> List[XAREntry]: 82 | """Recursively parse entries from the table-of-contents.""" 83 | candidates = [] 84 | 85 | # Strip any slashes, only using the last path component. 86 | kind = root.find(".type").text 87 | name = root.find(".name").text.split("/")[-1] 88 | path = os.path.join(directory, name) 89 | 90 | # Recurse for directories 91 | if kind == "directory": 92 | for element in root.findall(".//file"): 93 | candidates.extend(self._parse_entries(element, directory=path)) 94 | 95 | if kind == "file": 96 | size = int(root.find(".//data/size").text) 97 | length = int(root.find(".//data/length").text) 98 | offset = int(root.find(".//data/offset").text) 99 | encoding = root.find(".//data/encoding").get("style") 100 | archived_cksum = root.find(".//data/archived-checksum").text 101 | archived_cksum_kind = root.find(".//data/archived-checksum").get("style") 102 | 103 | candidates.append( 104 | XAREntry( 105 | length, 106 | offset, 107 | size, 108 | encoding, 109 | archived_cksum, 110 | archived_cksum_kind, 111 | path, 112 | name, 113 | kind, 114 | ) 115 | ) 116 | 117 | return candidates 118 | 119 | def entries(self) -> List[XAREntry]: 120 | """Return a list of entries in this XAR.""" 121 | candidates = [] 122 | 123 | for entry in self._toc.findall("./toc/file"): 124 | candidates.extend(self._parse_entries(entry)) 125 | 126 | return candidates 127 | 128 | def extract(self, destination): 129 | """Extract all entries from the XAR to the optional destination directory.""" 130 | # Offset must be adjusted by the size of the ToC and the header. 
This is as the 131 | # offset is from the first byte AFTER the header and compressed ToC. 132 | header_size = self._header.size + self._header.toc_length_compressed 133 | 134 | for entry in self.entries(): 135 | parent = os.path.dirname(os.path.join(destination, entry.path)) 136 | 137 | try: 138 | os.makedirs(parent, exist_ok=True) 139 | except OSError as err: 140 | raise FileAccessException( 141 | f"Unable to create directory during extraction: {err}" 142 | ) 143 | 144 | # Check whether a decompressor should be used. 145 | decompressor = None 146 | 147 | if entry.encoding == "application/x-gzip": 148 | decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 32).decompress 149 | 150 | # Perform extraction. 151 | # TODO: No decompression or integrity checking is performed today, nor are 152 | # ownership and modes followed. 153 | remaining = entry.length 154 | 155 | try: 156 | with open(self.archive, "rb") as fin: 157 | with open(os.path.join(destination, entry.path), "wb") as fout: 158 | fin.seek(header_size + entry.offset) 159 | 160 | # Read all data in chunks to not balloon memory when processing 161 | # large files. 162 | while remaining > 0: 163 | delta = remaining - CHUNK_SIZE 164 | if delta < 0: 165 | read_length = remaining 166 | else: 167 | read_length = CHUNK_SIZE 168 | 169 | # Use a decompressor, if required. 170 | if decompressor: 171 | fout.write(decompressor(fin.read(read_length))) 172 | else: 173 | fout.write(fin.read(read_length)) 174 | 175 | remaining -= read_length 176 | except (OSError, zlib.error) as err: 177 | raise InvalidFileException(err) 178 | -------------------------------------------------------------------------------- /stacs/scan/loader/manifest.py: -------------------------------------------------------------------------------- 1 | """Defines a manifest loader for STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | # 7 | # TODO: Implement the manifest loader. This should take the contents of a manifest 8 | # which matches the stacs.scan.model.manifest.Format schema. It should also check 9 | # whether all requested files exist, and generate MD5 sums for them if not 10 | # specified in the manifest 'Entry' (stacs.scan.model.manifest.Entry). 11 | # 12 | -------------------------------------------------------------------------------- /stacs/scan/model/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines models used by STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan.model import finding # noqa: F401 7 | from stacs.scan.model import ignore_list # noqa: F401 8 | from stacs.scan.model import manifest # noqa: F401 9 | from stacs.scan.model import pack # noqa: F401 10 | -------------------------------------------------------------------------------- /stacs/scan/model/finding.py: -------------------------------------------------------------------------------- 1 | """Defines types to assist with reporting findings. 
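As a quick orientation to the models defined below: a single finding bundles where it was found, what was matched, and how it should be treated. A small sketch with illustrative values (the rule reference and hash are placeholders):

entry = Entry(
    path="config/service.env",
    md5="0" * 32,
    confidence=80.0,
    location=Location(line=12, offset=481),
    sample=Sample(
        window=20,
        before="password = ",
        finding="hunter2",
        after="\n",
        binary=False,
    ),
    source=Source(
        module="stacs.scan.scanner.rules",
        reference="CredentialGeneric",
        description="Potential credential in configuration file.",
    ),
)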
2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from typing import List 7 | 8 | from pydantic import BaseModel, Extra, Field 9 | 10 | 11 | class Location(BaseModel, extra=Extra.forbid): 12 | """Defines data associated with a location of a finding.""" 13 | 14 | line: int = Field( 15 | None, 16 | title="The line number which contains the finding.", 17 | ) 18 | offset: int = Field( 19 | None, 20 | title="The offset from the start of the file of the finding (in bytes).", 21 | ) 22 | 23 | 24 | class Source(BaseModel, extra=Extra.forbid): 25 | """Defines data associated with the source of a finding.""" 26 | 27 | module: str = Field( 28 | title="The STACS module which generated the finding.", 29 | ) 30 | description: str = Field( 31 | None, 32 | title="A description of the finding", 33 | ) 34 | reference: str = Field( 35 | title="A reference to the element which generated the finding.", 36 | ) 37 | tags: List[str] = Field( 38 | [], 39 | title="A list of tags associated with the finding.", 40 | ) 41 | version: str = Field( 42 | None, 43 | title="The version of the element which generated the finding.", 44 | ) 45 | 46 | 47 | class Sample(BaseModel, extra=Extra.forbid): 48 | """The content and context of a finding.""" 49 | 50 | window: int = Field( 51 | title="The number of bytes before and after a finding included in the sample.", 52 | ) 53 | before: str = Field( 54 | title="The contents of N bytes before the finding.", 55 | ) 56 | after: str = Field( 57 | title="The contents of N bytes after the finding.", 58 | ) 59 | finding: str = Field( 60 | title="The contents of the finding.", 61 | ) 62 | binary: bool = Field( 63 | title="Indicates that the finding was binary and is base64 encoded." 64 | ) 65 | 66 | 67 | class Ignore(BaseModel, extra=Extra.forbid): 68 | """Defines the ignore schema of a finding.""" 69 | 70 | ignored: bool = Field( 71 | False, 72 | title="Whether the finding should be ignored due to allow list.", 73 | ) 74 | reason: str = Field( 75 | title="The reason to ignore the finding.", 76 | ) 77 | 78 | 79 | class Entry(BaseModel, extra=Extra.forbid): 80 | """Defines the schema of a finding.""" 81 | 82 | path: str = Field( 83 | title="The path to the file.", 84 | ) 85 | md5: str = Field( 86 | title="The MD5 sum of the file.", 87 | ) 88 | confidence: float = Field( 89 | None, 90 | title="The confidence of the finding.", 91 | ) 92 | location: Location = Field( 93 | None, 94 | title="The location of the finding in the input file.", 95 | ) 96 | sample: Sample = Field( 97 | None, 98 | title="Information relating to the content of the finding.", 99 | ) 100 | source: Source = Field( 101 | None, 102 | title="Information about the source of the finding.", 103 | ) 104 | ignore: Ignore = Field( 105 | None, 106 | title="Information about whether the entry should be ignored.", 107 | ) 108 | -------------------------------------------------------------------------------- /stacs/scan/model/ignore_list.py: -------------------------------------------------------------------------------- 1 | """Defines types to assist with loading and processing of ignore lists. 
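As an orientation for the schema defined below: on disk an ignore list is a JSON document loaded via from_file(), which also resolves any included lists. An in-memory equivalent (rule reference and hash are illustrative) might look like:

document = {
    "include": [],
    "ignore": [
        {
            "pattern": r"tests/fixtures/.*",
            "references": ["CredentialGeneric"],
            "reason": "Test fixtures intentionally contain sample credentials.",
        },
        {
            "md5": "d41d8cd98f00b204e9800998ecf8427e",
            "reason": "Empty placeholder file.",
        },
    ],
}

parsed = Format(**document)  # Format is defined below.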
2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import json 7 | import os 8 | from typing import List 9 | 10 | from pydantic import BaseModel, Extra, Field, validator 11 | from stacs.scan.exceptions import IgnoreListException, STACSException 12 | 13 | 14 | class Entry(BaseModel, extra=Extra.forbid): 15 | """Defines the schema of an ignore.""" 16 | 17 | path: str = Field( 18 | None, 19 | title="The path of a file to ignore.", 20 | ) 21 | pattern: str = Field( 22 | None, 23 | title="A pattern of the file path to ignore.", 24 | ) 25 | reason: str = Field( 26 | title="The reason for ignoring the finding.", 27 | ) 28 | md5: str = Field( 29 | None, 30 | title="The MD5 sum of the file to ignore.", 31 | ) 32 | module: str = Field( 33 | "stacs.scan.scanner.rules", 34 | title="Which module to ignore findings from.", 35 | ) 36 | references: List[str] = Field( 37 | [], 38 | title=( 39 | "A list of references to ignore findings from, defaults to all if not set." 40 | ), 41 | ) 42 | offset: int = Field( 43 | None, 44 | title="The offset of the specific finding to ignore.", 45 | ) 46 | 47 | @validator("path", always=True) 48 | def exclusive_path_or_pattern(cls, value, values): 49 | """Ensure that either path or pattern is provided, not both.""" 50 | if values.get("pattern") and value: 51 | raise IgnoreListException( 52 | "Either path OR pattern must be specified, not both." 53 | ) 54 | 55 | if values.get("pattern") and not value and not values.get("md5"): 56 | raise IgnoreListException("One of pattern, path, or md5 must be set.") 57 | 58 | return value 59 | 60 | @validator("offset", always=True) 61 | def offset_and_refernces_both_set(cls, value, values): 62 | if value and len(values.get("references")) > 0: 63 | raise IgnoreListException( 64 | "An offset cannot be combined with a list of references." 65 | ) 66 | 67 | return value 68 | 69 | 70 | class Format(BaseModel, extra=Extra.forbid): 71 | """Defines the schema of the ignore list.""" 72 | 73 | include: List[str] = Field( 74 | [], 75 | title="Define a list of additional ignore lists to include.", 76 | ) 77 | ignore: List[Entry] = Field( 78 | [], 79 | title="Define a list of ignore list entries.", 80 | ) 81 | 82 | 83 | def from_file(filename: str) -> Format: 84 | """Load an ignore list from file, returning a rendered down and complete list.""" 85 | parent_file = os.path.abspath(os.path.expanduser(filename)) 86 | parent_path = os.path.dirname(parent_file) 87 | 88 | # Load the parent ignore list, and then recurse as needed to handle includes. 89 | try: 90 | with open(parent_file, "r") as fin: 91 | parent_list = Format(**json.load(fin)) 92 | 93 | # Roll over the include list and replace all entries with a fully qualified, 94 | # path, if not already set. 95 | for index, path in enumerate(parent_list.include): 96 | parent_list.include[index] = os.path.expanduser(path) 97 | if not path.startswith("/"): 98 | parent_list.include[index] = os.path.join(parent_path, path) 99 | except (OSError, json.JSONDecodeError) as err: 100 | raise STACSException(err) 101 | 102 | # Recursively load included ignore lists. 103 | for file in parent_list.include: 104 | child_pack = from_file(file) 105 | parent_list.ignore.extend(child_pack.ignore) 106 | 107 | # Finally strip the included ignore lists from the entry, as these have been 108 | # resolved, returning the loaded ignore lists to the caller. 
109 | parent_list.include.clear()
110 | return parent_list
111 | 
--------------------------------------------------------------------------------
/stacs/scan/model/manifest.py:
--------------------------------------------------------------------------------
1 | """Defines types to assist with loading and processing of manifests.
2 | 
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 | 
6 | from typing import List
7 | 
8 | from pydantic import BaseModel, Extra, Field
9 | 
10 | 
11 | class Entry(BaseModel, extra=Extra.forbid):
12 | """Defines the schema of a file to process."""
13 | 
14 | path: str = Field(
15 | None,
16 | title="The path to the file on disk.",
17 | )
18 | overlay: str = Field(
19 | None,
20 | title=(
21 | "The overlay path of a file. This is used to generate virtual paths which "
22 | "provide the path to files inside of archives."
23 | ),
24 | )
25 | md5: str = Field(
26 | None,
27 | title="The MD5 sum of the file.",
28 | )
29 | parent: str = Field(
30 | None,
31 | title="The MD5 sum of the file's parent.",
32 | )
33 | mime: str = Field(
34 | None,
35 | title="The mimetype of the file.",
36 | )
37 | size: int = Field(
38 | None,
39 | title="The size of the file (in bytes).",
40 | )
41 | 
42 | 
43 | class Format(BaseModel, extra=Extra.forbid):
44 | """Defines the schema of a manifest file."""
45 | 
46 | files: List[Entry] = Field(
47 | [],
48 | title="A list of files to scan.",
49 | )
50 | 
--------------------------------------------------------------------------------
/stacs/scan/model/pack.py:
--------------------------------------------------------------------------------
1 | """Defines types to assist with loading and processing of rule packs.
2 | 
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 | 
6 | import json
7 | import os
8 | from typing import List
9 | 
10 | from pydantic import BaseModel, Extra, Field
11 | from stacs.scan.exceptions import STACSException
12 | 
13 | 
14 | class Entry(BaseModel, extra=Extra.forbid):
15 | """Defines the schema of a rule pack entry."""
16 | 
17 | module: str = Field(
18 | "rules",
19 | title="Which module the rules are for.",
20 | )
21 | path: str = Field(
22 | None,
23 | title="The path of the module's rules to load.",
24 | )
25 | 
26 | 
27 | class Format(BaseModel, extra=Extra.forbid):
28 | """Defines the schema of the rule pack."""
29 | 
30 | include: List[str] = Field(
31 | [],
32 | title="Define a list of additional packs to include.",
33 | )
34 | pack: List[Entry] = Field(
35 | [],
36 | title="A list of pack entries.",
37 | )
38 | 
39 | 
40 | def from_file(filename: str) -> Format:
41 | """Load a pack from file, returning a rendered down and complete pack."""
42 | parent_file = os.path.abspath(os.path.expanduser(filename))
43 | parent_path = os.path.dirname(parent_file)
44 | 
45 | # Load the parent pack, and then recurse as needed to handle includes.
46 | try:
47 | with open(parent_file, "r") as fin:
48 | parent_pack = Format(**json.load(fin))
49 | 
50 | # Roll over the pack and ensure any entries are fully qualified.
51 | for entry in parent_pack.pack:
52 | entry.path = os.path.expanduser(entry.path)
53 | if not entry.path.startswith("/"):
54 | # Resolve and update the path.
55 | entry.path = os.path.join(parent_path, entry.path)
56 | # Roll over the include list and replace all entries with a fully qualified
57 | # path, if not already set.
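For reference, the rule pack schema above mirrors the ignore list: a pack may include further packs and lists the rule files for each scanner module. A small sketch with illustrative file names:

document = {
    "include": ["credential/pack.json"],
    "pack": [
        {"module": "rules", "path": "credential/generic.yar"},
    ],
}

parsed = Format(**document)

When loaded via from_file() instead, relative include and rule paths are resolved against the directory containing the pack itself.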
58 | for index, path in enumerate(parent_pack.include): 59 | if not path.startswith("/"): 60 | parent_pack.include[index] = os.path.join(parent_path, path) 61 | except (OSError, json.JSONDecodeError) as err: 62 | raise STACSException(err) 63 | 64 | # Recursively load included packs, adding results to the loaded pack. 65 | for file in parent_pack.include: 66 | child_pack = from_file(file) 67 | parent_pack.pack.extend(child_pack.pack) 68 | 69 | # Finally strip the included packs from the entry, as these have been resolved, 70 | # returning the loaded pack to the caller. 71 | parent_pack.include.clear() 72 | return parent_pack 73 | -------------------------------------------------------------------------------- /stacs/scan/output/__init__.py: -------------------------------------------------------------------------------- 1 | """Defines outputs supported by STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from stacs.scan.output import markdown # noqa: F401 7 | from stacs.scan.output import pretty # noqa: F401 8 | from stacs.scan.output import sarif # noqa: F401 9 | -------------------------------------------------------------------------------- /stacs/scan/output/markdown.py: -------------------------------------------------------------------------------- 1 | """Defines a markdown output handler for STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from typing import List 7 | 8 | from stacs.scan import model 9 | from stacs.scan.exceptions import NotImplementedException 10 | 11 | 12 | def render(findings: List[model.finding.Entry], pack: model.pack.Format) -> str: 13 | raise NotImplementedException("Markdown output not yet implemented, sorry!") 14 | -------------------------------------------------------------------------------- /stacs/scan/output/pretty.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from typing import List 3 | 4 | from colorama import Fore, init 5 | from stacs.scan import helper, model 6 | from stacs.scan.__about__ import __version__ 7 | from stacs.scan.constants import ARCHIVE_FILE_SEPARATOR 8 | from stacs.scan.model.finding import Sample 9 | 10 | 11 | def generate_file_tree(virtual_path: str) -> str: 12 | """Returns a tree layout to the virtual path.""" 13 | tree = str() 14 | parts = virtual_path.split(ARCHIVE_FILE_SEPARATOR) 15 | 16 | for index, part in enumerate(parts): 17 | # Add some style. Print a package / box before each archive, and a document 18 | # before the file. 19 | if (index + 1) == len(parts): 20 | emoji = "📄" 21 | else: 22 | emoji = "📦" 23 | 24 | tree += f"{' ' * (index * 4)}`-- {emoji} {part}\n" 25 | 26 | return tree.rstrip() 27 | 28 | 29 | def generate_sample(sample: Sample): 30 | """Return a plain-text and text formatted sample.""" 31 | # Ensure the sample is nicely base64 encoded if binary, rather than slapping three 32 | # already base64'd strings together. 33 | raw = bytearray() 34 | if sample.binary: 35 | raw.extend(bytearray(base64.b64decode(sample.before))) 36 | raw.extend(bytearray(base64.b64decode(sample.finding))) 37 | raw.extend(bytearray(base64.b64decode(sample.after))) 38 | 39 | return str(base64.b64encode(raw), "utf-8") 40 | 41 | return "".join([sample.before, sample.finding, sample.after]) 42 | 43 | 44 | def render(findings: List[model.finding.Entry], pack: model.pack.Format) -> str: 45 | """Render a 'pretty' output to the console for human consumption.""" 46 | init() 47 | 48 | # Find all unsuppressed findings, and track them separately. 
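The file tree rendering above is driven by the virtual paths built during unpacking. A quick sketch of the output shape (component names are made up; ARCHIVE_FILE_SEPARATOR is whatever separator the constants module defines):

virtual = ARCHIVE_FILE_SEPARATOR.join(["bundle.tar", "app.zip", "config/secrets.env"])

print(generate_file_tree(virtual))
# `-- 📦 bundle.tar
#     `-- 📦 app.zip
#         `-- 📄 config/secrets.env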
49 | results = {} 50 | unsuppressed = 0 51 | 52 | for finding in findings: 53 | # Check for suppressions. 54 | if finding.ignore is not None and finding.ignore.ignored: 55 | continue 56 | 57 | # Track it. 58 | unsuppressed += 1 59 | 60 | if results.get(finding.path) is None: 61 | results[finding.path] = [] 62 | 63 | # Extract location appropriately. 64 | location = None 65 | if finding.location.line: 66 | location = f"line {finding.location.line}" 67 | else: 68 | location = f"{finding.location.offset}-bytes" 69 | 70 | # Generates all strings for presentation right away. 71 | results[finding.path].append( 72 | { 73 | "tree": generate_file_tree(finding.path), 74 | "path": finding.path.split(ARCHIVE_FILE_SEPARATOR)[-1], 75 | "rule": finding.source.reference, 76 | "text": finding.source.description, 77 | "location": location, 78 | "sample": generate_sample(finding.sample), 79 | } 80 | ) 81 | 82 | # Provide a summary. 83 | print(helper.banner(version=__version__)) 84 | 85 | if findings == 0: 86 | print("✨ " + Fore.GREEN + "No unsuppressed findings! Great work! ✨\n") 87 | return 88 | 89 | # Render out the findings. 90 | print( 91 | f"{Fore.RED}🔥 There were {unsuppressed} unsuppressed findings in " 92 | f"{len(results)} files 🔥\n" 93 | ) 94 | 95 | for candidate in results: 96 | filepath = candidate.split(ARCHIVE_FILE_SEPARATOR)[0] 97 | count = len(results[candidate]) 98 | 99 | if ARCHIVE_FILE_SEPARATOR in candidate: 100 | print(f"{Fore.RED}❌ {count} finding(s) inside of file {filepath} (Nested)") 101 | else: 102 | print(f"{Fore.RED}❌ {count} finding(s) inside of file {filepath}") 103 | 104 | for finding in results[candidate]: 105 | print() 106 | helper.printi(f"{Fore.YELLOW}Reason : {finding['text']}") 107 | helper.printi(f"{Fore.YELLOW}Rule Id : {finding['rule']}") 108 | helper.printi(f"{Fore.YELLOW}Location : {finding['location']}\n\n") 109 | helper.printi(f"{Fore.YELLOW}Filetree:\n\n") 110 | helper.printi( 111 | finding["tree"], 112 | prefix=f" {Fore.RESET}|{Fore.BLUE}", 113 | ) 114 | print() 115 | helper.printi(f"{Fore.YELLOW}Sample:\n\n") 116 | helper.printi( 117 | f"... {finding['sample']} ...", 118 | prefix=f" {Fore.RESET}|{Fore.BLUE}", 119 | ) 120 | print() 121 | 122 | print(f"\n{Fore.RESET}{'-' * 78}\n") 123 | -------------------------------------------------------------------------------- /stacs/scan/output/sarif.py: -------------------------------------------------------------------------------- 1 | """Defines a SARIF output handler for STACS. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import base64 7 | import json 8 | import re 9 | from typing import Any, Dict, List, Optional, Tuple 10 | 11 | from stacs.scan import __about__, model 12 | from stacs.scan.constants import ARCHIVE_FILE_SEPARATOR 13 | 14 | # Only one SARIF version will be supported at a time. 
15 | SARIF_VERSION = "2.1.0" 16 | SARIF_SCHEMA_URI = "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0.json" 17 | SARIF_URI_BASE_ID = "STACSROOT" 18 | 19 | 20 | def confidence_to_level(confidence: int) -> str: 21 | """Maps the confidence of a finding to a SARIF level.""" 22 | if confidence < 70: 23 | return "warning" 24 | else: 25 | return "error" 26 | 27 | 28 | def render_artifact(path: str, parent: Optional[int] = None) -> Dict[str, Any]: 29 | """Create a new artifact entry.""" 30 | artifact = { 31 | "location": { 32 | "uri": path, 33 | "uriBaseId": SARIF_URI_BASE_ID, 34 | }, 35 | } 36 | 37 | if parent is not None: 38 | artifact["parentIndex"] = parent 39 | 40 | return artifact 41 | 42 | 43 | def path_in_artifacts(path: str, artifacts: List[Dict[str, Any]], parent) -> int: 44 | """Checks if a path exists in the artifacts list.""" 45 | for index, artifact in enumerate(artifacts): 46 | if path == artifact["location"]["uri"]: 47 | # Short circuit if we both don't have a parent. 48 | if artifact.get("parentIndex", None) is None and parent is None: 49 | return index 50 | 51 | # Check common ancestry. 52 | try: 53 | their_parent = artifact.get("parentIndex", None) 54 | our_parent = parent 55 | 56 | while True: 57 | if our_parent == their_parent: 58 | their_parent = artifacts[their_parent]["parentIndex"] 59 | our_parent = artifacts[our_parent]["parentIndex"] 60 | else: 61 | break 62 | except KeyError: 63 | # We're good all the way back to the root. 64 | return index 65 | 66 | return None 67 | 68 | 69 | def add_artifact( 70 | root: str, 71 | finding: model.finding.Entry, 72 | artifacts: List[Dict[str, Any]], 73 | ) -> Tuple[int, List[Dict[str, Any]]]: 74 | """Generates SARIF artifact entires for findings (SARIF v2.1.0 Section 3.24).""" 75 | parent = None 76 | 77 | for real_path in finding.path.split(ARCHIVE_FILE_SEPARATOR): 78 | # Strip the scan directory root from the path for Base URIs to work properly. 79 | path = re.sub(rf"^{root}", "", real_path).lstrip("/") 80 | 81 | # Check if the path already exists. 82 | new_parent = path_in_artifacts(path, artifacts, parent) 83 | if new_parent is not None: 84 | parent = new_parent 85 | continue 86 | 87 | artifacts.append(render_artifact(path, parent)) 88 | parent = len(artifacts) - 1 89 | 90 | # Add metadata to this entry, if missing. 91 | artifacts[parent]["hashes"] = { 92 | "md5": finding.md5, 93 | } 94 | return (parent, artifacts) 95 | 96 | 97 | def render( 98 | root: str, findings: List[model.finding.Entry], pack: model.pack.Format 99 | ) -> str: 100 | """Renders down a SARIF document for STACS findings.""" 101 | rules = [] 102 | results = [] 103 | artifacts = [] 104 | 105 | # Generate a result (SARIF v2.1.0 Section 3.27) for each finding. 106 | for finding in findings: 107 | # Suppressions (SARIF v2.1.0 Section 3.27.23) are used to track findings where 108 | # there is an "ignore" set - via ignore list. 109 | suppressions = [] 110 | 111 | # Create an artifactContent (SARIF v2.1.0 Section 3.3) entry to track the sample 112 | # of the finding. 113 | context_content = {} 114 | artifact_content = {} 115 | 116 | if finding.sample.binary: 117 | artifact_content["binary"] = finding.sample.finding 118 | # Unencode and then re-encode the sample into a single B64 string to provide 119 | # context. 
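For reference, the artifact plumbing above produces entries shaped as below, and the confidence-to-level mapping is a simple threshold (paths are illustrative):

confidence_to_level(90)
# -> "error" (anything below 70 becomes "warning")

render_artifact("app.zip")
# -> {"location": {"uri": "app.zip", "uriBaseId": "STACSROOT"}}

render_artifact("config/secrets.env", parent=0)
# -> {"location": {"uri": "config/secrets.env", "uriBaseId": "STACSROOT"}, "parentIndex": 0}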
120 |             context_content["binary"] = str(
121 |                 base64.b64encode(
122 |                     base64.b64decode(finding.sample.before)
123 |                     + base64.b64decode(finding.sample.finding)
124 |                     + base64.b64decode(finding.sample.after)
125 |                 ),
126 |                 "utf-8",
127 |             )
128 |         else:
129 |             artifact_content["text"] = finding.sample.finding
130 |             context_content["text"] = (
131 |                 finding.sample.before + finding.sample.finding + finding.sample.after
132 |             )
133 | 
134 |         # Create a new contextRegion (SARIF v2.1.0 Section 3.29.5) to provide contextual
135 |         # information about the finding, but do not include the byte or line number
136 |         # offset.
137 |         context = {"snippet": context_content}
138 | 
139 |         # Create a new region (SARIF v2.1.0 Section 3.30) to track the location of the
140 |         # finding and the sample.
141 |         region = {
142 |             "byteOffset": finding.location.offset,
143 |             "snippet": artifact_content,
144 |         }
145 | 
146 |         # Line numbers are optional, as the input file may be binary.
147 |         if finding.location.line:
148 |             region["startLine"] = finding.location.line
149 | 
150 |         # Add a new artifact for this finding, or retrieve the index of the existing one.
151 |         index, artifacts = add_artifact(root, finding, artifacts)
152 | 
153 |         # Strip the scan directory root from the path, as we're using the reference
154 |         # from originalUriBaseIds (SARIF v2.1.0 Section 3.14.14) to allow "portability".
155 |         path = finding.path.split(ARCHIVE_FILE_SEPARATOR)[-1]
156 |         relative_path = re.sub(rf"^{root}", "", path).lstrip("/")
157 | 
158 |         # Pin the artifact location back to a physical location (SARIF v2.1.0 Section
159 |         # 3.28.3).
160 |         physical_location = {
161 |             "physicalLocation": {
162 |                 "region": region,
163 |                 "contextRegion": context,
164 |                 "artifactLocation": {
165 |                     "uri": relative_path,
166 |                     "index": index,
167 |                     "uriBaseId": SARIF_URI_BASE_ID,
168 |                 },
169 |             },
170 |         }
171 | 
172 |         # Generate a new Rule entry, if required (SARIF v2.1.0 Section 3.49).
173 |         rule = None
174 | 
175 |         for candidate in rules:
176 |             if finding.source.reference == candidate.get("id"):
177 |                 rule = candidate
178 |                 break
179 | 
180 |         if not rule:
181 |             # Add the description from the original rule pack entry into the Rule for
182 |             # easy tracking.
183 |             rule = {
184 |                 "id": finding.source.reference,
185 |                 "shortDescription": {
186 |                     "text": finding.source.description,
187 |                 },
188 |             }
189 |             rules.append(rule)
190 | 
191 |         # Add a Suppression entry if this finding was marked as "Ignored", along with
192 |         # the reason (justification) from the original ignore list.
193 |         if finding.ignore is not None and finding.ignore.ignored:
194 |             suppressions.append(
195 |                 {
196 |                     "kind": "external",
197 |                     "status": "accepted",
198 |                     "justification": finding.ignore.reason,
199 |                 }
200 |             )
201 | 
202 |         # Track the finding (Result).
203 |         results.append(
204 |             {
205 |                 "message": rule.get("shortDescription"),
206 |                 "level": confidence_to_level(finding.confidence),
207 |                 "ruleId": finding.source.reference,
208 |                 "locations": [
209 |                     physical_location,
210 |                 ],
211 |                 "suppressions": suppressions,
212 |             }
213 |         )
214 | 
215 |     # Add a toolComponent (SARIF v2.1.0 Section 3.19), and bolt it all together.
216 |     tool = {
217 |         "driver": {
218 |             "name": __about__.__title__.upper(),
219 |             "rules": rules,
220 |             "version": __about__.__version__,
221 |             "downloadUri": __about__.__uri__,
222 |             "informationUri": __about__.__uri__,
223 |         },
224 |     }
225 |     run = {
226 |         "tool": tool,
227 |         "results": results,
228 |         "artifacts": artifacts,
229 |         "originalUriBaseIds": {
230 |             SARIF_URI_BASE_ID: {
231 |                 "uri": f"file://{root.rstrip('/')}/",
232 |             },
233 |         },
234 |     }
235 |     sarif = {
236 |         "version": SARIF_VERSION,
237 |         "$schema": SARIF_SCHEMA_URI,
238 |         "runs": [
239 |             run,
240 |         ],
241 |     }
242 | 
243 |     # Return a stringified JSON representation of the SARIF document.
244 |     return json.dumps(sarif)
245 | 
--------------------------------------------------------------------------------
/stacs/scan/scanner/__init__.py:
--------------------------------------------------------------------------------
1 | """Defines scanners used by STACS.
2 | 
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 | 
6 | from stacs.scan.scanner import rules
7 | 
8 | # Export all enabled scanners.
9 | __all__ = [
10 |     "rules",
11 | ]
12 | 
--------------------------------------------------------------------------------
/stacs/scan/scanner/rules.py:
--------------------------------------------------------------------------------
1 | """Implements a rules based scanner for STACS.
2 | 
3 | SPDX-License-Identifier: BSD-3-Clause
4 | """
5 | 
6 | import base64
7 | import hashlib
8 | import os
9 | from concurrent.futures import ThreadPoolExecutor, as_completed
10 | from typing import List
11 | 
12 | import yara
13 | 
14 | from stacs.scan.constants import CHUNK_SIZE, WINDOW_SIZE
15 | from stacs.scan.exceptions import FileAccessException, InvalidFormatException
16 | from stacs.scan.loader import archive
17 | from stacs.scan.model import finding, manifest, pack
18 | 
19 | 
20 | def is_binary(target: manifest.Entry) -> bool:
21 |     """Attempts to determine if a target file is binary."""
22 |     # This is a bit false positive prone, as some "application" mime types are text.
23 |     # However, as we only support a finite number of formats, we should be safe to do
24 |     # this (for now!)
25 |     binary_types = (
26 |         "application",
27 |         "image",
28 |         "audio",
29 |         "video",
30 |     )
31 |     if target.mime and target.mime.startswith(binary_types):
32 |         return True
33 | 
34 |     # Otherwise, we'll try and read some data as text and see. This could fail if a
35 |     # binary contained readable text for 10 * CHUNK_SIZE.
36 |     try:
37 |         with open(target.path, "r") as fin:
38 |             for _ in range(0, 10):
39 |                 fin.read(CHUNK_SIZE)
40 |     except UnicodeDecodeError:
41 |         return True
42 | 
43 |     # Default to text.
44 |     return False
45 | 
46 | 
47 | def generate_sample(target: manifest.Entry, offset: int, size: int) -> finding.Sample:
48 |     """Generates a sample for a finding."""
49 |     binary = is_binary(target)
50 | 
51 |     before = bytes()
52 |     after = bytes()
53 |     entry = bytes()
54 | 
55 |     try:
56 |         # Make sure we don't try and read past the beginning and end of the file.
57 |         target_sz = os.stat(target.path).st_size
58 | 
59 |         if offset - WINDOW_SIZE < 0:
60 |             before_sz = offset
61 |             before_offset = 0
62 |         else:
63 |             before_sz = WINDOW_SIZE
64 |             before_offset = offset - before_sz
65 | 
66 |         # Ensure we read N bytes AFTER the entire match, not after the first byte of the
67 |         # match.
68 |         if offset + size + WINDOW_SIZE > target_sz:
69 |             after_sz = target_sz - (offset + size)
70 |             after_offset = target_sz - after_sz
71 |         else:
72 |             after_sz = WINDOW_SIZE
73 |             after_offset = offset + size
74 | 
75 |         with open(target.path, "rb") as fin:
76 |             # Seek to and read in the context before.
77 |             fin.seek(before_offset)
78 |             before = fin.read(before_sz)
79 | 
80 |             # Read the finding match itself. We have this already from yara, but we're
81 |             # already here so we may as well.
82 |             fin.seek(offset)
83 |             entry = fin.read(size)
84 | 
85 |             # Seek to and read in the context after the finding.
86 |             fin.seek(after_offset)
87 |             after = fin.read(after_sz)
88 |     except OSError as err:
89 |         raise FileAccessException(err)
90 | 
91 |     if not binary:
92 |         try:
93 |             return finding.Sample(
94 |                 window=WINDOW_SIZE,
95 |                 before=str(before, "utf-8"),
96 |                 after=str(after, "utf-8"),
97 |                 finding=str(entry, "utf-8"),
98 |                 binary=binary,
99 |             )
100 |         except UnicodeDecodeError:
101 |             # Fall through and return a base64 encoded sample.
102 |             pass
103 | 
104 |     return finding.Sample(
105 |         window=WINDOW_SIZE,
106 |         before=base64.b64encode(before),
107 |         after=base64.b64encode(after),
108 |         finding=base64.b64encode(entry),
109 |         binary=binary,
110 |     )
111 | 
112 | 
113 | def generate_location(target: manifest.Entry, offset: int) -> finding.Location:
114 |     """Generates a location for a finding."""
115 |     # If the file is binary, we can't generate a line number, so we already have the
116 |     # data we need.
117 |     if is_binary(target):
118 |         return finding.Location(offset=offset)
119 | 
120 |     # Attempt to generate a line number for the finding.
121 |     bytes_read = 0
122 |     line_number = 1
123 |     try:
124 |         with open(target.path, "r") as fin:
125 |             # Read in chunks, counting the number of newline characters up to the chunk
126 |             # which includes the finding.
127 |             while bytes_read < offset:
128 |                 bytes_read += CHUNK_SIZE
129 | 
130 |                 if bytes_read > offset:
131 |                     line_number += fin.read(offset - (bytes_read - CHUNK_SIZE)).count("\n")
132 |                 else:
133 |                     line_number += fin.read(CHUNK_SIZE).count("\n")
134 |     except UnicodeDecodeError:
135 |         # It's possible to get into a state where the detected mime-type of a file is
136 |         # incorrect, resulting in unprocessable binary data making it here. In these
137 |         # cases we'll just bail early and report the number of bytes into the file of
138 |         # the finding. Exactly as we do for known binary files.
139 |         return finding.Location(offset=offset)
140 |     except OSError as err:
141 |         raise FileAccessException(err)
142 | 
143 |     return finding.Location(offset=offset, line=line_number)
144 | 
145 | 
146 | def generate_findings(target: manifest.Entry, match: yara.Match) -> List[finding.Entry]:
147 |     """Attempts to create findings based on matches inside of the target file."""
148 |     findings = []
149 | 
150 |     # Generate a new finding entry for each matched string. This is in order to ensure
151 |     # that multiple findings in the same file are listed separately - as they may be
152 |     # different credentials.
153 |     for offset, _, entry in match.strings:
154 |         location = generate_location(target, offset)
155 |         sample = generate_sample(target, offset, len(entry))
156 | 
157 |         # Add on information about the origin of the finding (that's us!)
158 | source = finding.Source( 159 | module=__name__, 160 | reference=match.rule, 161 | tags=match.tags, 162 | version=match.meta.get("version", "UNKNOWN"), 163 | description=match.meta.get("description"), 164 | ) 165 | findings.append( 166 | finding.Entry( 167 | md5=target.md5, 168 | path=target.overlay if target.overlay else target.path, 169 | confidence=match.meta.get("accuracy", 50), 170 | source=source, 171 | sample=sample, 172 | location=location, 173 | ) 174 | ) 175 | 176 | return findings 177 | 178 | 179 | def matcher(target: manifest.Entry, ruleset: yara.Rules) -> List[finding.Entry]: 180 | findings = [] 181 | 182 | for match in ruleset.match(target.path): 183 | findings.extend(generate_findings(target, match)) 184 | 185 | return findings 186 | 187 | 188 | def run( 189 | targets: List[manifest.Entry], 190 | pack: pack.Format, 191 | workers: int = 10, 192 | skip_on_eacces: bool = True, 193 | ) -> List[finding.Entry]: 194 | """ 195 | Executes the rules based matcher on all input files, returning a list of finding 196 | Entry objects. 197 | """ 198 | findings = [] 199 | 200 | # Load and compile all YARA rules up front. 201 | namespaces = dict() 202 | 203 | for rule in pack.pack: 204 | namespace = hashlib.md5(bytes(rule.path, "utf-8")).hexdigest() 205 | namespaces[namespace] = rule.path 206 | 207 | try: 208 | ruleset = yara.compile(filepaths=namespaces) 209 | except yara.Error as err: 210 | raise InvalidFormatException(err) 211 | 212 | # Run the matcher in a thread pool as we're likely to be I/O bound. 213 | with ThreadPoolExecutor(max_workers=workers) as pool: 214 | futures = [] 215 | 216 | # Reject any input files which are supported archives. This is as we should have 217 | # unpacked versions of these to process, which allows matching the specific file 218 | # with a finding, rather than a finding on an archive. 219 | # 220 | # NOTE: Credentials stuffed into metadata of supported archive formats which 221 | # support archive metadata (such as Zip's "Extra") will not be found. 222 | # 223 | for target in targets: 224 | if target.mime not in archive.MIME_TYPE_HANDLERS: 225 | futures.append(pool.submit(matcher, target, ruleset)) 226 | 227 | for future in as_completed(futures): 228 | try: 229 | findings.extend(future.result()) 230 | except FileAccessException: 231 | if not skip_on_eacces: 232 | raise 233 | 234 | return findings 235 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/tests/__init__.py -------------------------------------------------------------------------------- /tests/fixtures/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stacscan/stacs/11f350344a8971f7d1d7a4af39f978d938165487/tests/fixtures/.gitignore -------------------------------------------------------------------------------- /tests/fixtures/findings/001.txt: -------------------------------------------------------------------------------- 1 | Credential is at the end of file, with less than the WINDOW_SIZE available for a sample 2 | this should cause the after window to be reduced to only match the bytes remaining after 3 | the finding. 
4 | 5 | ghp_YWNjb3VudHMyM3JkZnNkZnNkZnNkZnNkZnNk 6 | -------------------------------------------------------------------------------- /tests/fixtures/findings/002.txt: -------------------------------------------------------------------------------- 1 | X 2 | 3 | ghp_YWNjb3VudHMyM3JkZnNkZnNkZnNkZnNkZnNk 4 | 5 | There is plenty of data after the finding, but the sample before the finding should not 6 | try and read past the start of the file. 7 | -------------------------------------------------------------------------------- /tests/fixtures/findings/003.txt: -------------------------------------------------------------------------------- 1 | ghp_YWNjb3VudHMyM3JkZnNkZnNkZnNkZnNkZnNk -------------------------------------------------------------------------------- /tests/fixtures/findings/004.txt: -------------------------------------------------------------------------------- 1 | There is both plenty of data before and after the finding, so samples should operate 2 | properly and capture WINDOW_SIZE before and after. 3 | 4 | ghp_YWNjb3VudHMyM3JkZnNkZnNkZnNkZnNkZnNk 5 | 6 | Unlike the previous finding fixture (003) where the file ONLY contains the finding with 7 | no additional data. 8 | -------------------------------------------------------------------------------- /tests/fixtures/ignore_list/001-simple.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "ignore": [ 3 | { 4 | "pattern": "src/crypto/rsa\\.c", 5 | "reason": "PEM format RSA header and trailer constants due to parser." 6 | } 7 | ] 8 | } -------------------------------------------------------------------------------- /tests/fixtures/ignore_list/002-framework.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "ignore": [ 4 | { 5 | "pattern": ".*/tests/.*", 6 | "reason": "Test fixtures contain example credentials." 7 | } 8 | ] 9 | } -------------------------------------------------------------------------------- /tests/fixtures/ignore_list/002-project.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "002-framework.valid.json", 4 | "002-system.valid.json" 5 | ], 6 | "ignore": [] 7 | } -------------------------------------------------------------------------------- /tests/fixtures/ignore_list/002-system.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "ignore": [ 4 | { 5 | "pattern": ".*/libexample\\.so$", 6 | "reason": "libexample contains false positives due to a reason." 
7 | }, 8 | { 9 | "pattern": ".*/libssl.*?\\.so$", 10 | "reason": "Ignore all hash rules inside version X.Y.Z of libssl", 11 | "references": [ 12 | "CredentialHashMD5", 13 | "CredentialHashSHA1", 14 | "CredentialHashSHA256", 15 | "CredentialHashSHA512" 16 | ] 17 | }, 18 | { 19 | "md5": "e95348ed81f439d0a73a18835bd78eec", 20 | "reason": "Ignore all hash rules inside version X.Y.Z of example file", 21 | "references": [ 22 | "CredentialHashMD5", 23 | "CredentialHashSHA1", 24 | "CredentialHashSHA256", 25 | "CredentialHashSHA512" 26 | ] 27 | } 28 | ] 29 | } -------------------------------------------------------------------------------- /tests/fixtures/pack/001-simple.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "pack": [ 4 | { 5 | "module": "rules", 6 | "path": "all.yar" 7 | } 8 | ] 9 | } -------------------------------------------------------------------------------- /tests/fixtures/pack/002-cloud.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "pack": [ 4 | { 5 | "module": "rules", 6 | "path": "credential/cloud/aws/access_key.yar" 7 | }, 8 | { 9 | "module": "rules", 10 | "path": "credential/cloud/gcp/service_account.yar" 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /tests/fixtures/pack/002-parent.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "002-cloud.valid.json", 4 | "002-pki.valid.json" 5 | ], 6 | "pack": [] 7 | } 8 | -------------------------------------------------------------------------------- /tests/fixtures/pack/002-pki-dsa.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "pack": [ 4 | { 5 | "module": "rules", 6 | "path": "credential/pki/dsa/der.yar" 7 | }, 8 | { 9 | "module": "rules", 10 | "path": "credential/pki/dsa/pem.yar" 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /tests/fixtures/pack/002-pki-rsa.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [], 3 | "pack": [ 4 | { 5 | "module": "rules", 6 | "path": "credential/pki/rsa/der.yar" 7 | }, 8 | { 9 | "module": "rules", 10 | "path": "credential/pki/rsa/pem.yar" 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /tests/fixtures/pack/002-pki.valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": [ 3 | "002-pki-rsa.valid.json", 4 | "002-pki-dsa.valid.json" 5 | ], 6 | "pack": [] 7 | } -------------------------------------------------------------------------------- /tests/test_filter_ignore_list.py: -------------------------------------------------------------------------------- 1 | """Tests the STACS ignore list filter.""" 2 | 3 | import os 4 | import unittest 5 | 6 | import stacs.scan 7 | 8 | 9 | class STACSFilterIgnoreListTestCase(unittest.TestCase): 10 | """Tests the STACS ignore list filter.""" 11 | 12 | def setUp(self): 13 | """Ensure the application is setup for testing.""" 14 | self.fixtures_path = os.path.join( 15 | os.path.dirname(os.path.abspath(__file__)), "fixtures/ignore_list/" 16 | ) 17 | 18 | def tearDown(self): 19 | """Ensure everything is torn down between tests.""" 20 | pass 21 | 22 | def test_by_path(self): 23 | """Validate whether 
path filters are working.""" 24 | # Use the same fixture for all branches. 25 | finding = stacs.scan.model.finding.Entry( 26 | path="/a/a", 27 | md5="fa19207ef28b6a97828e3a22b11290e9", 28 | location=stacs.scan.model.finding.Location( 29 | offset=300, 30 | ), 31 | source=stacs.scan.model.finding.Source( 32 | module="stacs.scan.scanner.rules", 33 | reference="SomeRule", 34 | ), 35 | ) 36 | 37 | # Define ignores which should correctly be ignored. 38 | hits = [ 39 | # Path matches, no other constraint. 40 | stacs.scan.model.ignore_list.Entry(path="/a/a", reason="Test"), 41 | # Path matches, reference matches. 42 | stacs.scan.model.ignore_list.Entry( 43 | path="/a/a", reason="Test", references=["SomeRule", "OtherRule"] 44 | ), 45 | # Path matches, offset matches. 46 | stacs.scan.model.ignore_list.Entry(path="/a/a", reason="Test", offset=300), 47 | ] 48 | 49 | # Path differs. 50 | miss = stacs.scan.model.ignore_list.Entry(path="/a/b", reason="Test") 51 | self.assertEqual(stacs.scan.filter.ignore_list.by_path(finding, miss), False) 52 | 53 | # Path matches, reference differs. 54 | miss = stacs.scan.model.ignore_list.Entry( 55 | path="/a/a", reason="Test", references=["OtherRule"] 56 | ) 57 | self.assertEqual(stacs.scan.filter.ignore_list.by_path(finding, miss), False) 58 | 59 | # Path matches, offset differs. 60 | miss = stacs.scan.model.ignore_list.Entry( 61 | path="/a/a", reason="Test", offset=1234 62 | ) 63 | self.assertEqual(stacs.scan.filter.ignore_list.by_path(finding, miss), False) 64 | 65 | # Ensure all hit entries are matches. 66 | for hit in hits: 67 | self.assertEqual(stacs.scan.filter.ignore_list.by_path(finding, hit), True) 68 | 69 | def test_by_pattern(self): 70 | """Validate whether pattern filters are working.""" 71 | # Use the same fixture for all branches. 72 | finding = stacs.scan.model.finding.Entry( 73 | path="/a/tests/a", 74 | md5="fa19207ef28b6a97828e3a22b11290e9", 75 | location=stacs.scan.model.finding.Location( 76 | offset=300, 77 | ), 78 | source=stacs.scan.model.finding.Source( 79 | module="stacs.scan.scanner.rules", 80 | reference="SomeRule", 81 | ), 82 | ) 83 | 84 | # Pattern matches, no other constraint. 85 | hit = stacs.scan.model.ignore_list.Entry(pattern=".*/tests/.*", reason="Test") 86 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, hit), True) 87 | 88 | # Pattern matches, reference matches. 89 | hit = stacs.scan.model.ignore_list.Entry( 90 | pattern=".*/tests/.*", 91 | reason="Test", 92 | references=["SomeRule", "OtherRule"], 93 | ) 94 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, hit), True) 95 | 96 | # Pattern matches, offset matches. 97 | hit = stacs.scan.model.ignore_list.Entry( 98 | pattern=".*/tests/.*", reason="Test", offset=300 99 | ) 100 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, hit), True) 101 | 102 | # Pattern differs. 103 | miss = stacs.scan.model.ignore_list.Entry(pattern=r"\.shasums$", reason="Test") 104 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, miss), False) 105 | 106 | # Pattern matches, reference differs. 107 | miss = stacs.scan.model.ignore_list.Entry( 108 | pattern=".*/tests/.*", reason="Test", references=["OtherRule"] 109 | ) 110 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, miss), False) 111 | 112 | # Pattern matches, offset differs. 
113 | miss = stacs.scan.model.ignore_list.Entry( 114 | pattern=".*/tests/.*", reason="Test", offset=1234 115 | ) 116 | self.assertEqual(stacs.scan.filter.ignore_list.by_pattern(finding, miss), False) 117 | 118 | def test_by_hash(self): 119 | """Validate whether hash filters are working.""" 120 | # Use the same fixture for all branches. 121 | finding = stacs.scan.model.finding.Entry( 122 | path="/a/tests/a", 123 | md5="fa19207ef28b6a97828e3a22b11290e9", 124 | location=stacs.scan.model.finding.Location( 125 | offset=300, 126 | ), 127 | source=stacs.scan.model.finding.Source( 128 | module="stacs.scan.scanner.rules", 129 | reference="SomeRule", 130 | ), 131 | ) 132 | 133 | # Hash matches, no other constraint. 134 | hit = stacs.scan.model.ignore_list.Entry( 135 | md5="fa19207ef28b6a97828e3a22b11290e9", reason="Test" 136 | ) 137 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, hit), True) 138 | 139 | # Hash matches, reference matches. 140 | hit = stacs.scan.model.ignore_list.Entry( 141 | md5="fa19207ef28b6a97828e3a22b11290e9", 142 | reason="Test", 143 | references=["SomeRule", "OtherRule"], 144 | ) 145 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, hit), True) 146 | 147 | # Hash matches, offset matches. 148 | hit = stacs.scan.model.ignore_list.Entry( 149 | md5="fa19207ef28b6a97828e3a22b11290e9", reason="Test", offset=300 150 | ) 151 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, hit), True) 152 | 153 | # Hash differs. 154 | miss = stacs.scan.model.ignore_list.Entry( 155 | md5="cf42e6f36da80658591489975bbd845b", reason="Test" 156 | ) 157 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, miss), False) 158 | 159 | # Hash matches, reference differs. 160 | miss = stacs.scan.model.ignore_list.Entry( 161 | md5="fa19207ef28b6a97828e3a22b11290e9", 162 | reason="Test", 163 | references=["OtherRule"], 164 | ) 165 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, miss), False) 166 | 167 | # Hash matches, offset differs. 
168 | miss = stacs.scan.model.ignore_list.Entry( 169 | md5="fa19207ef28b6a97828e3a22b11290e9", reason="Test", offset=1234 170 | ) 171 | self.assertEqual(stacs.scan.filter.ignore_list.by_hash(finding, miss), False) 172 | -------------------------------------------------------------------------------- /tests/test_loader_filepath.py: -------------------------------------------------------------------------------- 1 | """Tests the STACS filepath loader.""" 2 | 3 | import os 4 | import unittest 5 | 6 | 7 | class STACSLoaderFilepathTestCase(unittest.TestCase): 8 | """Tests the STACS filepath loader.""" 9 | 10 | def setUp(self): 11 | """Ensure the application is setup for testing.""" 12 | self.fixtures_path = os.path.join( 13 | os.path.dirname(os.path.abspath(__file__)), "fixtures/" 14 | ) 15 | 16 | def tearDown(self): 17 | """Ensure everything is torn down between tests.""" 18 | pass 19 | -------------------------------------------------------------------------------- /tests/test_model_ignore_list.py: -------------------------------------------------------------------------------- 1 | """Tests the STACS allow list model and validator.""" 2 | 3 | import json 4 | import os 5 | import unittest 6 | 7 | import stacs.scan 8 | 9 | 10 | class STACSModelAllowListTestCase(unittest.TestCase): 11 | """Tests the STACS allow list model and validator.""" 12 | 13 | def setUp(self): 14 | """Ensure the application is setup for testing.""" 15 | self.fixtures_path = os.path.join( 16 | os.path.dirname(os.path.abspath(__file__)), "fixtures/ignore_list/" 17 | ) 18 | 19 | def tearDown(self): 20 | """Ensure everything is torn down between tests.""" 21 | pass 22 | 23 | def test_simple(self): 24 | """Ensure that simple allow lists can be loaded.""" 25 | with open(os.path.join(self.fixtures_path, "001-simple.valid.json"), "r") as f: 26 | stacs.scan.model.ignore_list.Format(**json.load(f)) 27 | 28 | def test_hierarchical_loading(self): 29 | """Ensure that hierarchical allow lists can be loaded.""" 30 | with open(os.path.join(self.fixtures_path, "002-project.valid.json"), "r") as f: 31 | stacs.scan.model.ignore_list.Format(**json.load(f)) 32 | -------------------------------------------------------------------------------- /tests/test_model_pack.py: -------------------------------------------------------------------------------- 1 | """Tests the STACS pack model and validator.""" 2 | 3 | import json 4 | import os 5 | import unittest 6 | 7 | import stacs.scan 8 | 9 | 10 | class STACSModelPackTestCase(unittest.TestCase): 11 | """Tests the STACS pack model and validator.""" 12 | 13 | def setUp(self): 14 | """Ensure the application is setup for testing.""" 15 | self.fixtures_path = os.path.join( 16 | os.path.dirname(os.path.abspath(__file__)), "fixtures/pack/" 17 | ) 18 | 19 | def tearDown(self): 20 | """Ensure everything is torn down between tests.""" 21 | pass 22 | 23 | def test_simple_pack(self): 24 | """Ensure that simple packs can be loaded.""" 25 | with open(os.path.join(self.fixtures_path, "001-simple.valid.json"), "r") as f: 26 | stacs.scan.model.pack.Format(**json.load(f)) 27 | -------------------------------------------------------------------------------- /tests/test_output_sarif.py: -------------------------------------------------------------------------------- 1 | """Tests the STACS SARIF output module.""" 2 | 3 | import unittest 4 | 5 | import stacs.scan 6 | 7 | 8 | class STACSOutputSARIFTestCase(unittest.TestCase): 9 | """Tests the STACS SARIF output module.""" 10 | 11 | def setUp(self): 12 | """Ensure the 
application is setup for testing.""" 13 | pass 14 | 15 | def tearDown(self): 16 | """Ensure everything is torn down between tests.""" 17 | pass 18 | 19 | def test_add_artifact(self): 20 | """Ensure that artifact entries are deduplicated by their full path.""" 21 | findings = [ 22 | stacs.scan.model.finding.Entry( 23 | path="/tmp/rootfs/etc/passwd", 24 | md5="b39bfc0e26a30024c76e4dcb8a1eae87", 25 | ), 26 | stacs.scan.model.finding.Entry( 27 | path="/tmp/rootfs/etc/passwd", 28 | md5="b39bfc0e26a30024c76e4dcb8a1eae87", 29 | ), 30 | stacs.scan.model.finding.Entry( 31 | path="/tmp/rootfs/a.tar.gz!a.tar!cred", 32 | md5="bf072e9119077b4e76437a93986787ef", 33 | ), 34 | stacs.scan.model.finding.Entry( 35 | path="/tmp/rootfs/a.tar.gz!a.tar!b_cred", 36 | md5="30cf3d7d133b08543cb6c8933c29dfd7", 37 | ), 38 | stacs.scan.model.finding.Entry( 39 | path="/tmp/rootfs/b.tar.gz!b_cred", 40 | md5="57b8d745384127342f95660d97e1c9c2", 41 | ), 42 | stacs.scan.model.finding.Entry( 43 | path="/tmp/rootfs/b.tar.gz!a.tar!cred", 44 | md5="787c9a8e2148e711f6e9f44696cf341f", 45 | ), 46 | stacs.scan.model.finding.Entry( 47 | path="/tmp/rootfs/a.tar.gz!a.tar!b.tar.gz!b.tar!pass", 48 | md5="d2a33790e5bf28b33cdbf61722a06989", 49 | ), 50 | ] 51 | 52 | # Ensure we get the expected number of artifacts in the artifacts list. 53 | artifacts = [] 54 | for finding in findings: 55 | _, artifacts = stacs.scan.output.sarif.add_artifact( 56 | "/tmp/rootfs/", finding, artifacts 57 | ) 58 | 59 | # Ensure findings are unfurled into the expected number of unique artifacts. 60 | self.assertEqual(len(artifacts), 12) 61 | -------------------------------------------------------------------------------- /tests/test_scanner_rule.py: -------------------------------------------------------------------------------- 1 | """Tests the STACS Scanner Rule module.""" 2 | 3 | import os 4 | import unittest 5 | 6 | import stacs.scan 7 | 8 | 9 | class STACSScannerRuleTestCase(unittest.TestCase): 10 | """Tests the STACS Scanner Rule module.""" 11 | 12 | def setUp(self): 13 | """Ensure the application is setup for testing.""" 14 | self.fixtures_path = os.path.join( 15 | os.path.dirname(os.path.abspath(__file__)), "fixtures/findings/" 16 | ) 17 | 18 | def tearDown(self): 19 | """Ensure everything is torn down between tests.""" 20 | pass 21 | 22 | def test_generate_sample(self): 23 | """Ensures that samples are correctly generated.""" 24 | reduced_after_finding = stacs.scan.model.manifest.Entry( 25 | path=os.path.join(self.fixtures_path, "001.txt") 26 | ) 27 | reduced_before_finding = stacs.scan.model.manifest.Entry( 28 | path=os.path.join(self.fixtures_path, "002.txt") 29 | ) 30 | only_finding = stacs.scan.model.manifest.Entry( 31 | path=os.path.join(self.fixtures_path, "003.txt") 32 | ) 33 | sufficent_before_after_finding = stacs.scan.model.manifest.Entry( 34 | path=os.path.join(self.fixtures_path, "004.txt") 35 | ) 36 | 37 | # Check that the correct number of bytes were extracted before and after the 38 | # respective findings. 39 | context = stacs.scan.scanner.rules.generate_sample( 40 | reduced_after_finding, 41 | 191, # Offset. 42 | 40, # Size. 43 | ) 44 | self.assertEqual(len(context.before), 20) 45 | self.assertEqual(len(context.finding), 40) 46 | self.assertEqual(len(context.after), 1) 47 | 48 | context = stacs.scan.scanner.rules.generate_sample( 49 | reduced_before_finding, 50 | 3, # Offset. 51 | 40, # Size. 
52 | ) 53 | self.assertEqual(len(context.before), 3) 54 | self.assertEqual(len(context.finding), 40) 55 | self.assertEqual(len(context.after), 20) 56 | 57 | context = stacs.scan.scanner.rules.generate_sample( 58 | only_finding, 59 | 0, # Offset. 60 | 40, # Size. 61 | ) 62 | self.assertEqual(len(context.before), 0) 63 | self.assertEqual(len(context.finding), 40) 64 | self.assertEqual(len(context.after), 0) 65 | 66 | context = stacs.scan.scanner.rules.generate_sample( 67 | sufficent_before_after_finding, 68 | 137, # Offset. 69 | 40, # Size. 70 | ) 71 | self.assertEqual(len(context.before), 20) 72 | self.assertEqual(len(context.finding), 40) 73 | self.assertEqual(len(context.after), 20) 74 | -------------------------------------------------------------------------------- /wrapper/stacs-scan: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # This wrapper is used to determine whether a stacs ignore is present in the scan 4 | # directory. 5 | # 6 | 7 | SCAN_DIR="/mnt/stacs/input" 8 | 9 | # Define additional flags to pass. 10 | STACS_FLAGS="" 11 | 12 | if [ ${STACS_SKIP_UNPROCESSABLE:-0} -ne 0 ]; then 13 | STACS_FLAGS="${STACS_FLAGS} --skip-unprocessable" 14 | fi 15 | 16 | if [ ${STACS_THREADS:-10} -ne 10 ]; then 17 | STACS_FLAGS="${STACS_FLAGS} --threads ${STACS_THREADS}" 18 | fi 19 | 20 | if [ ${STACS_DEBUG:-0} -ne 0 ]; then 21 | STACS_FLAGS="${STACS_FLAGS} --debug" 22 | fi 23 | 24 | if [ ${STACS_OUTPUT_PRETTY:-0} -ne 0 ]; then 25 | STACS_FLAGS="${STACS_FLAGS} --pretty" 26 | fi 27 | 28 | # If additional arguments are provided, use them instead of defaults. 29 | if [ "$#" -gt 0 ]; then 30 | stacs "$@" 31 | else 32 | # Use an ignore list, if present. 33 | if [ -e "${SCAN_DIR}/stacs.ignore.json" ]; then 34 | stacs \ 35 | --rule-pack /mnt/stacs/rules/credential.json \ 36 | --cache-directory /mnt/stacs/cache \ 37 | --ignore-list "${SCAN_DIR}/stacs.ignore.json" \ 38 | ${STACS_FLAGS} \ 39 | "${SCAN_DIR}/" 40 | else 41 | stacs \ 42 | --rule-pack /mnt/stacs/rules/credential.json \ 43 | --cache-directory /mnt/stacs/cache \ 44 | ${STACS_FLAGS} \ 45 | "${SCAN_DIR}/" 46 | fi 47 | fi 48 | --------------------------------------------------------------------------------
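Usage note (illustrative sketch, not part of the repository): the wrapper above is
intended to run inside the STACS container with the scan target bind-mounted at
/mnt/stacs/input, with behaviour toggled through the STACS_* environment variables it
inspects. The image name and tag below are assumptions and may differ from the
published image.

    # Hypothetical invocation; "stacscan/stacs:latest" is an assumed image reference.
    docker run --rm \
        -e STACS_OUTPUT_PRETTY=1 \
        -e STACS_THREADS=20 \
        -v "$(pwd):/mnt/stacs/input" \
        stacscan/stacs:latest

Per the wrapper, any arguments supplied after the image name are passed directly to the
stacs CLI in place of the defaults, and a stacs.ignore.json placed in the mounted
directory is picked up automatically via --ignore-list.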