├── .dockerignore ├── .github └── workflows │ ├── pip-audit.yml │ ├── pythonpublish.yml │ └── tests.yml ├── CODEOWNERS ├── LICENSE ├── README.md ├── hooks ├── README.md ├── pre-commit └── pre-push ├── it_depends ├── __init__.py ├── __main__.py ├── audit.py ├── autotools.py ├── cargo.py ├── cli.py ├── cmake.py ├── db.py ├── dependencies.py ├── docker.py ├── go.py ├── graphs.py ├── html.py ├── it_depends.py ├── native.py ├── npm.py ├── pip.py ├── resolver.py ├── sbom.py ├── ubuntu │ ├── __init__.py │ ├── apt.py │ ├── docker.py │ └── resolver.py └── vcs.py ├── pyproject.toml ├── setup.py └── test ├── __init__.py ├── rebuild_expected_output.py ├── repos ├── .gitignore ├── bitcoin.expected.json ├── cvedb.expected.json ├── pe-parse.expected.json └── siderophile.expected.json ├── test_apt.py ├── test_audit.py ├── test_db.py ├── test_go.py ├── test_graphs.py ├── test_native.py ├── test_resolver.py ├── test_smoke.py ├── test_ubuntu.py └── test_vcs.py /.dockerignore: -------------------------------------------------------------------------------- 1 | test/repos 2 | -------------------------------------------------------------------------------- /.github/workflows/pip-audit.yml: -------------------------------------------------------------------------------- 1 | name: Scan dependencies for vulnerabilities with pip-audit 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | schedule: 9 | - cron: "0 12 * * *" 10 | 11 | jobs: 12 | pip-audit: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout repository 17 | uses: actions/checkout@v3 18 | 19 | - name: Install Python 20 | uses: actions/setup-python@v4 21 | with: 22 | python-version: "3.x" 23 | 24 | - name: Install project 25 | run: | 26 | python -m venv /tmp/pip-audit-env 27 | source /tmp/pip-audit-env/bin/activate 28 | 29 | python -m pip install --upgrade pip 30 | python -m pip install . 
31 | 32 | 33 | - name: Run pip-audit 34 | uses: pypa/gh-action-pip-audit@v1.0.8 35 | with: 36 | virtual-environment: /tmp/pip-audit-env 37 | 38 | -------------------------------------------------------------------------------- /.github/workflows/pythonpublish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [published] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-20.04 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: '3.x' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | run: | 30 | python setup.py sdist bdist_wheel 31 | twine upload dist/* 32 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: tests 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-20.04 16 | strategy: 17 | matrix: 18 | python-version: ["3.9", "3.10", "3.11", "3.12"] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v1 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | sudo apt-get update -y 29 | sudo apt-get install -y apt-file cmake autoconf golang cargo npm clang 30 | sudo apt-file update 31 | python -m pip install --upgrade pip 32 | pip install setuptools 33 | pip install .[dev] 34 | - name: Lint with flake8 35 | run: | 36 | # stop the build if there are Python syntax errors or undefined names 37 | flake8 it_depends test --exclude test/repos --count --select=E9,F63,F7,F82 --show-source --statistics 38 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 39 | flake8 it_depends test --exclude test/repos --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 40 | - name: Test with pytest 41 | run: | 42 | pytest test --ignore test/repos 43 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @ESultanik 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 
7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 
80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 
150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # It-Depends 3 | [![PyPI version](https://badge.fury.io/py/it-depends.svg)](https://badge.fury.io/py/it-depends) 4 | [![Tests](https://github.com/trailofbits/it-depends/workflows/tests/badge.svg?branch=master)](https://github.com/trailofbits/it-depends/actions) 5 | [![Slack Status](https://slack.empirehacking.nyc/badge.svg)](https://slack.empirehacking.nyc) 6 | 7 | It-Depends is a tool to automatically build a dependency graph and Software Bill of Materials (SBOM) for packages and arbitrary source code repositories. You can use it to enumerate all third-party dependencies for a software package, map those dependencies to known security vulnerabilities, and compare the similarity between two packages based on their dependencies. 8 | 9 | To the best of our knowledge, It-Depends is the only such tool with the following features: 10 | * Support for C/C++ projects (both autotools and cmake) 11 | * Automated resolution of native library dependencies partially based on dynamic analysis (_e.g._, the Python package `pytz` depends on the native library `libtinfo.so.6`) 12 | * Enumeration of _all possible_ dependency resolutions, not just a _single feasible_ resolution 13 | * A comparison metric for the similarity between two packages based on their dependency graphs 14 | 15 | ## Features ⭐ 16 | * Supports Go, JavaScript, Rust, Python, and C/C++ projects. 17 | * Accepts source code repositories or package specifications like `pip:it-depends` 18 | * Extracts dependencies of cmake/autotools repositories without building them 19 | * Finds native dependencies for high-level languages like Python or JavaScript 20 | * Provides visualization based on vis.js or dot 21 | * Matches dependencies against known CVEs 22 | * Exports Software Bills of Materials (SBOMs) 23 | * Machine-intelligible JSON output 24 | * Support for the SPDX standard is [in active development](https://github.com/trailofbits/it-depends/tree/dev/spdx) 25 | 26 | ### Can It-Depends Do It? It Depends. 
🍋 27 | * It-Depends does not detect vendored or copy/pasted dependencies 28 | * Results from build systems like autotools and cmake that entail arbitrary computation at install time are 29 | best-effort 30 | * Resolution of native dependencies is best-effort 31 | * Some native dependencies are resolved through dynamic analysis 32 | * Native dependencies are inferred by cross-referencing file requirements against paths provided by the Ubuntu 33 | package repository; dependencies may be different across other Linux distributions or Ubuntu versions 34 | * It-Depends attempts to resolve *all* possible package versions that satisfy a dependency 35 | * It-Depends *does not* find a single satisfying package resolution 36 | * The list of resolved packages is intended to be a superset of the packages required by the installation of 37 | a package on any system 38 | * The `--audit` feature may discover vulnerabilities in upstream dependencies that are either not exploitable in the 39 | target package or are in a package version that cannot exist in any valid dependency resolution of the target 40 | package 41 | * It-Depends caches data that it expects to be immutable in a local database 42 | * If a package is ever deleted or yanked from a package repository after it was already cached, It-Depends will 43 | continue to use the cached data unless the cache is cleared with `--clear-cache` 44 | 45 | 46 | ## Quickstart 🚀 47 | ```commandline 48 | $ pip3 install it-depends 49 | ``` 50 | 51 | ### Running it 🏃 52 | Run `it-depends` in the root of the source repository you would like to analyze: 53 | ```console 54 | $ cd /path/to/project 55 | $ it-depends 56 | ``` 57 | or point it at the path directly: 58 | ```console 59 | $ it-depends /path/to/project 60 | ``` 61 | or specify a package from a public package repository: 62 | ```console 63 | $ it-depends pip:numpy 64 | $ it-depends apt:libc6@2.31 65 | $ it-depends npm:lodash@>=4.17.0 66 | ``` 67 | 68 | It-Depends will output the full dependency hierarchy in JSON format. Additional output formats such 69 | as Graphviz/Dot are available via the `--output-format` option. 70 | 71 | It-Depends can automatically try to match packages against the [OSV vulnerability database](https://osv.dev/) with the 72 | `--audit` option. This is a best-effort matching, as it is based on package names, which might not always be consistent. 73 | Any discovered vulnerabilities are added to the JSON output. 74 | 75 | It-Depends attempts to parallelize as much of its effort as possible. To limit the maximum number of parallel tasks, use 76 | the `--max-workers` option. 77 | 78 | By default, It-Depends recursively resolves all packages' dependencies to construct a complete dependency graph. The 79 | depth of the recursion can be limited using the `--depth-limit` option. For example, 80 | ```console 81 | $ it-depends pip:graphtage --depth-limit 1 82 | ``` 83 | will only enumerate the direct dependencies of Graphtage. 84 | 85 | ### Examples 🧑‍🏫 86 | 87 | Here is an example of running It-Depends on its own source repository: 88 | ![](https://gist.githubusercontent.com/feliam/e906ce723333b2b55237a71c4028559e/raw/e60f46c35b215a73a37a1d1ce3bb43eaead76af4/it-depends-demo.svg?sanitize=1) 89 | 90 | This is the resulting [JSON](https://gist.github.com/feliam/2bdec76f7aa50602869059bfa14df156) 91 | with all the discovered dependencies.
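Output files like the JSON above and the Graphviz dot file below can be generated with the `--output-format` and `--output-file` options; the following invocations are illustrative (the gists themselves may have been produced differently):
```console
$ it-depends . --output-format json --output-file it-depends.json
$ it-depends . --output-format dot --output-file it-depends.dot
```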
92 | This is the resulting [Graphviz dot file](https://gist.github.com/feliam/275951f5788c23a477bc7cf758a32cc2) 93 | which produces this: 94 | ![dependency graph](https://user-images.githubusercontent.com/1017522/116887041-33903b80-ac00-11eb-9288-f3d286231e47.png) 95 | 96 | This is the resulting dependency graph: 97 | ![dependency graph](https://user-images.githubusercontent.com/1017522/126380710-0bf4fd66-0d2f-4cb1-a0ff-96fe715c4981.png) 98 | 99 | ### It-Depends’ Dependencies 🎭 100 | 101 | JavaScript requires `npm`\ 102 | Rust requires `cargo`\ 103 | Python requires `pip`\ 104 | C/C++ requires `autotools` and/or `cmake`\ 105 | Several native dependencies are resolved using Ubuntu’s file-to-path database `apt-file`, but this is seamlessly 106 | handled through an Ubuntu `docker` container on other distributions and operating systems\ 107 | Currently `docker` is used to resolve native dependencies 108 | 109 | ## Development 👷 110 | ```commandline 111 | $ git clone https://github.com/trailofbits/it-depends 112 | $ cd it-depends 113 | $ python3 -m venv venv # Optional virtualenv 114 | $ source venv/bin/activate # Optional virtualenv 115 | $ pip3 install -e '.[dev]' 116 | $ git config core.hooksPath ./hooks # Optionally enable git commit hooks for linting 117 | ``` 118 | 119 | ## License and Acknowledgements 📃️ 120 | 121 | This research was developed by [Trail of Bits](https://www.trailofbits.com/) based upon work supported by DARPA under Contract No. HR001120C0084 (Distribution Statement **A**, Approved for Public Release: Distribution Unlimited). Any opinions, findings and conclusions or recommendations expressed in this material are those of the author(s) and do not necessarily reflect the views of the United States Government or DARPA. 122 | 123 | [Felipe Manzano](https://github.com/feliam) and [Evan Sultanik](https://github.com/ESultanik) are 124 | the active maintainers, but [Alessandro Gario](https://github.com/alessandrogario), 125 | [Eric Kilmer](https://github.com/ekilmer), [Alexander Remie](https://github.com/rmi7), and [Henrik Brodin](https://github.com/hbrodin) all made significant 126 | contributions to the tool’s inception and development. 127 | 128 | It-Depends is licensed under the [GNU Lesser General Public License v3.0](LICENSE). [Contact us](mailto:opensource@trailofbits.com) if you’re looking for an exception to the terms. 129 | 130 | © 2021, Trail of Bits. 131 | -------------------------------------------------------------------------------- /hooks/README.md: -------------------------------------------------------------------------------- 1 | # Default Git Hooks for it-depends Development 2 | 3 | To enable these hooks, developers must run this from within the repo after cloning: 4 | ```bash 5 | $ git config core.hooksPath ./hooks 6 | ``` 7 | -------------------------------------------------------------------------------- /hooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if git rev-parse --verify HEAD >/dev/null 2>&1 4 | then 5 | against=HEAD 6 | else 7 | # Initial commit: diff against an empty tree object 8 | against=$(git hash-object -t tree /dev/null) 9 | fi 10 | 11 | # If you want to allow non-ASCII filenames set this variable to true. 12 | allownonascii=$(git config --bool hooks.allownonascii) 13 | 14 | # Redirect output to stderr. 15 | exec 1>&2 16 | 17 | # Cross platform projects tend to avoid non-ASCII filenames; prevent 18 | # them from being added to the repository. 
We exploit the fact that the 19 | # printable range starts at the space character and ends with tilde. 20 | if [ "$allownonascii" != "true" ] && 21 | # Note that the use of brackets around a tr range is ok here, (it's 22 | # even required, for portability to Solaris 10's /usr/bin/tr), since 23 | # the square bracket bytes happen to fall in the designated range. 24 | test $(git diff --cached --name-only --diff-filter=A -z $against | 25 | LC_ALL=C tr -d '[ -~]\0' | wc -c) != 0 26 | then 27 | cat <<\EOF 28 | Error: Attempt to add a non-ASCII file name. 29 | 30 | This can cause problems if you want to work with people on other platforms. 31 | 32 | To be portable it is advisable to rename the file. 33 | 34 | If you know what you are doing you can disable this check using: 35 | 36 | git config hooks.allownonascii true 37 | EOF 38 | exit 1 39 | fi 40 | 41 | which flake8 >/dev/null 2>/dev/null 42 | RESULT=$? 43 | if [ $RESULT -ne 0 ]; then 44 | cat << \EOF 45 | flake8 is not installed! Run this from the root of the it-depends repo: 46 | 47 | pip3 install -e .[dev] 48 | EOF 49 | exit 1 50 | fi 51 | 52 | echo Linting Python code... 53 | flake8 it_depends test --exclude test/repos --count --select=E9,F63,F7,F82 --show-source --statistics 1>/dev/null 2>/dev/null 54 | RESULT=$? 55 | if [ $RESULT -ne 0 ]; then 56 | cat <<\EOF 57 | Failed Python lint: 58 | 59 | flake8 it_depends test --exclude test/repos --count --select=E9,F63,F7,F82 --show-source --statistics 60 | 61 | EOF 62 | flake8 it_depends test --exclude test/repos --count --select=E9,F63,F7,F82 --show-source --statistics 63 | exit 1 64 | fi 65 | 66 | flake8 it_depends test --exclude test/repos --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 1>/dev/null 2>/dev/null 67 | RESULT=$? 68 | if [ $RESULT -ne 0 ]; then 69 | cat <<\EOF 70 | Failed Python lint: 71 | 72 | flake8 it_depends test --exclude test/repos --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 73 | 74 | EOF 75 | flake8 it_depends test --exclude test/repos --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 76 | exit 1 77 | fi 78 | 79 | which mypy >/dev/null 2>/dev/null 80 | RESULT=$? 81 | if [ $RESULT -ne 0 ]; then 82 | cat << \EOF 83 | mypy is not installed! Run this from the root of the it-depends repo: 84 | 85 | pip3 install -e .[dev] 86 | EOF 87 | exit 1 88 | fi 89 | 90 | echo Type-checking Python code... 91 | mypy --exclude "test/repos/.*" --ignore-missing-imports it_depends test 92 | RESULT=$? 93 | if [ $RESULT -ne 0 ]; then 94 | exit $RESULT 95 | fi 96 | 97 | # If there are whitespace errors, print the offending file names and fail. 98 | git diff-index --check --cached $against -- 99 | -------------------------------------------------------------------------------- /hooks/pre-push: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | which pytest >/dev/null 2>/dev/null 4 | RESULT=$? 5 | if [ $RESULT -ne 0 ]; then 6 | cat << \EOF 7 | pytest is not installed! Run this from the root of the it-depends repo: 8 | 9 | pip3 install -e .[dev] 10 | EOF 11 | exit 1 12 | fi 13 | 14 | pytest test --ignore test/repos 15 | exit $? 
16 | -------------------------------------------------------------------------------- /it_depends/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib import import_module 2 | from pkgutil import iter_modules 3 | from pathlib import Path 4 | 5 | from .it_depends import * 6 | 7 | # Automatically load all modules in the `it_depends` package, 8 | # so all DependencyClassifiers will auto-register themselves: 9 | package_dir = Path(__file__).resolve().parent 10 | for (_, module_name, _) in iter_modules([str(package_dir)]): # type: ignore 11 | # import the module and iterate through its attributes 12 | if module_name != "__main__": 13 | module = import_module(f"{__name__}.{module_name}") 14 | -------------------------------------------------------------------------------- /it_depends/__main__.py: -------------------------------------------------------------------------------- 1 | from .cli import main 2 | 3 | if __name__ == "__main__": 4 | exit(main()) 5 | -------------------------------------------------------------------------------- /it_depends/audit.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from concurrent.futures import ThreadPoolExecutor, as_completed 3 | import logging 4 | from requests import post 5 | from tqdm import tqdm 6 | from typing import Dict, FrozenSet, Iterable, List, Union, Tuple 7 | 8 | from .dependencies import Package, PackageRepository, Vulnerability 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class OSVVulnerability(Vulnerability): 14 | """Represents a vulnerability from the OSV project""" 15 | 16 | """Additional keys available from the OSV Vulnerability db.""" 17 | EXTRA_KEYS = [ 18 | "published", 19 | "modified", 20 | "withdrawn", 21 | "related", 22 | "package", 23 | "details", 24 | "affects", 25 | "affected", 26 | "references", 27 | "severity", 28 | "database_specific", 29 | "ecosystem_specific", 30 | ] 31 | 32 | def __init__(self, osv_dict: Dict): 33 | # Get the first available information as summary (N/A if none) 34 | summary = osv_dict.get("summary", "") or osv_dict.get("details", "") or "N/A" 35 | super().__init__(osv_dict["id"], osv_dict.get("aliases", []), summary) 36 | 37 | # Inherit all other attributes 38 | for k in OSVVulnerability.EXTRA_KEYS: 39 | setattr(self, k, osv_dict.get(k, None)) 40 | 41 | @classmethod 42 | def from_osv_dict(cls, d: Dict): 43 | return OSVVulnerability(d) 44 | 45 | 46 | class VulnerabilityProvider(ABC): 47 | """Interface of a vulnerability provider.""" 48 | 49 | def query(self, pkg: Package) -> Iterable[Vulnerability]: 50 | """Queries the vulnerability provider for vulnerabilities in pkg""" 51 | raise NotImplementedError() 52 | 53 | 54 | class OSVProject(VulnerabilityProvider): 55 | """OSV project vulnerability provider""" 56 | 57 | QUERY_URL = "https://api.osv.dev/v1/query" 58 | 59 | def query(self, pkg: Package) -> Iterable[OSVVulnerability]: 60 | """Queries the OSV project for vulnerabilities in Package pkg""" 61 | q = {"version": str(pkg.version), "package": {"name": pkg.name}} 62 | r = post(OSVProject.QUERY_URL, json=q).json() 63 | return map(OSVVulnerability.from_osv_dict, r.get("vulns", [])) 64 | 65 | 66 | def vulnerabilities(repo: PackageRepository, nworkers=None) -> PackageRepository: 67 | def _get_vulninfo(pkg: Package) -> Tuple[Package, FrozenSet[Vulnerability]]: 68 | """Enrich a Package with vulnerability information""" 69 | ret = OSVProject().query(pkg) 70 | # Do not modify pkg 
here, to avoid concurrent 71 | # modifications; instead, return the result and let the main 72 | # thread apply the updates. 73 | return (pkg, frozenset(ret)) 74 | 75 | with ThreadPoolExecutor(max_workers=nworkers) as executor, tqdm( 76 | desc="Checking for vulnerabilities", leave=False, unit=" packages" 77 | ) as t: 78 | futures = {executor.submit(_get_vulninfo, pkg): pkg for pkg in repo} 79 | t.total = len(futures) 80 | 81 | for future in as_completed(futures): 82 | try: 83 | t.update(1) 84 | pkg, vulns = future.result() 85 | except Exception as exc: 86 | logger.error( 87 | "Failed to retrieve vulnerability information. " "Exception: {}".format(exc) 88 | ) 89 | else: 90 | pkg.update_vulnerabilities(vulns) 91 | 92 | return repo 93 | --------------------------------------------------------------------------------
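A note on `audit.py` above: the OSV query performed by `OSVProject.query` can be reproduced standalone. The following is a minimal sketch that assumes only the endpoint and JSON payload shape shown in the code; the package name and version are arbitrary illustrative values, not taken from this repo:
```python
import requests

# Same endpoint and JSON payload shape as OSVProject.query above.
response = requests.post(
    "https://api.osv.dev/v1/query",
    json={"version": "2.4.1", "package": {"name": "lodash"}},
)
for vuln in response.json().get("vulns", []):
    # Every OSV record has an "id"; "summary" may be absent.
    print(vuln["id"], vuln.get("summary", "N/A"))
```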
/it_depends/autotools.py: -------------------------------------------------------------------------------- 1 | import os 2 | import functools 3 | import re 4 | import itertools 5 | import shutil 6 | import subprocess 7 | import logging 8 | import tempfile 9 | from typing import List, Optional, Tuple 10 | 11 | from it_depends.ubuntu.apt import cached_file_to_package as file_to_package 12 | 13 | from .dependencies import ( 14 | Dependency, 15 | DependencyResolver, 16 | PackageCache, 17 | ResolverAvailability, 18 | SimpleSpec, 19 | SourcePackage, 20 | SourceRepository, 21 | Version, 22 | ) 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | class AutotoolsResolver(DependencyResolver): 28 | """This attempts to parse configure.ac in an autotools-based repo. 29 | It supports the following macros: 30 | AC_INIT, AC_CHECK_HEADER, AC_CHECK_LIB, PKG_CHECK_MODULES 31 | 32 | BUGS: 33 | does not handle boost deps 34 | assumes ubuntu host 35 | """ 36 | 37 | name = "autotools" 38 | description = "classifies the dependencies of native/autotools packages parsing configure.ac" 39 | 40 | def is_available(self) -> ResolverAvailability: 41 | if shutil.which("autoconf") is None: 42 | return ResolverAvailability( 43 | False, 44 | "`autoconf` does not appear to be installed! " 45 | "Make sure it is installed and in the PATH.", 46 | ) 47 | return ResolverAvailability(True) 48 | 49 | def can_resolve_from_source(self, repo: SourceRepository) -> bool: 50 | return bool(self.is_available()) and (repo.path / "configure.ac").exists() 51 | 52 | @staticmethod 53 | def _ac_check_header(header_file, file_to_package_cache=None): 54 | """ 55 | Macro: AC_CHECK_HEADER 56 | Checks if the system header file header-file is compilable. 57 | https://www.gnu.org/software/autoconf/manual/autoconf-2.67/html_node/Generic-Headers.html 58 | """ 59 | logger.info(f"AC_CHECK_HEADER {header_file}") 60 | package_name = file_to_package( 61 | f"{re.escape(header_file)}", file_to_package_cache=file_to_package_cache 62 | ) 63 | return Dependency(package=package_name, semantic_version=SimpleSpec("*"), source="ubuntu") 64 | 65 | @staticmethod 66 | def _ac_check_lib(function, file_to_package_cache=None): 67 | """ 68 | Macro: AC_CHECK_LIB 69 | Checks for the presence of certain C, C++, or Fortran library archive files. 70 | https://www.gnu.org/software/autoconf/manual/autoconf-2.67/html_node/Libraries.html#Libraries 71 | """ 72 | lib_file, function_name = function.split(".") 73 | logger.info(f"AC_CHECK_LIB {lib_file}") 74 | package_name = file_to_package( 75 | f"lib{re.escape(lib_file)}(.a|.so)", 76 | file_to_package_cache=file_to_package_cache, 77 | ) 78 | return Dependency(package=package_name, semantic_version=SimpleSpec("*"), source="ubuntu") 79 | 80 | @staticmethod 81 | def _pkg_check_modules(module_name, version=None, file_to_package_cache=None): 82 | """ 83 | Macro: PKG_CHECK_MODULES 84 | The main interface between autoconf and pkg-config. 85 | Provides a very basic and easy way to check for the presence of a 86 | given package in the system. 87 | """ 88 | if not version: 89 | version = "*" 90 | module_file = re.escape(module_name + ".pc") 91 | logger.info(f"PKG_CHECK_MODULES {module_file}, {version}") 92 | package_name = file_to_package(module_file, file_to_package_cache=file_to_package_cache) 93 | return Dependency( 94 | package=package_name, semantic_version=SimpleSpec(version), source="ubuntu" 95 | ) 96 | 97 | @staticmethod 98 | @functools.lru_cache(maxsize=128) 99 | def _replace_variables(token: str, configure: str): 100 | """ 101 | Search all variable occurrences in token and then try to find 102 | bindings for them in the configure script. 103 | """ 104 | if "$" not in token: 105 | return token 106 | variable_list = re.findall(r"\$([a-zA-Z_0-9]+)|\${([_a-zA-Z0-9]+)}", token) 107 | variables = set( 108 | var for var in itertools.chain(*variable_list) if var 109 | ) # remove dups and empty 110 | for var in variables: 111 | logger.info(f"Trying to find bindings for {var} in configure") 112 | 113 | # This tries to find a single assignment to the variable in question 114 | # ... var= "SOMETHING" 115 | # We ignore the fact that variables could also appear in other constructs 116 | # For example: 117 | # for var in THIS THAT ; 118 | # TODO/CHALLENGE Merge these two \/ 119 | solutions = re.findall(f'{var}=\\s*"([^"]*)"', configure) 120 | solutions += re.findall(f"{var}=\\s*'([^']*)'", configure) 121 | if len(solutions) > 1: 122 | logger.warning(f"Found several solutions for {var}: {solutions}") 123 | if len(solutions) == 0: 124 | logger.warning(f"No solution found for binding {var}") 125 | continue 126 | logger.info(f"Found a solution {solutions}") 127 | sol = ( 128 | solutions 129 | + [ 130 | None, 131 | ] 132 | )[0] 133 | if sol is not None: 134 | token = token.replace(f"${var}", sol).replace(f"${{{var}}}", sol) 135 | if "$" in token: 136 | raise ValueError(f"Could not find a binding for variable/s in {token}") 137 | return token 138 | 139 | def resolve_from_source( 140 | self, repo: SourceRepository, cache: Optional[PackageCache] = None 141 | ) -> Optional[SourcePackage]: 142 | if not self.can_resolve_from_source(repo): 143 | return None 144 | logger.info(f"Getting dependencies for autotools repo {repo.path.absolute()}") 145 | with tempfile.NamedTemporaryFile() as tmp: 146 | # builds a temporary copy of configure.ac containing aclocal env 147 | subprocess.check_output(("aclocal", f"--output={tmp.name}"), cwd=repo.path) 148 | with open(tmp.name, "ab") as tmp2: 149 | with open(repo.path / "configure.ac", "rb") as conf: 150 | tmp2.write(conf.read()) 151 | 152 | trace = subprocess.check_output( 153 | ( 154 | "autoconf", 155 | "-t", 156 | "AC_CHECK_HEADER:$n:$1", 157 | "-t", 158 | "AC_CHECK_LIB:$n:$1.$2", 159 | "-t", 160 | "PKG_CHECK_MODULES:$n:$2", 161 | "-t", 162 | "PKG_CHECK_MODULES_STATIC:$n", 163 | tmp.name, 164 | ), 165 | cwd=repo.path, 166 | ).decode("utf8") 167 | configure = subprocess.check_output(["autoconf", tmp.name], cwd=repo.path).decode( 168 | "utf8" 169 | ) 170 | 171 | file_to_package_cache: List[Tuple[str]] = [] 172 | deps = [] 173 | for macro in trace.split("\n"): 174 | logger.debug(f"Handling: {macro}") 175 | macro, *arguments = macro.split(":") 176 | try: 177 | arguments = tuple(self._replace_variables(arg, configure) for arg in arguments) # type: ignore 178 | except Exception as e: 179 | logger.info(str(e)) 180 | continue 181 | try: 182 | if macro == "AC_CHECK_HEADER": 183 | deps.append( 184 | self._ac_check_header( 185 | header_file=arguments[0], 186 | file_to_package_cache=file_to_package_cache, 187 | ) 188 | ) 189 | elif macro == "AC_CHECK_LIB": 190 | deps.append( 191 | self._ac_check_lib( 192 | function=arguments[0], 193 | file_to_package_cache=file_to_package_cache, 194 | ) 195 | ) 196 | elif macro == "PKG_CHECK_MODULES": 197 | module_name, *version = arguments[0].split(" ") 198 | deps.append( 199 | self._pkg_check_modules( 200 | module_name=module_name, 201 | version="".join(version), 202 | file_to_package_cache=file_to_package_cache, 203 | ) 204 | ) 205 | else: 206 | logger.error("Macro not supported %r", macro) 207 | except Exception as e: 208 | logger.error(str(e)) 209 | continue 210 | 211 | """ 212 | # Identity of this package. 
213 | PACKAGE_NAME='Bitcoin Core' 214 | PACKAGE_TARNAME='bitcoin' 215 | PACKAGE_VERSION='21.99.0' 216 | PACKAGE_STRING='Bitcoin Core 21.99.0' 217 | PACKAGE_BUGREPORT='https://github.com/bitcoin/bitcoin/issues' 218 | PACKAGE_URL='https://bitcoincore.org/""" 219 | try: 220 | package_name = self._replace_variables("$PACKAGE_NAME", configure) 221 | except ValueError as e: 222 | logger.error(str(e)) 223 | package_name = os.path.basename(repo.path) 224 | 225 | try: 226 | package_version = self._replace_variables("$PACKAGE_VERSION", configure) 227 | except ValueError as e: 228 | logger.error(str(e)) 229 | package_version = "0.0.0" 230 | 231 | return SourcePackage( 232 | name=package_name, 233 | version=Version.coerce(package_version), 234 | source=self.name, 235 | dependencies=deps, 236 | source_repo=repo, 237 | ) 238 | -------------------------------------------------------------------------------- /it_depends/cargo.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import json 3 | import tempfile 4 | import shutil 5 | import subprocess 6 | import logging 7 | from typing import Iterator, Optional, Type, Union, Dict 8 | 9 | from semantic_version.base import Always, BaseSpec 10 | 11 | from .dependencies import ( 12 | Dependency, 13 | DependencyResolver, 14 | Package, 15 | PackageCache, 16 | ResolverAvailability, 17 | SimpleSpec, 18 | SourcePackage, 19 | SourceRepository, 20 | Version, 21 | InMemoryPackageCache, 22 | ) 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | @BaseSpec.register_syntax 28 | class CargoSpec(SimpleSpec): 29 | SYNTAX = "cargo" 30 | 31 | class Parser(SimpleSpec.Parser): 32 | @classmethod 33 | def parse(cls, expression): 34 | # The only difference here is that cargo clauses can have whitespace, so we need to strip each block: 35 | blocks = [b.strip() for b in expression.split(",")] 36 | clause = Always() 37 | for block in blocks: 38 | if not cls.NAIVE_SPEC.match(block): 39 | raise ValueError("Invalid simple block %r" % block) 40 | clause &= cls.parse_block(block) 41 | 42 | return clause 43 | 44 | def __str__(self): 45 | # remove the whitespace to canonicalize the spec 46 | return ",".join(b.strip() for b in self.expression.split(",")) 47 | 48 | def __or__(self, other): 49 | return CargoSpec(f"{self.expression},{other.expression}") 50 | 51 | 52 | def get_dependencies( 53 | repo: SourceRepository, 54 | check_for_cargo: bool = True, 55 | cache: Optional[PackageCache] = None, 56 | ) -> Iterator[Package]: 57 | if check_for_cargo and shutil.which("cargo") is None: 58 | raise ValueError( 59 | "`cargo` does not appear to be installed! Make sure it is installed and in the PATH." 
60 | ) 61 | 62 | metadata = json.loads( 63 | subprocess.check_output(["cargo", "metadata", "--format-version", "1"], cwd=repo.path) 64 | ) 65 | 66 | if "workspace_members" in metadata: 67 | workspace_members = {member[: member.find(" ")] for member in metadata["workspace_members"]} 68 | else: 69 | workspace_members = set() 70 | 71 | for package in metadata["packages"]: 72 | if package["name"] in workspace_members: 73 | _class: Type[Union[SourcePackage, Package]] = SourcePackage 74 | kwargs = {"source_repo": repo} 75 | else: 76 | _class = Package 77 | kwargs = {} 78 | 79 | dependencies: Dict[str, Dependency] = {} 80 | for dep in package["dependencies"]: 81 | if dep["kind"] is not None: 82 | continue 83 | if dep["name"] in dependencies: 84 | dependencies[dep["name"]].semantic_version = dependencies[ 85 | dep["name"] 86 | ].semantic_version | CargoResolver.parse_spec(dep["req"]) 87 | else: 88 | dependencies[dep["name"]] = Dependency( 89 | package=dep["name"], 90 | semantic_version=CargoResolver.parse_spec(dep["req"]), 91 | source=CargoResolver(), 92 | ) 93 | 94 | yield _class( # type: ignore 95 | name=package["name"], 96 | version=Version.coerce(package["version"]), 97 | source="cargo", 98 | dependencies=dependencies.values(), 99 | vulnerabilities=(), 100 | **kwargs, 101 | ) 102 | 103 | 104 | class CargoResolver(DependencyResolver): 105 | name = "cargo" 106 | description = "classifies the dependencies of Rust packages using `cargo metadata`" 107 | 108 | def is_available(self) -> ResolverAvailability: 109 | if shutil.which("cargo") is None: 110 | return ResolverAvailability( 111 | False, 112 | "`cargo` does not appear to be installed! " 113 | "Make sure it is installed and in the PATH.", 114 | ) 115 | return ResolverAvailability(True) 116 | 117 | @classmethod 118 | def parse_spec(cls, spec: str) -> CargoSpec: 119 | return CargoSpec(spec) 120 | 121 | def can_resolve_from_source(self, repo: SourceRepository) -> bool: 122 | return bool(self.is_available()) and (repo.path / "Cargo.toml").exists() 123 | 124 | def resolve_from_source( 125 | self, repo: SourceRepository, cache: Optional[PackageCache] = None 126 | ) -> Optional[SourcePackage]: 127 | if not self.can_resolve_from_source(repo): 128 | return None 129 | result = None 130 | for package in get_dependencies(repo, check_for_cargo=False): 131 | if isinstance(package, SourcePackage): 132 | result = package 133 | else: 134 | if cache is not None: 135 | cache.add(package) 136 | for dep in package.dependencies: 137 | if not cache.was_resolved(dep): 138 | cache.set_resolved(dep) 139 | return result 140 | 141 | def resolve(self, dependency: Dependency) -> Iterator[Package]: 142 | """search_result = subprocess.check_output(["cargo", "search", "--limit", "100", str(dependency.package)]).decode() 143 | for line in search_result.splitlines(): 144 | pkgid = (line.split("#", 1)[0].strip()) 145 | if pkgid.startswith(f"{dependency.package}"): 146 | break 147 | else: 148 | return 149 | """ 150 | pkgid = dependency.package 151 | 152 | # Need to translate a semantic version into a cargo semantic version 153 | # https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#caret-requirements 154 | # caret requirement 155 | semantic_version = str(dependency.semantic_version) 156 | semantic_versions = semantic_version.split(",") 157 | cache = InMemoryPackageCache() 158 | with cache: 159 | for semantic_version in map(str.strip, semantic_versions): 160 | if semantic_version[0].isnumeric(): 161 | semantic_version = "=" + semantic_version 162 | pkgid = 
f'{pkgid.split("=")[0].strip()} = "{semantic_version}"' 163 | 164 | logger.debug(f"Found {pkgid} for {dependency} in crates.io") 165 | with tempfile.TemporaryDirectory() as tmpdir: 166 | subprocess.check_output(["cargo", "init"], cwd=tmpdir) 167 | with open(Path(tmpdir) / "Cargo.toml", "a") as f: 168 | f.write(f"{pkgid}\n") 169 | self.resolve_from_source(SourceRepository(path=tmpdir), cache) 170 | cache.set_resolved(dependency) 171 | # TODO: propagate up any other info we have in this cache 172 | return cache.match(dependency) 173 | -------------------------------------------------------------------------------- /it_depends/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from contextlib import contextmanager 3 | import json 4 | from pathlib import Path 5 | import sys 6 | from typing import Iterator, Optional, Sequence, TextIO, Union 7 | import webbrowser 8 | 9 | from sqlalchemy.exc import OperationalError 10 | 11 | from .audit import vulnerabilities 12 | from .db import DEFAULT_DB_PATH, DBPackageCache 13 | from .dependencies import Dependency, resolvers, resolve, SourceRepository 14 | from .it_depends import version as it_depends_version 15 | from .html import graph_to_html 16 | from .resolver import resolve_sbom 17 | from .sbom import cyclonedx_to_json 18 | 19 | 20 | @contextmanager 21 | def no_stdout() -> Iterator[TextIO]: 22 | """A context manager that redirects STDOUT to STDERR""" 23 | saved_stdout = sys.stdout 24 | sys.stdout = sys.stderr 25 | try: 26 | yield saved_stdout 27 | finally: 28 | sys.stdout = saved_stdout 29 | 30 | 31 | def parse_path_or_package_name( 32 | path_or_name: str, 33 | ) -> Union[SourceRepository, Dependency]: 34 | repo_path = Path(path_or_name) 35 | try: 36 | dependency: Optional[Dependency] = Dependency.from_string(path_or_name) 37 | except ValueError as e: 38 | if str(e).endswith("is not a known resolver") and not repo_path.exists(): 39 | raise ValueError(f"Unknown resolver: {path_or_name}") 40 | dependency = None 41 | if dependency is None or repo_path.exists(): 42 | return SourceRepository(path_or_name) 43 | else: 44 | return dependency 45 | 46 | 47 | def main(argv: Optional[Sequence[str]] = None) -> int: 48 | if argv is None: 49 | argv = sys.argv 50 | 51 | parser = argparse.ArgumentParser(description="a source code dependency analyzer") 52 | 53 | parser.add_argument( 54 | "PATH_OR_NAME", 55 | nargs="?", 56 | type=str, 57 | default=".", 58 | help="path to the directory to analyze, or a package name in the form of " 59 | "RESOLVER_NAME:PACKAGE_NAME[@OPTIONAL_VERSION], where RESOLVER_NAME is a resolver listed " 60 | 'in `it-depends --list`. 
For example: "pip:numpy", "apt:libc6@2.31", or ' 61 | '"npm:lodash@>=4.17.0".', 62 | ) 63 | 64 | parser.add_argument( 65 | "--audit", 66 | "-a", 67 | action="store_true", 68 | help="audit packages for known vulnerabilities using " "Google OSV", 69 | ) 70 | parser.add_argument("--list", "-l", action="store_true", help="list available package resolvers") 71 | parser.add_argument( 72 | "--database", 73 | "-db", 74 | type=str, 75 | nargs="?", 76 | default=DEFAULT_DB_PATH, 77 | help='alternative path to load/store the database, or ":memory:" to cache all results in ' 78 | f"memory rather than reading/writing to disk (default is {DEFAULT_DB_PATH!s})", 79 | ) 80 | parser.add_argument( 81 | "--clear-cache", 82 | action="store_true", 83 | help="clears the database specified by `--database` " 84 | "(equivalent to deleting the database file)", 85 | ) 86 | parser.add_argument( 87 | "--compare", 88 | "-c", 89 | nargs="?", 90 | type=str, 91 | help="compare PATH_OR_NAME to another package specified according to the same rules as " 92 | "PATH_OR_NAME; this option will override the --output-format option and will instead " 93 | "output a floating point similarity metric. By default, the metric will be in the range " 94 | "[0, ∞), with zero meaning that the dependency graphs are identical. For a metric in the " 95 | "range [0, 1], see the `--normalize` option.", 96 | ) 97 | parser.add_argument( 98 | "--normalize", 99 | "-n", 100 | action="store_true", 101 | help="Used in conjunction with `--compare`, this will change the output metric to be in the " 102 | "range [0, 1] where 1 means the graphs are identical and 0 means the graphs are as " 103 | "different as possible.", 104 | ) 105 | parser.add_argument( 106 | "--output-format", 107 | "-f", 108 | choices=("json", "dot", "html", "cyclonedx"), 109 | default="json", 110 | help="how the output should be formatted (default is JSON); note that `cyclonedx` will output a single " 111 | "satisfying dependency resolution rather than the universe of all possible resolutions " 112 | "(see `--latest-resolution`)", 113 | ) 114 | parser.add_argument("--latest-resolution", "-lr", action="store_true", 115 | help="by default, the `cyclonedx` output format emits a single satisfying dependency " 116 | "resolution containing the oldest versions of all of the packages possible; this option " 117 | "instead returns the latest possible resolution") 118 | parser.add_argument( 119 | "--output-file", 120 | "-o", 121 | type=str, 122 | default=None, 123 | help="path to the output file; default is to write output to STDOUT", 124 | ) 125 | parser.add_argument( 126 | "--force", 127 | action="store_true", 128 | help="force overwriting the output file even if it already exists", 129 | ) 130 | parser.add_argument( 131 | "--all-versions", 132 | action="store_true", 133 | help="for `--output-format html`, this option will emit all package versions that satisfy each " 134 | "dependency", 135 | ) 136 | parser.add_argument( 137 | "--depth-limit", 138 | "-d", 139 | type=int, 140 | default=-1, 141 | help="depth limit for recursively solving dependencies (default is -1 to resolve all " 142 | "dependencies)", 143 | ) 144 | parser.add_argument( 145 | "--max-workers", 146 | "-j", 147 | type=int, 148 | default=None, 149 | help="maximum number of jobs to run concurrently" " (default is # of CPUs)", 150 | ) 151 | parser.add_argument( 152 | "--version", 153 | "-v", 154 | action="store_true", 155 | help="print it-depends' version and exit", 156 | ) 157 | 158 | args = parser.parse_args(argv[1:]) 159 | 
160 | if args.version: 161 | sys.stderr.write("it-depends version ") 162 | sys.stderr.flush() 163 | version = it_depends_version() 164 | sys.stdout.write(str(version)) 165 | sys.stdout.flush() 166 | sys.stderr.write("\n") 167 | return 0 168 | 169 | try: 170 | repo = parse_path_or_package_name(args.PATH_OR_NAME) 171 | 172 | if args.compare is not None: 173 | to_compare: Optional[Union[SourceRepository, Dependency]] = parse_path_or_package_name( 174 | args.compare 175 | ) 176 | else: 177 | to_compare = None 178 | except ValueError as e: 179 | sys.stderr.write(str(e)) 180 | sys.stderr.write("\n\n") 181 | return 1 182 | 183 | if args.clear_cache: 184 | db_path = Path(args.database) 185 | if db_path.exists(): 186 | if sys.stderr.isatty() and sys.stdin.isatty(): 187 | while True: 188 | if args.database != DEFAULT_DB_PATH: 189 | sys.stderr.write(f"Cache file: {db_path.absolute()}\n") 190 | sys.stderr.write( 191 | "Deleting the cache will require all past resolutions to be recalculated, which " 192 | "can be slow.\nAre you sure? [yN] " 193 | ) 194 | try: 195 | choice = input("").lower().strip() 196 | except KeyboardInterrupt: 197 | return 1 198 | if choice == "y": 199 | db_path.unlink() 200 | sys.stderr.write("Cache cleared.\n") 201 | break 202 | elif choice == "n" or choice == "": 203 | break 204 | else: 205 | db_path.unlink() 206 | sys.stderr.write("Cache cleared.\n") 207 | 208 | if args.list: 209 | sys.stdout.flush() 210 | if isinstance(repo, SourceRepository): 211 | path = repo.path.absolute() 212 | else: 213 | path = args.PATH_OR_NAME 214 | sys.stderr.write(f"Available resolvers for {path}:\n") 215 | sys.stderr.flush() 216 | for name, classifier in sorted((c.name, c) for c in resolvers()): 217 | sys.stdout.write(name + " " * (12 - len(name))) 218 | sys.stdout.flush() 219 | available = classifier.is_available() 220 | if not available: 221 | sys.stderr.write(f"\tnot available: {available.reason}") 222 | sys.stderr.flush() 223 | elif isinstance(repo, SourceRepository) and not classifier.can_resolve_from_source( 224 | repo 225 | ): 226 | sys.stderr.write("\tincompatible with this path") 227 | sys.stderr.flush() 228 | elif isinstance(repo, Dependency) and repo.source != classifier.name: 229 | sys.stderr.write("\tincompatible with this package specifier") 230 | else: 231 | sys.stderr.write("\tenabled") 232 | sys.stderr.flush() 233 | 234 | sys.stdout.write("\n") 235 | sys.stdout.flush() 236 | return 0 237 | 238 | try: 239 | output_file = None 240 | with no_stdout() as real_stdout: 241 | if args.output_file is None or args.output_file == "-": 242 | output_file = real_stdout 243 | elif not args.force and Path(args.output_file).exists(): 244 | sys.stderr.write( 245 | f"{args.output_file} already exists!\nRe-run with `--force` to overwrite the file.\n" 246 | ) 247 | return 1 248 | else: 249 | output_file = open(args.output_file, "w") 250 | with DBPackageCache(args.database) as cache: 251 | try: 252 | package_list = resolve( 253 | repo, 254 | cache=cache, 255 | depth_limit=args.depth_limit, 256 | max_workers=args.max_workers, 257 | ) 258 | except ValueError as e: 259 | if not args.clear_cache or args.PATH_OR_NAME.strip(): 260 | sys.stderr.write(f"{e!s}\n") 261 | return 1 262 | if not package_list: 263 | sys.stderr.write( 264 | f"Try --list to check for available resolvers for {args.PATH_OR_NAME}\n" 265 | ) 266 | sys.stderr.flush() 267 | 268 | # TODO: Should the cache be updated instead???? 
269 | if args.audit: 270 | package_list = vulnerabilities(package_list) 271 | 272 | if to_compare is not None: 273 | to_compare_list = resolve( 274 | to_compare, 275 | cache=cache, 276 | depth_limit=args.depth_limit, 277 | max_workers=args.max_workers, 278 | ) 279 | output_file.write( 280 | str( 281 | package_list.to_graph().distance_to( 282 | to_compare_list.to_graph(), normalize=args.normalize 283 | ) 284 | ) 285 | ) 286 | output_file.write("\n") 287 | elif args.output_format == "dot": 288 | output_file.write(cache.to_dot(package_list.source_packages).source) 289 | elif args.output_format == "html": 290 | output_file.write( 291 | graph_to_html(package_list, collapse_versions=not args.all_versions) 292 | ) 293 | if output_file is not real_stdout: 294 | output_file.flush() 295 | webbrowser.open(output_file.name) 296 | elif args.output_format == "json": 297 | output_file.write(json.dumps(package_list.to_obj(), indent=4)) 298 | elif args.output_format == "cyclonedx": 299 | sbom = None 300 | for p in package_list.source_packages: 301 | for bom in resolve_sbom(p, package_list, order_ascending=not args.latest_resolution): 302 | if sbom is None: 303 | sbom = bom 304 | else: 305 | sbom = sbom | bom 306 | # only get the first resolution 307 | # TODO: Provide a means for enumerating all valid SBOMs 308 | break 309 | output_file.write(cyclonedx_to_json(sbom.to_cyclonedx())) 310 | else: 311 | raise NotImplementedError(f"TODO: Implement output format {args.output_format}") 312 | except OperationalError as e: 313 | sys.stderr.write( 314 | f"Database error: {e!r}\n\nThis can occur if your database was created with an older version " 315 | f"of it-depends and was unable to be updated. If you remove {args.database} or run " 316 | "`it-depends --clear-cache` and try again, the database will automatically be rebuilt from " 317 | "scratch." 
318 | ) 319 | return 1 320 | finally: 321 | if output_file is not None and output_file != sys.stdout: 322 | sys.stderr.write(f"Output saved to {output_file.name}\n") 323 | output_file.close() 324 | 325 | return 0 326 | -------------------------------------------------------------------------------- /it_depends/db.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Any, Dict, FrozenSet, Iterable, Iterator, Optional, Tuple, Union 3 | 4 | from semantic_version import Version 5 | from sqlalchemy import ( 6 | Column, 7 | create_engine, 8 | distinct, 9 | ForeignKey, 10 | Integer, 11 | String, 12 | UniqueConstraint, 13 | ) 14 | from sqlalchemy.ext.declarative import declarative_base 15 | from sqlalchemy.ext.hybrid import hybrid_property 16 | from sqlalchemy.orm import relationship, sessionmaker 17 | 18 | from .dependencies import ( 19 | resolver_by_name, 20 | Dependency, 21 | DependencyResolver, 22 | Package, 23 | SemanticVersion, 24 | PackageCache, 25 | ) 26 | from .it_depends import APP_DIRS 27 | 28 | DEFAULT_DB_PATH = Path(APP_DIRS.user_cache_dir) / "dependencies.sqlite" 29 | 30 | Base = declarative_base() 31 | 32 | 33 | class Resolution(Base): # type: ignore 34 | __tablename__ = "resolutions" 35 | 36 | id = Column(Integer, primary_key=True) 37 | package = Column(String, nullable=False) 38 | version = Column(String, nullable=True) 39 | source = Column(String, nullable=True) 40 | 41 | __table_args__ = ( 42 | UniqueConstraint("package", "version", "source", name="resolution_unique_constraint"), 43 | ) 44 | 45 | 46 | class Updated(Base): # type: ignore 47 | __tablename__ = "updated" 48 | 49 | id = Column(Integer, primary_key=True) 50 | package = Column(String, nullable=False) 51 | version = Column(String, nullable=True) 52 | source = Column(String, nullable=True) 53 | resolver = Column(String, nullable=True) 54 | 55 | __table_args__ = ( 56 | UniqueConstraint( 57 | "package", "version", "source", "resolver", name="updated_unique_constraint" 58 | ), 59 | ) 60 | 61 | 62 | class DBDependency(Base, Dependency): # type: ignore 63 | __tablename__ = "dependencies" 64 | 65 | id = Column(Integer, primary_key=True) 66 | from_package_id = Column(Integer, ForeignKey("packages.id")) 67 | from_package = relationship("DBPackage", back_populates="raw_dependencies") 68 | source = Column(String, nullable=False) 69 | package = Column(String, nullable=False) 70 | semantic_version_string = Column("semantic_version", String, nullable=True) 71 | 72 | __table_args__ = ( 73 | UniqueConstraint( 74 | "from_package_id", 75 | "package", 76 | "semantic_version", 77 | name="dependency_unique_constraint", 78 | ), 79 | ) 80 | 81 | def __init__(self, package: "DBPackage", dep: Dependency): 82 | # We intentionally skip calling super().__init__() 83 | self.from_package_id = package.id 84 | self.source = dep.source 85 | self.package = dep.package 86 | self.semantic_version = dep.semantic_version # type: ignore 87 | 88 | @hybrid_property # type: ignore 89 | def semantic_version(self) -> SemanticVersion: 90 | resolver = resolver_by_name(self.source) 91 | return resolver.parse_spec(self.semantic_version_string) 92 | 93 | @semantic_version.setter # type: ignore 94 | def semantic_version(self, new_version: Union[SemanticVersion, str]): 95 | self.semantic_version_string = str(new_version) 96 | 97 | 98 | class DependencyMapping: 99 | def __init__(self, package: "DBPackage"): 100 | super().__init__() 101 | self._deps: Dict[str, Dependency] = { 102 | 
dep.package: Dependency( 103 | package=dep.package, 104 | source=dep.source, 105 | semantic_version=dep.semantic_version, 106 | ) 107 | for dep in package.raw_dependencies 108 | } 109 | 110 | def items(self) -> Iterator[Tuple[str, Dependency]]: 111 | yield from self._deps.items() 112 | 113 | def keys(self) -> Iterable[str]: 114 | return self._deps.keys() 115 | 116 | def values(self) -> Iterable[Dependency]: 117 | return self._deps.values() 118 | 119 | def __setitem__(self, dep_name: str, dep: Dependency): 120 | self._deps[dep_name] = dep 121 | 122 | def __delitem__(self, dep_name: str): 123 | pass 124 | 125 | def __getitem__(self, package_name: str) -> Dependency: 126 | return self._deps[package_name] 127 | 128 | def __len__(self) -> int: 129 | return len(self._deps) 130 | 131 | def __iter__(self) -> Iterator[str]: 132 | return iter(self._deps) 133 | 134 | 135 | class DBPackage(Base, Package): # type: ignore 136 | __tablename__ = "packages" 137 | 138 | id = Column(Integer, primary_key=True) 139 | name = Column(String, nullable=False) 140 | version_str = Column("version", String, nullable=False) 141 | source = Column("source", String, nullable=False) 142 | 143 | __table_args__ = ( 144 | UniqueConstraint("name", "version", "source", name="package_unique_constraint"), 145 | ) 146 | 147 | raw_dependencies = relationship( 148 | "DBDependency", 149 | back_populates="from_package", 150 | cascade="all, delete, delete-orphan", 151 | ) 152 | 153 | def __init__(self, package: Package): 154 | # We intentionally skip calling super().__init__() 155 | self.name = package.name 156 | self.version = package.version 157 | self.source = package.source 158 | 159 | @property 160 | def resolver(self) -> DependencyResolver: 161 | return resolver_by_name(self.source) 162 | 163 | @staticmethod 164 | def from_package(package: Package, session) -> "DBPackage": 165 | if not isinstance(package, DBPackage): 166 | dep_pkg = package 167 | package = DBPackage(package) 168 | session.add(package) 169 | session.flush() 170 | session.add_all([DBDependency(package, dep) for dep in dep_pkg.dependencies]) 171 | else: 172 | session.add(package) 173 | return package 174 | 175 | def to_package(self) -> Package: 176 | return Package( 177 | source=self.source, 178 | name=self.name, 179 | version=self.version, 180 | dependencies=( 181 | Dependency( 182 | package=dep.package, 183 | semantic_version=dep.semantic_version, 184 | source=dep.source, 185 | ) 186 | for dep in self.raw_dependencies 187 | ), 188 | ) 189 | 190 | @property 191 | def version(self) -> Version: 192 | return self.resolver.parse_version(self.version_str) 193 | 194 | @version.setter 195 | def version(self, new_version: Union[Version, str]): 196 | self.version_str = str(new_version) 197 | 198 | @property 199 | def dependencies(self) -> DependencyMapping: # type: ignore 200 | return DependencyMapping(self) 201 | 202 | 203 | class SourceFilteredPackageCache(PackageCache): 204 | def __init__(self, source: Optional[str], parent: "DBPackageCache"): 205 | super().__init__() 206 | self.source: Optional[str] = source 207 | self.parent: DBPackageCache = parent 208 | 209 | def __len__(self): 210 | return ( 211 | self.parent.session.query(DBPackage) 212 | .filter(DBPackage.source.like(self.source)) 213 | .count() 214 | ) 215 | 216 | def __iter__(self) -> Iterator[Package]: 217 | yield from [ 218 | p.to_package() 219 | for p in self.parent.session.query(DBPackage) 220 | .filter(DBPackage.source.like(self.source)) 221 | .all() 222 | ] 223 | 224 | def was_resolved(self,
dependency: Dependency) -> bool: 225 | return self.parent.was_resolved(dependency) 226 | 227 | def set_resolved(self, dependency: Dependency): 228 | self.parent.set_resolved(dependency) 229 | 230 | def from_source(self, source: Optional[str]) -> "PackageCache": 231 | return SourceFilteredPackageCache(source, self.parent) 232 | 233 | def package_versions(self, package_name: str) -> Iterator[Package]: 234 | yield from [ 235 | p.to_package() 236 | for p in self.parent.session.query(DBPackage) 237 | .filter( 238 | DBPackage.name.like(package_name), 239 | DBPackage.source.like(self.source), 240 | ) 241 | .all() 242 | ] 243 | 244 | def package_full_names(self) -> FrozenSet[str]: 245 | return frozenset( 246 | self.parent.session.query(distinct(DBPackage.name)) 247 | .filter(DBPackage.source.like(self.source)) 248 | .all() 249 | ) 250 | 251 | def match(self, to_match: Union[str, Package, Dependency]) -> Iterator[Package]: 252 | return self.parent.match(to_match) 253 | 254 | def add(self, package: Package): 255 | return self.parent.add(package) 256 | 257 | def set_updated(self, package: Package, resolver: str): 258 | return self.parent.set_updated(package, resolver) 259 | 260 | def was_updated(self, package: Package, resolver: str) -> bool: 261 | return self.parent.was_updated(package, resolver) 262 | 263 | def updated_by(self, package: Package) -> FrozenSet[str]: 264 | return self.parent.updated_by(package) 265 | 266 | 267 | class DBPackageCache(PackageCache): 268 | def __init__(self, db: Union[str, Path] = ":memory:"): 269 | super().__init__() 270 | if db == ":memory:": 271 | db = "sqlite:///:memory:" 272 | elif db == "sqlite:///:memory:": 273 | pass 274 | elif isinstance(db, str): 275 | if db.startswith("sqlite:///"): 276 | db = db[len("sqlite:///") :] 277 | db = Path(db) 278 | if isinstance(db, Path): 279 | db.parent.mkdir(parents=True, exist_ok=True) 280 | db = f"sqlite:///{db.absolute()!s}?check_same_thread=False" 281 | self.db: str = db 282 | self._session = None 283 | 284 | def open(self): 285 | if isinstance(self.db, str): 286 | db = create_engine(self.db) 287 | else: 288 | db = self.db 289 | self._session = sessionmaker(bind=db)() 290 | Base.metadata.create_all(db) 291 | 292 | def close(self): 293 | self._session = None 294 | 295 | @property 296 | def session(self): 297 | return self._session 298 | 299 | def add(self, package: Package): 300 | self.extend((package,)) 301 | 302 | def extend(self, packages: Iterable[Package]): 303 | for package in packages: 304 | for existing in self.match(package): 305 | if len(existing.dependencies) > len(package.dependencies): 306 | raise ValueError( 307 | f"Package {package!s} has already been resolved with more dependencies: " 308 | f"{existing!s}" 309 | ) 310 | elif existing.dependencies != package.dependencies: 311 | existing.dependencies = package.dependencies 312 | self.session.commit() 313 | found_existing = True 314 | break 315 | else: 316 | found_existing = False 317 | if found_existing: 318 | continue 319 | if isinstance(package, DBPackage): 320 | self.session.add(package) 321 | else: 322 | _ = DBPackage.from_package(package, self.session) 323 | self.session.commit() 324 | 325 | def __len__(self): 326 | return self.session.query(DBPackage).count() 327 | 328 | def __iter__(self) -> Iterator[Package]: 329 | yield from self.session.query(DBPackage).all() 330 | 331 | def from_source(self, source: Optional[str]) -> SourceFilteredPackageCache: 332 | return SourceFilteredPackageCache(source, self) 333 | 334 | def package_versions(self,
package_full_name: str) -> Iterator[Package]: 335 | yield from [ 336 | p.to_package() 337 | for p in self.session.query(DBPackage) 338 | .filter(DBPackage.name.like(package_full_name)) 339 | .all() 340 | ] 341 | 342 | def package_full_names(self) -> FrozenSet[str]: 343 | return frozenset( 344 | f"{result[0]}:{result[1]}" 345 | for result in self.session.query( 346 | distinct(DBPackage.source), distinct(DBPackage.name) 347 | ).all() 348 | ) 349 | 350 | def _make_query(self, to_match: Union[str, Package], source: Optional[str] = None): 351 | if source is None and isinstance(to_match, Package): 352 | source = to_match.source 353 | if source is not None: 354 | filters: Tuple[Any, ...] = (DBPackage.source.like(source),) 355 | else: 356 | filters = () 357 | if isinstance(to_match, Package): 358 | return self.session.query(DBPackage).filter( 359 | DBPackage.name.like(to_match.name), 360 | DBPackage.version_str.like(str(to_match.version)), 361 | *filters, 362 | ) 363 | else: 364 | return self.session.query(DBPackage).filter(DBPackage.name.like(to_match), *filters) 365 | 366 | def match(self, to_match: Union[str, Package, Dependency]) -> Iterator[Package]: 367 | if isinstance(to_match, Dependency): 368 | for package in self._make_query(to_match.package, source=to_match.source): 369 | if package.version in to_match.semantic_version: 370 | yield package.to_package() 371 | else: 372 | if isinstance(to_match, Package): 373 | source: Optional[str] = to_match.source 374 | else: 375 | source = None 376 | # we intentionally build a list before yielding so that we don't keep the session query lingering 377 | yield from [ 378 | package.to_package() for package in self._make_query(to_match, source=source).all() 379 | ] 380 | 381 | def was_resolved(self, dependency: Dependency) -> bool: 382 | return ( 383 | self.session.query(Resolution) 384 | .filter( 385 | Resolution.package.like(dependency.package), 386 | Resolution.version == str(dependency.semantic_version), 387 | Resolution.source.like(dependency.source), 388 | ) 389 | .limit(1) 390 | .count() 391 | > 0 392 | ) 393 | 394 | def set_resolved(self, dependency: Dependency): 395 | if self.was_resolved(dependency): 396 | return 397 | self.session.add( 398 | Resolution( 399 | package=dependency.package, 400 | version=str(dependency.semantic_version), 401 | source=dependency.source, 402 | ) 403 | ) 404 | self.session.commit() 405 | 406 | def updated_by(self, package: Package) -> FrozenSet[str]: 407 | return frozenset( 408 | u.resolver 409 | for u in self.session.query(Updated).filter( 410 | Updated.source.like(package.source), 411 | Updated.package.like(package.name), 412 | Updated.version == str(package.version), 413 | ) 414 | ) 415 | 416 | def was_updated(self, package: Package, resolver: str) -> bool: 417 | if package.source == resolver: 418 | return True 419 | return ( 420 | self.session.query(Updated) 421 | .filter( 422 | Updated.source.like(package.source), 423 | Updated.package.like(package.name), 424 | Updated.version.like(str(package.version)), 425 | Updated.resolver.like(resolver), 426 | ) 427 | .limit(1) 428 | .count() 429 | > 0 430 | ) 431 | 432 | def set_updated(self, package: Package, resolver: str): 433 | if self.was_updated(package, resolver): 434 | return 435 | self.session.add( 436 | Updated( 437 | package=package.name, 438 | version=str(package.version), 439 | source=package.source, 440 | resolver=resolver, 441 | ) 442 | ) 443 | self.session.commit() 444 | -------------------------------------------------------------------------------- 
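A minimal usage sketch of the cache above (an illustration, not part of the repository: the example package, its pinned version, and the assumption that importing `it_depends.pip` registers the "pip" resolver are hypothetical here):

    from semantic_version import SimpleSpec, Version

    import it_depends.pip  # noqa: F401  (assumption: importing the module registers the "pip" resolver)
    from it_depends.db import DBPackageCache
    from it_depends.dependencies import Dependency, Package

    # a hypothetical package with a single dependency, shaped like resolver output
    pkg = Package(
        name="example",
        version=Version("1.0.0"),
        source="pip",
        dependencies=[Dependency(package="requests", semantic_version=SimpleSpec(">=2.0"), source="pip")],
    )

    cache = DBPackageCache()  # ":memory:" by default, so nothing is written to disk
    cache.open()              # creates the schema via Base.metadata.create_all()
    cache.add(pkg)            # stored as one DBPackage row plus one DBDependency row
    assert len(cache) == 1
    assert any(match.version == pkg.version for match in cache.match(pkg))
    cache.close()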
/it_depends/docker.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import shutil 4 | import subprocess 5 | import sys 6 | import os 7 | from pathlib import Path 8 | from tempfile import mkdtemp 9 | from tqdm import tqdm 10 | from typing import Dict, Iterable, List, Optional, Tuple, Union 11 | 12 | import docker 13 | from docker.errors import NotFound as ImageNotFound, DockerException 14 | from docker.models.images import Image 15 | 16 | from . import version as it_depends_version 17 | 18 | 19 | def _discover_podman_socket(): 20 | """Try to discover a Podman socket. 21 | 22 | Discovery is performed in this order: 23 | 24 | * If the user is non-root, rootless Podman 25 | * If the user is root, rooted Podman 26 | """ 27 | 28 | euid = os.geteuid() 29 | if euid != 0: 30 | # Non-root: use XDG_RUNTIME_DIR to try and find the user's Podman socket, 31 | # falling back on the systemd-enforced default. 32 | # Ref: https://docs.podman.io/en/latest/markdown/podman-system-service.1.html 33 | runtime_dir = Path(os.environ.get("XDG_RUNTIME_DIR", f"/run/user/{euid}/")) 34 | if not runtime_dir.is_dir(): 35 | return None 36 | 37 | sock_path = runtime_dir / "podman/podman.sock" 38 | else: 39 | # Root: check for /run/podman/podman.sock and nothing else. 40 | sock_path = Path("/run/podman/podman.sock") 41 | 42 | if not sock_path.is_socket(): 43 | return None 44 | 45 | return f"unix://{sock_path}" 46 | 47 | 48 | class Dockerfile: 49 | def __init__(self, path: Path): 50 | self._path: Path = path 51 | self._len: Optional[int] = None 52 | self._line_offsets: Dict[int, int] = {} 53 | 54 | @property 55 | def path(self) -> Path: 56 | return self._path 57 | 58 | @path.setter 59 | def path(self, new_path: Path): 60 | self._path = new_path 61 | self._len = None 62 | self._line_offsets = {} 63 | 64 | def __enter__(self) -> "Dockerfile": 65 | return self 66 | 67 | def __exit__(self, exc_type, exc_val, exc_tb): 68 | pass 69 | 70 | def exists(self) -> bool: 71 | return self.path.exists() 72 | 73 | def dir(self) -> Path: 74 | return self.path.parent 75 | 76 | def __len__(self) -> int: 77 | """Returns the number of lines in the file""" 78 | if self._len is None: 79 | self._len = 0 80 | self._line_offsets[0] = 0 # line 0 starts at offset 0 81 | offset = 0 82 | with open(self.path, "rb") as f: 83 | while True: 84 | chunk = f.read(1) 85 | if len(chunk) == 0: 86 | break 87 | elif chunk == b"\n": 88 | self._len += 1 89 | self._line_offsets[self._len] = offset + 1 90 | offset += 1 91 | return self._len 92 | 93 | def get_line(self, step_command: str, starting_line: int = 0) -> Optional[int]: 94 | """Returns the line number of the associated step command""" 95 | if self._len is None: 96 | # we need to call __len__ to set self._line_offsets 97 | _ = len(self) 98 | if starting_line not in self._line_offsets: 99 | return None 100 | with open(self.path, "r") as f: 101 | f.seek(self._line_offsets[starting_line]) 102 | line_offset = 0 103 | while True: 104 | line = f.readline() 105 | if line == "": 106 | break 107 | elif line == step_command: 108 | return starting_line + line_offset 109 | line_offset += 1 110 | return None 111 | 112 | 113 | class InMemoryFile: 114 | def __init__(self, filename: str, content: bytes): 115 | self.filename: str = filename 116 | self.content: bytes = content 117 | 118 | 119 | class InMemoryDockerfile(Dockerfile): 120 | def __init__(self, content: str, local_files: Iterable[InMemoryFile] = ()): 121 | super().__init__(None) # type: ignore 122 | 
self.content: str = content 123 | self.local_files: List[InMemoryFile] = list(local_files) 124 | self._entries: int = 0 125 | self._tmpdir: Optional[Path] = None 126 | 127 | @Dockerfile.path.getter # type: ignore 128 | def path(self) -> Path: 129 | path = super().path 130 | if path is None: 131 | raise ValueError( 132 | "InMemoryDockerfile only has a valid path when inside of its context manager" 133 | ) 134 | return path 135 | 136 | def __enter__(self) -> "InMemoryDockerfile": 137 | self._entries += 1 138 | if self._entries == 1: 139 | self._tmpdir = Path(mkdtemp()) 140 | for file in self.local_files: 141 | with open(self._tmpdir / file.filename, "wb") as f: 142 | f.write(file.content) 143 | self.path = self._tmpdir / "Dockerfile" 144 | with open(self.path, "w") as d: 145 | d.write(self.content) 146 | return self 147 | 148 | def __exit__(self, exc_type, exc_val, exc_tb): 149 | self._entries -= 1 150 | if self._entries == 0: 151 | self.path.unlink() 152 | shutil.rmtree(self._tmpdir) 153 | self.path = None # type: ignore 154 | 155 | 156 | class DockerContainer: 157 | def __init__( 158 | self, 159 | image_name: str, 160 | dockerfile: Optional[Dockerfile] = None, 161 | tag: Optional[str] = None, 162 | ): 163 | self.image_name: str = image_name 164 | if tag is None: 165 | self.tag: str = it_depends_version() 166 | else: 167 | self.tag = tag 168 | self._client: Optional[docker.DockerClient] = None 169 | self.dockerfile: Optional[Dockerfile] = dockerfile 170 | 171 | def run( 172 | self, 173 | *args: str, 174 | check_existence: bool = True, 175 | rebuild: bool = True, 176 | build_if_necessary: bool = True, 177 | remove: bool = True, 178 | interactive: bool = True, 179 | mounts: Optional[Iterable[Tuple[Union[str, Path], Union[str, Path]]]] = None, 180 | privileged: bool = False, 181 | env: Optional[Dict[str, str]] = None, 182 | stdin=None, 183 | stdout=None, 184 | stderr=None, 185 | cwd=None, 186 | ): 187 | if rebuild: 188 | self.rebuild() 189 | elif check_existence and not self.exists(): 190 | if build_if_necessary: 191 | if self.dockerfile is not None and self.dockerfile.exists(): 192 | self.rebuild() 193 | else: 194 | self.pull() 195 | if not self.exists(): 196 | raise ValueError(f"{self.name} does not exist!") 197 | else: 198 | raise ValueError( 199 | f"{self.name} does not exist! Re-run with `build_if_necessary=True` to automatically " 200 | "build it." 
201 | ) 202 | if cwd is None: 203 | cwd = str(Path.cwd()) 204 | 205 | # Call out to the actual Docker command instead of the Python API because it has better support for interactive 206 | # TTYs 207 | 208 | if interactive and (stdin is not None or stdout is not None or stderr is not None): 209 | raise ValueError( 210 | "if `interactive == True`, all of `stdin`, `stdout`, and `stderr` must be `None`" 211 | ) 212 | 213 | cmd_args = [str(Path("/usr") / "bin" / "env"), "docker", "run"] 214 | 215 | if interactive: 216 | cmd_args.append("-it") 217 | 218 | if remove: 219 | cmd_args.append("--rm") 220 | 221 | if mounts is not None: 222 | for source, target in mounts: 223 | cmd_args.append("-v") 224 | if not isinstance(source, Path): 225 | source = Path(source) 226 | source = source.absolute() 227 | cmd_args.append(f"{source!s}:{target!s}:cached") 228 | 229 | if env is not None: 230 | for k, v in env.items(): 231 | cmd_args.append("-e") 232 | escaped_value = v.replace('"', '\\"') 233 | cmd_args.append(f"{k}={escaped_value}") 234 | 235 | if privileged: 236 | cmd_args.append("--privileged=true") 237 | 238 | cmd_args.append(self.name) 239 | 240 | cmd_args.extend(args) 241 | 242 | if interactive: 243 | return subprocess.call(cmd_args, cwd=cwd, stdout=sys.stderr) 244 | else: 245 | return subprocess.run(cmd_args, stdin=stdin, stdout=stdout, stderr=stderr, cwd=cwd) 246 | 247 | # self.client.containers.run(self.name, args, remove=remove, mounts=[ 248 | # Mount(target=str(target), source=str(source), consistency="cached") for source, target in mounts 249 | # ]) 250 | 251 | @property 252 | def name(self) -> str: 253 | return f"{self.image_name}:{self.tag}" 254 | 255 | @property 256 | def client(self) -> docker.DockerClient: 257 | if self._client is None: 258 | self._client = docker.from_env() 259 | return self._client 260 | 261 | def exists(self) -> Optional[Image]: 262 | for image in self.client.images.list(): 263 | if self.name in image.tags: 264 | return image 265 | return None 266 | 267 | def pull(self, latest: bool = False) -> Image: 268 | # We could use the Python API to pull, like this: 269 | # return self.client.images.pull(self.image_name, tag=[self.tag, None][latest]) 270 | # However, that doesn't include progress bars. 
So call the `docker` command instead: 271 | name = f"{self.image_name}:{[self.tag, 'latest'][latest]}" 272 | try: 273 | subprocess.check_call(["docker", "pull", name]) 274 | for image in self.client.images.list(): 275 | if name in image.tags: 276 | return image 277 | except subprocess.CalledProcessError: 278 | pass 279 | raise ImageNotFound(name) 280 | 281 | def rebuild(self, nocache: bool = False): 282 | if self.dockerfile is None: 283 | _ = self.pull() 284 | return 285 | elif not self.dockerfile.exists(): 286 | raise ValueError("Could not find the Dockerfile.") 287 | # use the low-level APIClient so we can get streaming build status 288 | try: 289 | sock = _discover_podman_socket() 290 | cli = docker.APIClient(base_url=sock) 291 | except DockerException as e: 292 | raise ValueError(f"Could not connect to socket: sock={sock} {e}") from e 293 | with tqdm(desc="Archiving the build directory", unit=" steps", leave=False) as t: 294 | last_line = 0 295 | last_step = None 296 | for raw_line in cli.build( 297 | path=str(self.dockerfile.dir()), 298 | rm=True, 299 | tag=self.name, 300 | nocache=nocache, 301 | forcerm=True, 302 | ): 303 | t.desc = f"Building {self.name}" 304 | for line in raw_line.split(b"\n"): 305 | try: 306 | line = json.loads(line) 307 | except json.decoder.JSONDecodeError: 308 | continue 309 | if "stream" in line: 310 | m = re.match( 311 | r"^Step\s+(\d+)(/(\d+))?\s+:\s+(.+)$", 312 | line["stream"], 313 | re.MULTILINE, 314 | ) 315 | if m: 316 | if m.group(3): 317 | # Docker told us the total number of steps! 318 | total_steps = int(m.group(3)) 319 | current_step = int(m.group(1)) 320 | if last_step is None: 321 | t.total = total_steps 322 | last_step = 0 323 | t.update(current_step - last_step) 324 | last_step = current_step 325 | else: 326 | # Docker didn't tell us the total number of steps, so infer it from our line 327 | # number in the Dockerfile 328 | t.total = len(self.dockerfile) 329 | new_line = self.dockerfile.get_line( 330 | m.group(4), starting_line=last_line 331 | ) 332 | if new_line is not None: 333 | t.update(new_line - last_line) 334 | last_line = new_line 335 | t.write(line["stream"].replace("\n", "").strip()) 336 | -------------------------------------------------------------------------------- /it_depends/go.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from datetime import datetime 3 | from html.parser import HTMLParser 4 | from logging import getLogger 5 | import os 6 | from pathlib import Path 7 | import re 8 | from subprocess import check_call, check_output, DEVNULL, CalledProcessError 9 | from tempfile import TemporaryDirectory 10 | from typing import Iterable, Iterator, List, Optional, Tuple, Union 11 | from urllib import request 12 | from urllib.error import HTTPError, URLError 13 | 14 | from semantic_version import Version 15 | from semantic_version.base import BaseSpec, Range, SimpleSpec 16 | 17 | from .dependencies import ( 18 | Dependency, 19 | DependencyResolver, 20 | SourcePackage, 21 | SourceRepository, 22 | Package, 23 | PackageCache, 24 | SemanticVersion, 25 | ) 26 | from . import vcs 27 | 28 | log = getLogger(__file__) 29 | 30 | GITHUB_URL_MATCH = re.compile( 31 | r"\s*https?://(www\.)?github.com/([^/]+)/(.+?)(\.git)?\s*", re.IGNORECASE 32 | ) 33 | REQUIRE_LINE_REGEX = r"\s*([^\s]+)\s+([^\s]+)\s*(//\s*indirect\s*)?" 
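# For illustration (an editorial note; the module path is hypothetical): REQUIRE_LINE_REGEX
# is built to match one `require` entry from a go.mod file, e.g.
#     github.com/stretchr/testify v1.7.0 // indirect
# with the module path in group 1, the version in group 2, and the optional "// indirect"
# marker in group 3. REQUIRE_MATCH below applies the same pattern to the single-line
# `require <path> <version>` form, while REQUIRE_BLOCK_MATCH recognizes the opening of a
# multi-line `require ( ... )` block, which parse_mod() then walks line by line.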
34 | REQUIRE_LINE_MATCH = re.compile(REQUIRE_LINE_REGEX) 35 | REQUIRE_MATCH = re.compile(rf"\s*require\s+{REQUIRE_LINE_REGEX}") 36 | REQUIRE_BLOCK_MATCH = re.compile(r"\s*require\s+\(\s*") 37 | MODULE_MATCH = re.compile(r"\s*module\s+([^\s]+)\s*") 38 | 39 | GOPATH: Optional[str] = os.environ.get("GOPATH", None) 40 | 41 | 42 | @dataclass(frozen=True, unsafe_hash=True) 43 | class MetaImport: 44 | prefix: str 45 | vcs: str 46 | repo_root: str 47 | 48 | 49 | class MetadataParser(HTMLParser): 50 | def __init__(self): 51 | super().__init__() 52 | self.in_meta: bool = False 53 | self.metadata: List[MetaImport] = []  # per-instance state: a class-level list would accumulate results across parsers 54 | def error(self, message): 55 | pass 56 | def handle_starttag(self, tag, attrs): 57 | if tag == "meta": 58 | attrs = dict(attrs) 59 | if attrs.get("name", "") == "go-import": 60 | fields = attrs.get("content", "").split(" ") 61 | if len(fields) == 3: 62 | self.metadata.append(MetaImport(*fields)) 63 | 64 | 65 | def git_commit(path: Optional[str] = None) -> Optional[str]: 66 | try: 67 | return check_output(["git", "rev-parse", "HEAD"], cwd=path, stderr=DEVNULL).decode("utf-8").strip() 68 | except CalledProcessError: 69 | return None 70 | 71 | 72 | class GoVersion: 73 | def __init__(self, go_version_string: str): 74 | self.version_string: str = go_version_string.strip() 75 | if self.version_string.startswith("="): 76 | self.version_string = self.version_string[1:] 77 | self.build: bool = False # This is to appease semantic_version.base.SimpleSpec 78 | 79 | def __lt__(self, other): 80 | return self.version_string < str(other) 81 | 82 | def __eq__(self, other): 83 | return isinstance(other, GoVersion) and self.version_string == other.version_string 84 | 85 | def __hash__(self): 86 | return hash(self.version_string) 87 | 88 | def __str__(self): 89 | return self.version_string 90 | 91 | 92 | @BaseSpec.register_syntax 93 | class GoSpec(SimpleSpec): 94 | SYNTAX = "go" 95 | 96 | class Parser(SimpleSpec.Parser): 97 | @classmethod 98 | def parse(cls, expression): 99 | if expression.startswith("="): 100 | expression = expression[1:] 101 | return Range(operator=Range.OP_EQ, target=GoVersion(expression)) 102 | 103 | def __contains__(self, item): 104 | return item == self.clause.target 105 | 106 | 107 | class GoModule: 108 | def __init__(self, name: str, dependencies: Iterable[Tuple[str, str]] = ()): 109 | self.name: str = name 110 | self.dependencies: List[Tuple[str, str]] = list(dependencies) 111 | 112 | @staticmethod 113 | def tag_to_git_hash(tag: str) -> str: 114 | segments = tag.split("-") 115 | if len(segments) == 3: 116 | return segments[-1] 117 | else: 118 | return tag 119 | 120 | @staticmethod 121 | def parse_mod(mod_content: Union[str, bytes]) -> "GoModule": 122 | if isinstance(mod_content, bytes): 123 | mod_content = mod_content.decode("utf-8") 124 | in_require = False 125 | dependencies = [] 126 | name = None 127 | for line in mod_content.split("\n"): 128 | if not in_require: 129 | m = REQUIRE_MATCH.match(line) 130 | if m: 131 | dependencies.append((m.group(1), m.group(2))) 132 | else: 133 | if name is None: 134 | m = MODULE_MATCH.match(line) 135 | if m: 136 | name = m.group(1) 137 | continue 138 | in_require = bool(REQUIRE_BLOCK_MATCH.match(line)) 139 | elif line.strip() == ")": 140 | in_require = False 141 | else: 142 | m = REQUIRE_LINE_MATCH.match(line) 143 | if m: 144 | dependencies.append((m.group(1), m.group(2))) 145 | if name is None: 146 | raise ValueError("Missing `module` line in go mod specification") 147 | return GoModule(name, dependencies) 148 | 149 | @staticmethod 150 | def from_github(github_org: str, github_repo: str, tag:
str): 151 | github_url = f"https://raw.githubusercontent.com/{github_org}/{github_repo}/{tag}/go.mod" 152 | try: 153 | with request.urlopen(github_url) as response: 154 | return GoModule.parse_mod(response.read()) 155 | except HTTPError as e: 156 | if e.code == 404: 157 | # Revert to cloning the repo 158 | return GoModule.from_git( 159 | import_path=f"github.com/{github_org}/{github_repo}", 160 | git_url=f"https://github.com/{github_org}/{github_repo}", 161 | tag=tag, 162 | check_for_github=False, 163 | ) 164 | raise 165 | 166 | @staticmethod 167 | def from_git( 168 | import_path: str, 169 | git_url: str, 170 | tag: str, 171 | check_for_github: bool = True, 172 | force_clone: bool = False, 173 | ): 174 | if check_for_github: 175 | m = GITHUB_URL_MATCH.fullmatch(git_url) 176 | if m: 177 | return GoModule.from_github(m.group(2), m.group(3), tag) 178 | log.info(f"Attempting to clone {git_url}") 179 | with TemporaryDirectory() as tempdir: 180 | env = {"GIT_TERMINAL_PROMPT": "0"} 181 | if os.environ.get("GIT_SSH", "") == "" and os.environ.get("GIT_SSH_COMMAND", "") == "": 182 | # disable any ssh connection pooling by git 183 | env["GIT_SSH_COMMAND"] = "ssh -o ControlMaster=no" 184 | if tag == "*" or force_clone: 185 | # this will happen if we are resolving a wildcard, typically if the user called something like 186 | # `it-depends go:github.com/ethereum/go-ethereum` 187 | td = Path(tempdir) 188 | check_call( 189 | ["git", "clone", "--depth", "1", git_url, td.name], 190 | cwd=td.parent, 191 | stderr=DEVNULL, 192 | stdout=DEVNULL, 193 | env=env, 194 | ) 195 | else: 196 | check_call(["git", "init"], cwd=tempdir, stderr=DEVNULL, stdout=DEVNULL) 197 | check_call( 198 | ["git", "remote", "add", "origin", git_url], 199 | cwd=tempdir, 200 | stderr=DEVNULL, 201 | stdout=DEVNULL, 202 | ) 203 | git_hash = GoModule.tag_to_git_hash(tag) 204 | try: 205 | check_call( 206 | ["git", "fetch", "--depth", "1", "origin", git_hash], 207 | cwd=tempdir, 208 | stderr=DEVNULL, 209 | stdout=DEVNULL, 210 | env=env, 211 | ) 212 | except CalledProcessError: 213 | # not all git servers support `git fetch --depth 1` on a hash 214 | try: 215 | check_call( 216 | ["git", "fetch", "origin"], 217 | cwd=tempdir, 218 | stderr=DEVNULL, 219 | stdout=DEVNULL, 220 | env=env, 221 | ) 222 | except CalledProcessError: 223 | log.error(f"Could not clone {git_url} for {import_path!r}") 224 | return GoModule(import_path) 225 | try: 226 | check_call( 227 | ["git", "checkout", git_hash], 228 | cwd=tempdir, 229 | stderr=DEVNULL, 230 | stdout=DEVNULL, 231 | env=env, 232 | ) 233 | except CalledProcessError: 234 | if tag.startswith("="): 235 | return GoModule.from_git(import_path, git_url, tag[1:]) 236 | log.warning( 237 | f"Could not checkout tag {tag} of {git_url} for {import_path!r}; " 238 | "reverting to the main branch" 239 | ) 240 | return GoModule.from_git( 241 | import_path, 242 | git_url, 243 | tag, 244 | check_for_github=False, 245 | force_clone=True, 246 | ) 247 | go_mod_path = Path(tempdir) / "go.mod" 248 | if not go_mod_path.exists(): 249 | # the package likely doesn't have any dependencies 250 | return GoModule(import_path) 251 | with open(Path(tempdir) / "go.mod", "r") as f: 252 | return GoModule.parse_mod(f.read()) 253 | 254 | @staticmethod 255 | def url_for_import_path(import_path: str) -> str: 256 | """ 257 | returns a partially-populated URL for the given Go import path. 258 | 259 | The URL leaves the Scheme field blank so that web.Get will try any scheme 260 | allowed by the selected security mode. 
261 | """ 262 | slash = import_path.find("/") 263 | if slash == -1: 264 | raise vcs.VCSResolutionError("import path does not contain a slash") 265 | host, path = import_path[:slash], import_path[slash:] 266 | if "." not in host: 267 | raise vcs.VCSResolutionError("import path does not begin with hostname") 268 | if not path.startswith("/"): 269 | path = f"/{path}" 270 | return f"https://{host}{path}?go-get=1" 271 | 272 | @staticmethod 273 | def meta_imports_for_prefix(import_prefix: str) -> Tuple[str, List[MetaImport]]: 274 | url = GoModule.url_for_import_path(import_prefix) 275 | with request.urlopen(url) as req: 276 | return url, GoModule.parse_meta_go_imports(req.read().decode("utf-8")) 277 | 278 | @staticmethod 279 | def match_go_import(imports: Iterable[MetaImport], import_path: str) -> MetaImport: 280 | match: Optional[MetaImport] = None 281 | for i, m in enumerate(imports): 282 | if not import_path.startswith(m.prefix): 283 | continue 284 | elif match is not None: 285 | if match.vcs == "mod" and m.vcs != "mod": 286 | break 287 | raise ValueError(f"Multiple meta tags match import path {import_path!r}") 288 | match = m 289 | if match is None: 290 | raise ValueError(f"Unable to match import path {import_path!r}") 291 | return match 292 | 293 | @staticmethod 294 | def parse_meta_go_imports(metadata: str) -> List[MetaImport]: 295 | parser = MetadataParser() 296 | parser.feed(metadata) 297 | return parser.metadata 298 | 299 | @staticmethod 300 | def repo_root_for_import_dynamic(import_path: str) -> vcs.Repository: 301 | url = GoModule.url_for_import_path(import_path) 302 | try: 303 | imports = GoModule.parse_meta_go_imports(request.urlopen(url).read().decode("utf-8")) 304 | except (HTTPError, URLError): 305 | raise ValueError(f"Could not download metadata from {url} for import {import_path!s}") 306 | meta_import = GoModule.match_go_import(imports, import_path) 307 | if meta_import.prefix != import_path: 308 | new_url, imports = GoModule.meta_imports_for_prefix(meta_import.prefix) 309 | meta_import2 = GoModule.match_go_import(imports, import_path) 310 | if meta_import != meta_import2: 311 | raise ValueError( 312 | f"{url} and {new_url} disagree about go-import for {meta_import.prefix!r}" 313 | ) 314 | # validateRepoRoot(meta_import.RepoRoot) 315 | if meta_import.vcs == "mod": 316 | the_vcs = vcs.VCS_MOD 317 | else: 318 | the_vcs = vcs.vcs_by_cmd(meta_import.vcs) # type: ignore 319 | if the_vcs is None: 320 | raise ValueError(f"{url}: unknown VCS {meta_import.vcs!r}") 321 | vcs.check_go_vcs(the_vcs, meta_import.prefix) 322 | return vcs.Repository( 323 | repo=meta_import.repo_root, 324 | root=meta_import.prefix, 325 | is_custom=True, 326 | vcs=the_vcs, 327 | ) 328 | 329 | @staticmethod 330 | def repo_root_for_import_path(import_path: str) -> vcs.Repository: 331 | try: 332 | return vcs.resolve(import_path) 333 | except vcs.VCSResolutionError: 334 | pass 335 | return GoModule.repo_root_for_import_dynamic(import_path) 336 | 337 | @staticmethod 338 | def from_import(import_path: str, tag: str) -> "GoModule": 339 | try: 340 | repo = GoModule.repo_root_for_import_path(import_path) 341 | except ValueError as e: 342 | log.warning(str(e)) 343 | return GoModule(import_path) 344 | if repo.vcs.name == "Git": 345 | return GoModule.from_git(import_path, repo.repo, tag) 346 | else: 347 | raise NotImplementedError(f"TODO: add support for VCS type {repo.vcs.name}") 348 | 349 | @staticmethod 350 | def load(name_or_url: str, tag: str = "master") -> "GoModule": 351 | if not name_or_url.startswith("http://") and 
not name_or_url.startswith("https://"): 352 | return GoModule.from_import(name_or_url, tag) 353 | else: 354 | return GoModule.from_git(name_or_url, name_or_url, tag) 355 | 356 | 357 | class GoResolver(DependencyResolver): 358 | name = "go" 359 | description = "classifies the dependencies of Go packages using `go mod`" 360 | 361 | def resolve(self, dependency: Dependency) -> Iterator[Package]: 362 | # assert isinstance(dependency.semantic_version, GoSpec) 363 | version_string = str(dependency.semantic_version) 364 | module = GoModule.from_import(dependency.package, version_string) 365 | yield Package( 366 | name=module.name, 367 | version=GoVersion(version_string), # type: ignore 368 | source=dependency.source, 369 | dependencies=[ 370 | Dependency( 371 | package=package, 372 | semantic_version=GoSpec(f"={version}"), 373 | source=dependency.source, 374 | ) 375 | for package, version in module.dependencies 376 | ], 377 | ) 378 | 379 | @classmethod 380 | def parse_spec(cls, spec: str) -> SemanticVersion: 381 | return GoSpec(spec) 382 | 383 | @classmethod 384 | def parse_version(cls, version_string: str) -> Version: 385 | return GoVersion(version_string) # type: ignore 386 | 387 | def can_resolve_from_source(self, repo: SourceRepository) -> bool: 388 | return bool(self.is_available()) and (repo.path / "go.mod").exists() 389 | 390 | def resolve_from_source(self, repo: SourceRepository, cache: Optional[PackageCache] = None): 391 | if not self.can_resolve_from_source(repo): 392 | return None 393 | 394 | with open(repo.path / "go.mod") as f: 395 | module = GoModule.parse_mod(f.read()) 396 | git_hash = git_commit(str(repo.path)) 397 | timestamp = datetime.utcnow().strftime("%Y%m%d%H%M%S") 398 | version = f"v0.0.0-{timestamp}-" 399 | if git_hash is None: 400 | version = f"{version}????" 401 | else: 402 | version = f"{version}{git_hash}" 403 | return SourcePackage( 404 | name=module.name, 405 | version=GoVersion(version), # type: ignore 406 | source_repo=repo, 407 | source=self.name, 408 | dependencies=[ 409 | Dependency(package=package, semantic_version=GoSpec(f"={version}"), source=self) 410 | for package, version in module.dependencies 411 | ], 412 | ) 413 | --------------------------------------------------------------------------------
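A short, hedged sketch of how GoResolver is driven (illustrative only: the module path is hypothetical, and resolution reaches out to the network to fetch the module's go.mod):

    from it_depends.dependencies import Dependency
    from it_depends.go import GoResolver, GoSpec

    resolver = GoResolver()
    dep = Dependency(
        package="github.com/fatih/color",  # hypothetical Go module path
        semantic_version=GoSpec("=v1.7.0"),
        source="go",
    )
    # resolve() yields a single Package pinned to the requested version, whose
    # dependencies mirror the require entries of the module's go.mod
    for package in resolver.resolve(dep):
        print(package.name, str(package.version))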
/it_depends/graphs.py: -------------------------------------------------------------------------------- 1 | from typing import ( 2 | Dict, 3 | Generic, 4 | Iterable, 5 | Iterator, 6 | Optional, 7 | Set, 8 | Tuple, 9 | Type, 10 | TypeVar, 11 | Union, 12 | ) 13 | 14 | import networkx as nx 15 | 16 | 17 | T = TypeVar("T") 18 | R = TypeVar("R") 19 | 20 | 21 | class RootedDiGraph(nx.DiGraph, Generic[T, R]): 22 | root_type: Type[R] 23 | 24 | def __init__(self, *args, **kwargs): 25 | super().__init__(*args, **kwargs) 26 | self.roots: Set[R] = set() 27 | self._all_pairs_shortest_paths: Optional[Dict[T, Dict[T, int]]] = None 28 | self._shortest_path_from_root: Optional[Dict[T, int]] = None 29 | 30 | def __init_subclass__(cls, **kwargs): 31 | if not hasattr(cls, "root_type") or getattr(cls, "root_type") is None: 32 | raise TypeError(f"{cls.__name__} must assign a `root_type` class variable") 33 | 34 | def shortest_path_from_root(self, node: T) -> int: 35 | """Returns the length of the shortest path from a root to node. 36 | 37 | If there are no roots in the graph or there is no path from a root, return -1. 38 | 39 | """ 40 | if not self.roots: 41 | return -1 42 | if len(self.roots) > 1: 43 | path_lengths = [self.shortest_path_length(root, node) for root in self.roots] 44 | return min((length for length in path_lengths if length >= 0), default=-1) 45 | elif self._shortest_path_from_root is None: 46 | self._shortest_path_from_root = nx.single_source_shortest_path_length( 47 | self, next(iter(self.roots)) 48 | ) # type: ignore 49 | return self._shortest_path_from_root.get(node, -1) 50 | 51 | def shortest_path_length(self, from_node: Union[T, R], to_node: T) -> int: 52 | if self._all_pairs_shortest_paths is None: 53 | self._all_pairs_shortest_paths = dict(nx.all_pairs_shortest_path_length(self)) # type: ignore 54 | if ( 55 | from_node not in self._all_pairs_shortest_paths 56 | or to_node not in self._all_pairs_shortest_paths[from_node] # type: ignore 57 | ): # type: ignore 58 | return -1 59 | return self._all_pairs_shortest_paths[from_node][to_node] # type: ignore 60 | 61 | def _handle_new_node(self, node: T): 62 | if isinstance(node, self.root_type): 63 | self.roots.add(node) 64 | 65 | def _handle_removed_node(self, node: T): 66 | if isinstance(node, self.root_type): 67 | self.roots.remove(node) 68 | 69 | def add_node(self, node_for_adding: T, **attr): 70 | self._handle_new_node(node_for_adding) 71 | return super().add_node(node_for_adding, **attr) 72 | 73 | def add_nodes_from(self, nodes_for_adding: Iterable[T], **attr): 74 | nodes = [] 75 | for node in nodes_for_adding: 76 | self._handle_new_node(node) 77 | nodes.append(node) 78 | return super().add_nodes_from(nodes, **attr) 79 | 80 | def add_edge(self, u_of_edge: T, v_of_edge: T, **attr): 81 | self._handle_new_node(u_of_edge) 82 | self._handle_new_node(v_of_edge) 83 | return super().add_edge(u_of_edge, v_of_edge, **attr) 84 | 85 | def add_edges_from( 86 | self, ebunch_to_add: Iterable[Union[Tuple[T, T], Tuple[T, T, Dict]]], **attr 87 | ): 88 | edges = [] 89 | for u, v, *r in ebunch_to_add: 90 | self._handle_new_node(u) 91 | self._handle_new_node(v) 92 | edges.append((u, v, *r)) 93 | super().add_edges_from(edges, **attr) 94 | 95 | def remove_node(self, node_for_removing: T): 96 | self._handle_removed_node(node_for_removing) 97 | return super().remove_node(node_for_removing) 98 | 99 | def remove_nodes_from(self, nodes_for_removing: Iterable[T]): 100 | nodes = [] 101 | for node in nodes_for_removing: 102 | self._handle_removed_node(node) 103 | nodes.append(node) 104 | return super().remove_nodes_from(nodes) 105 | 106 | def find_roots(self) -> "RootedDiGraph[T, T]": 107 | graph: RootedDiGraph[T, T] = RootedDiGraph() 108 | graph.root_type = self.root_type # type: ignore 109 | graph.add_nodes_from(self.nodes) 110 | graph.add_edges_from(self.edges) 111 | graph.roots = {n for n, d in self.in_degree() if d == 0} # type: ignore 112 | return graph 113 | 114 | def __iter__(self) -> Iterator[T]: 115 | yield from super().__iter__() 116 | 117 | def distance_to(self, graph: "RootedDiGraph[T, R]", normalize: bool = False) -> float: 118 | return compare_rooted_graphs(self, graph, normalize) 119 | 120 | 121 | def compare_rooted_graphs( 122 | graph1: RootedDiGraph[T, R], graph2: RootedDiGraph[T, R], normalize: bool = False 123 | ) -> float: 124 | """Calculates the edit distance between two rooted graphs. 125 | 126 | If normalize == False (the default), a value of zero means the graphs are identical, with increasing values 127 | corresponding to the difference between the graphs. 128 | 129 | If normalize == True, the returned value equals 1.0 iff the graphs are identical, with values approaching 130 | zero the less similar the graphs are. 131 | 132 | """ 133 | if not graph1.roots or not graph2.roots: 134 | raise ValueError("Both graphs must have at least one root") 135 | nodes1 = {node for node in graph1 if node not in graph1.roots} 136 | nodes2 = {node for node in graph2 if node not in graph2.roots} 137 | common_nodes = nodes1 & nodes2 138 | not_in_2 = nodes1 - nodes2 139 | not_in_1 = nodes2 - nodes1 140 | distance = 0.0 141 | for node in common_nodes: 142 | d1 = graph1.shortest_path_from_root(node) 143 | d2 = graph2.shortest_path_from_root(node) 144 | if d1 != d2: 145 | distance += 1.0 / min(d1, d2) - 1.0 / max(d1, d2) 146 | for node in not_in_2: 147 | distance += 1.0 / max(graph1.shortest_path_from_root(node), 1) 148 | for node in not_in_1: 149 | distance += 1.0 / max(graph2.shortest_path_from_root(node), 1) 150 | if normalize: 151 | if distance > 0.0: 152 | # the graphs are not identical 153 | max_distance = sum( 154 | max(graph1.shortest_path_from_root(node), 1) for node in graph1 155 | ) + sum(max(graph2.shortest_path_from_root(node), 1) for node in graph2) 156 | distance /= max_distance 157 | distance = 1.0 - distance 158 | return distance 159 | --------------------------------------------------------------------------------
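A small worked example of the distance metric above (illustrative; the node values and the `int` root type are arbitrary choices):

    from it_depends.graphs import RootedDiGraph

    class IntRootedGraph(RootedDiGraph):
        root_type = int  # required by __init_subclass__; int nodes become roots automatically

    g1 = IntRootedGraph()
    g1.add_edges_from([(0, "a"), ("a", "b")])  # "b" sits at depth 2 from the root 0
    g2 = IntRootedGraph()
    g2.add_edges_from([(0, "a"), (0, "b")])    # "b" sits at depth 1

    # only "b" differs in depth, contributing 1/1 - 1/2 = 0.5
    print(g1.distance_to(g2))                  # 0.5
    print(g1.distance_to(g2, normalize=True))  # values near 1.0 mean more similar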
/it_depends/html.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional, Set, Union 2 | 3 | from .dependencies import DependencyGraph, Package, PackageCache 4 | 5 | TEMPLATE: str = """ 6 | [template markup stripped from this dump: an HTML page scaffold titled "It-Depends | $TITLE", a heading that renders $TITLE, and the vis-network setup that consumes the $NODES, $EDGES, and $LAYOUT placeholders]
26 | 27 | 81 | 82 | 83 | """ 84 | 85 | 86 | def graph_to_html( 87 | graph: Union[DependencyGraph, PackageCache], 88 | collapse_versions: bool = True, 89 | title: Optional[str] = None, 90 | ) -> str: 91 | if not isinstance(graph, DependencyGraph): 92 | graph = graph.to_graph() 93 | if collapse_versions: 94 | graph = graph.collapse_versions() 95 | 96 | if graph.source_packages: 97 | roots: Set[Package] = graph.source_packages # type: ignore 98 | else: 99 | roots = graph.find_roots().roots 100 | 101 | if not graph.source_packages: 102 | layout = "improvedLayout: false" 103 | else: 104 | layout = "hierarchical: true" 105 | 106 | # sort the nodes and assign IDs to them (so they are in a deterministic order): 107 | node_ids: Dict[Package, int] = {} 108 | for node in sorted(graph): 109 | node_ids[node] = len(node_ids) 110 | 111 | nodes = [] 112 | edges = [] 113 | for package, node_id in node_ids.items(): 114 | nodes.append({"id": node_id, "label": package.full_name}) 115 | if package in roots: 116 | nodes[-1].update( 117 | { 118 | "shape": "square", 119 | "color": "red", 120 | "borderWidth": 4, 121 | } 122 | ) 123 | if package.vulnerabilities: 124 | nodes[-1].update({"color": "red"}) 125 | if graph.source_packages: 126 | nodes[-1]["level"] = max(graph.shortest_path_from_root(package), 0) 127 | for pkg1, pkg2, *_ in graph.out_edges(package): # type: ignore 128 | dep = graph.get_edge_data(pkg1, pkg2)["dependency"] 129 | if collapse_versions: 130 | # if we are collapsing versions, omit the version name 131 | dep_name = f"{dep.source}:{dep.package}" 132 | else: 133 | dep_name = str(dep) 134 | edges.append({"from": node_ids[pkg1], "to": node_ids[pkg2], "shape": "dot"}) 135 | if dep_name != pkg2.full_name: 136 | edges[-1]["label"] = dep_name 137 | 138 | if title is None: 139 | source_packages = ", ".join(p.full_name for p in graph.source_packages) 140 | if not source_packages: 141 | title = "Dependency Graph" 142 | else: 143 | title = f"Dependency Graph for {source_packages}" 144 | 145 | return ( 146 | TEMPLATE.replace("$NODES", repr(nodes)) 147 | .replace("$EDGES", repr(edges)) 148 | .replace("$TITLE", title) 149 | .replace("$LAYOUT", layout) 150 | ) 151 | -------------------------------------------------------------------------------- /it_depends/it_depends.py: -------------------------------------------------------------------------------- 1 | from appdirs import AppDirs 2 | import sys 3 | 4 | if sys.version_info < (3, 12): 5 | import pkg_resources 6 | 7 | def version() -> str: 8 | return pkg_resources.require("it-depends")[0].version 9 | 10 | else: 11 | from importlib.metadata import version as meta_version 12 | 13 | def version() -> str: 14 | return meta_version("it-depends") 15 | 16 | 17 | APP_DIRS = AppDirs("it-depends", "Trail of Bits") -------------------------------------------------------------------------------- /it_depends/native.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | from pathlib import Path 3 | import re 4 | from tempfile import NamedTemporaryFile 5 | from threading import Lock 6 | from typing import Dict, FrozenSet, Iterator, Optional 7 | 8 | from tqdm import tqdm 9 | 10 | from . 
import version as it_depends_version 11 | from .docker import DockerContainer, InMemoryDockerfile, InMemoryFile 12 | from .dependencies import ( 13 | Dependency, 14 | DependencyResolver, 15 | DockerSetup, 16 | Package, 17 | SemanticVersion, 18 | ) 19 | 20 | logger = getLogger(__name__) 21 | 22 | 23 | def make_dockerfile(docker_setup: DockerSetup) -> InMemoryDockerfile: 24 | install_script = InMemoryFile("install.sh", docker_setup.install_package_script.encode("utf-8")) 25 | run_script = InMemoryFile("run.sh", docker_setup.load_package_script.encode("utf-8")) 26 | baseline_script = InMemoryFile("baseline.sh", docker_setup.baseline_script.encode("utf-8")) 27 | pkgs = " ".join(docker_setup.apt_get_packages) 28 | return InMemoryDockerfile( 29 | f""" 30 | FROM ubuntu:20.04 31 | 32 | RUN mkdir -p /workdir 33 | 34 | RUN ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime 35 | 36 | RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends strace {pkgs} 37 | 38 | {docker_setup.post_install} 39 | 40 | WORKDIR /workdir 41 | 42 | COPY install.sh . 43 | COPY run.sh . 44 | COPY baseline.sh . 45 | RUN chmod +x *.sh 46 | """, 47 | local_files=(install_script, run_script, baseline_script), 48 | ) 49 | 50 | 51 | STRACE_LIBRARY_REGEX = re.compile( 52 | r"^open(at)?\(\s*[^,]*\s*,\s*\"((.+?)([^\./]+)\.so(\.(.+?))?)\".*" 53 | ) 54 | CONTAINERS_BY_SOURCE: Dict[DependencyResolver, DockerContainer] = {} 55 | BASELINES_BY_SOURCE: Dict[DependencyResolver, FrozenSet[Dependency]] = {} 56 | _CONTAINER_LOCK: Lock = Lock() 57 | 58 | 59 | def get_dependencies( 60 | container: DockerContainer, command: str, pre_command: Optional[str] = None 61 | ) -> Iterator[Dependency]: 62 | """Yields all dynamic libraries loaded by `command`, in order, including duplicates""" 63 | stdout = NamedTemporaryFile(prefix="stdout", delete=False) 64 | if pre_command is not None: 65 | pre_command = f"{pre_command} > /dev/null 2>/dev/null && " 66 | else: 67 | pre_command = "" 68 | command = f"{pre_command}strace -e open,openat -f {command} 3>&1 1>&2 2>&3" 69 | try: 70 | container.run( 71 | "bash", 72 | "-c", 73 | command, 74 | rebuild=False, 75 | interactive=False, 76 | stdout=stdout, 77 | check_existence=False, 78 | ) 79 | stdout.close() 80 | with open(stdout.name, "r") as f: 81 | for line in f.readlines(): 82 | m = STRACE_LIBRARY_REGEX.match(line) 83 | if m: 84 | path = m.group(2) 85 | if path not in ("/etc/ld.so.cache",) and path.startswith("/"): 86 | yield Dependency( 87 | package=path, 88 | source="ubuntu", # make the package be from the UbuntuResolver 89 | semantic_version=SemanticVersion.parse("*"), 90 | ) 91 | finally: 92 | Path(stdout.name).unlink() 93 | 94 | 95 | def get_package_dependencies(container: DockerContainer, package: Package) -> Iterator[Dependency]: 96 | yield from get_dependencies( 97 | container=container, 98 | pre_command=f"./install.sh {package.name} {package.version!s}", 99 | command=f"./run.sh {package.name}", 100 | ) 101 | 102 | 103 | def get_baseline_dependencies(container: DockerContainer) -> Iterator[Dependency]: 104 | yield from get_dependencies(container=container, command="./baseline.sh") 105 | 106 | 107 | def container_for(source: DependencyResolver) -> DockerContainer: 108 | with _CONTAINER_LOCK: 109 | if source in CONTAINERS_BY_SOURCE: 110 | return CONTAINERS_BY_SOURCE[source] 111 | docker_setup = source.docker_setup() 112 | if docker_setup is None: 113 | raise ValueError(f"source {source.name} does not support native dependency resolution") 114 | with tqdm( 115 | 
desc=f"configuring Docker for {source.name}", 116 | leave=False, 117 | unit=" steps", 118 | total=2, 119 | initial=1, 120 | ) as t, make_dockerfile(docker_setup) as dockerfile: 121 | container = DockerContainer( 122 | f"trailofbits/it-depends-{source.name!s}", 123 | dockerfile, 124 | tag=it_depends_version(), 125 | ) 126 | t.update(1) 127 | container.rebuild() 128 | CONTAINERS_BY_SOURCE[source] = container 129 | return container 130 | 131 | 132 | def baseline_for(source: DependencyResolver) -> FrozenSet[Dependency]: 133 | with _CONTAINER_LOCK: 134 | if source not in BASELINES_BY_SOURCE: 135 | baseline = frozenset(get_baseline_dependencies(container_for(source))) 136 | BASELINES_BY_SOURCE[source] = baseline 137 | return baseline 138 | else: 139 | return BASELINES_BY_SOURCE[source] 140 | 141 | 142 | def get_native_dependencies(package: Package, use_baseline: bool = False) -> Iterator[Dependency]: 143 | """Yields the native dependencies for an individual package""" 144 | if not package.resolver.docker_setup(): 145 | return 146 | container = container_for(package.resolver) 147 | if use_baseline: 148 | baseline = baseline_for(package.resolver) 149 | else: 150 | baseline = frozenset() 151 | for dep in get_package_dependencies(container, package): 152 | if dep not in baseline: 153 | yield dep 154 | -------------------------------------------------------------------------------- /it_depends/npm.py: -------------------------------------------------------------------------------- 1 | import json 2 | from logging import getLogger 3 | from pathlib import Path 4 | import subprocess 5 | from typing import Dict, Iterator, Optional, Union 6 | 7 | from semantic_version import NpmSpec, SimpleSpec, Version 8 | 9 | from .dependencies import ( 10 | AliasedDependency, 11 | Dependency, 12 | DependencyResolver, 13 | DockerSetup, 14 | Package, 15 | PackageCache, 16 | SemanticVersion, 17 | SourcePackage, 18 | SourceRepository, 19 | ) 20 | 21 | log = getLogger(__file__) 22 | 23 | 24 | class NPMResolver(DependencyResolver): 25 | name = "npm" 26 | description = "classifies the dependencies of JavaScript packages using `npm`" 27 | 28 | def can_resolve_from_source(self, repo: SourceRepository) -> bool: 29 | return bool(self.is_available()) and (repo.path / "package.json").exists() 30 | 31 | def resolve_from_source( 32 | self, repo: SourceRepository, cache: Optional[PackageCache] = None 33 | ) -> Optional[SourcePackage]: 34 | if not self.can_resolve_from_source(repo): 35 | return None 36 | return NPMResolver.from_package_json(repo) 37 | 38 | @staticmethod 39 | def from_package_json(package_json_path: Union[Path, str, SourceRepository]) -> SourcePackage: 40 | if isinstance(package_json_path, SourceRepository): 41 | path = package_json_path.path 42 | source_repository = package_json_path 43 | else: 44 | path = Path(package_json_path) 45 | source_repository = SourceRepository(path.parent) 46 | if path.is_dir(): 47 | path = path / "package.json" 48 | if not path.exists(): 49 | raise ValueError(f"Expected a package.json file at {path!s}") 50 | with open(path, "r") as json_file: 51 | package = json.load(json_file) 52 | if "name" in package: 53 | name = package["name"] 54 | else: 55 | # use the parent directory name 56 | name = path.parent.name 57 | if "dependencies" in package: 58 | dependencies: Dict[str, str] = package["dependencies"] 59 | else: 60 | dependencies = {} 61 | if "version" in package: 62 | version = package["version"] 63 | else: 64 | version = "0" 65 | version = Version.coerce(version) 66 | 67 | return 
SourcePackage( 68 | name, 69 | version, 70 | source_repo=source_repository, 71 | source="npm", 72 | dependencies=[generate_dependency_from_information(dep_name, dep_version) 73 | for dep_name, dep_version in dependencies.items()], 74 | ) 75 | 76 | def resolve(self, dependency: Union[Dependency, AliasedDependency]) -> Iterator[Package]: 77 | """Yields all packages that satisfy the dependency without expanding those packages' dependencies""" 78 | if dependency.source != self.name: 79 | return 80 | 81 | dependency_name = dependency.package 82 | if isinstance(dependency, AliasedDependency): 83 | dependency_name = f"@{dependency.alias_name}" 84 | # Fix an issue when setting a dependency with a scope, we need to prefix it with @ 85 | elif dependency_name.count("/") == 1 and not dependency_name.startswith("@"): 86 | dependency_name = f"@{dependency_name}" 87 | 88 | try: 89 | output = subprocess.check_output( 90 | [ 91 | "npm", 92 | "view", 93 | "--json", 94 | f"{dependency_name}@{dependency.semantic_version!s}", 95 | "name", 96 | "version", 97 | "dependencies", 98 | ] 99 | ) 100 | except subprocess.CalledProcessError as e: 101 | log.warning( 102 | f"Error running `npm view --json {dependency_name}@{dependency.semantic_version!s} " 103 | f"dependencies`: {e!s}" 104 | ) 105 | return 106 | 107 | try: 108 | result = json.loads(output) 109 | except ValueError as e: 110 | raise ValueError( 111 | f"Error parsing output of `npm view --json {dependency_name}@{dependency.semantic_version!s} " 112 | f"dependencies`: {e!s}" 113 | ) 114 | 115 | # Only 1 version 116 | if isinstance(result, dict): 117 | deps = result.get("dependencies", {}) 118 | yield Package( 119 | name=dependency.package, 120 | version=Version.coerce(result["version"]), 121 | source=self, 122 | dependencies=( 123 | generate_dependency_from_information(dep_name, dep_version, self) for dep_name, dep_version in deps.items() 124 | ), 125 | ) 126 | elif isinstance(result, list): 127 | # This means that there are multiple dependencies that match the version 128 | for package in result: 129 | assert package["name"] == dependency.package, "Problem with NPM view output" 130 | dependencies = package.get("dependencies", {}) 131 | yield Package( 132 | name=dependency.package, 133 | version=Version.coerce(package["version"]), 134 | source=self, 135 | dependencies=(generate_dependency_from_information(dep_name, dep_version, self) 136 | for dep_name, dep_version in dependencies.items()) 137 | ) 138 | 139 | @classmethod 140 | def parse_spec(cls, spec: str) -> SemanticVersion: 141 | try: 142 | return NpmSpec(spec) 143 | except ValueError: 144 | pass 145 | try: 146 | return SimpleSpec(spec) 147 | except ValueError: 148 | pass 149 | # Sometimes NPM specs have whitespace, which trips up the parser 150 | no_whitespace = "".join(c for c in spec if c != " ") 151 | if no_whitespace != spec: 152 | return NPMResolver.parse_spec(no_whitespace) 153 | 154 | def docker_setup(self) -> DockerSetup: 155 | return DockerSetup( 156 | apt_get_packages=["npm"], 157 | install_package_script="""#!/usr/bin/env bash 158 | npm install $1@$2 159 | """, 160 | load_package_script="""#!/usr/bin/env bash 161 | node -e "require(\\"$1\\")" 162 | """, 163 | baseline_script='#!/usr/bin/env node -e ""\n', 164 | ) 165 | 166 | 167 | def generate_dependency_from_information( 168 | package_name: str, 169 | package_version: str, 170 | source: Union[str, NPMResolver] = "npm", 171 | ) -> Union[Dependency, AliasedDependency, None]: 172 | """Generate a dependency from a dependency declaration. 
173 | 174 | A dependency may be declared like this: 175 | * [<@scope>/]<package-name>@<semantic-version> 176 | * <alias-name>@npm:<@scope>/<package-name>@<semantic-version> 177 | """ 178 | if package_version.startswith("npm:"): 179 | # Does the package have a scope? 180 | 181 | if package_version.count("@") == 2: 182 | parts = package_version.split("@") 183 | scope, version = parts[1], parts[2] 184 | 185 | semantic_version = NPMResolver.parse_spec(version) 186 | if semantic_version is None: 187 | log.warning("Unable to compute the semantic version of %s (%s)", package_name, package_version) 188 | semantic_version = SimpleSpec("*") 189 | 190 | return AliasedDependency( 191 | package=package_name, 192 | alias_name=scope, 193 | semantic_version=semantic_version, 194 | source=source, 195 | ) 196 | 197 | else: 198 | msg = (f"This type of dependency declaration ({package_name}: {package_version}) is not yet supported." 199 | f" Please open an issue on GitHub.") 200 | raise ValueError(msg) 201 | 202 | else: 203 | semantic_version = NPMResolver.parse_spec(package_version) 204 | if semantic_version is None: 205 | log.warning("Unable to compute the semantic version of %s (%s)", package_name, package_version) 206 | semantic_version = SimpleSpec("*") 207 | 208 | return Dependency( 209 | package=package_name, 210 | semantic_version=semantic_version, 211 | source=source, 212 | ) 213 | --------------------------------------------------------------------------------
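A brief illustration of the helper above (hypothetical package.json-style entries, following the two declaration forms in its docstring):

    from it_depends.npm import generate_dependency_from_information

    # a plain entry such as {"left-pad": "^1.3.0"} yields an ordinary Dependency
    dep = generate_dependency_from_information("left-pad", "^1.3.0")
    print(type(dep).__name__, dep.package, str(dep.semantic_version))

    # a scoped alias such as {"my-types": "npm:@types/node@^18.0.0"} yields an AliasedDependency
    alias = generate_dependency_from_information("my-types", "npm:@types/node@^18.0.0")
    print(type(alias).__name__, alias.package, alias.alias_name)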
possible_delimiter in ("=", "<", ">", "~", "!"): 75 | delimiter_pos = line.find(possible_delimiter) 76 | if delimiter_pos >= 0: 77 | break 78 | if delimiter_pos < 0: 79 | # the requirement does not have a version specifier 80 | name = line 81 | version = SimpleSpec("*") 82 | else: 83 | name = line[:delimiter_pos] 84 | version = PipResolver._get_specifier(line[delimiter_pos:]) 85 | return Dependency(package=name, semantic_version=version, source=PipResolver()) 86 | 87 | @staticmethod 88 | def get_dependencies( 89 | dist_or_requirements_txt_path: Union[JohnnyDist, Path, str] 90 | ) -> Iterable[Dependency]: 91 | if isinstance(dist_or_requirements_txt_path, JohnnyDist): 92 | return ( 93 | Dependency( 94 | package=child.name, 95 | semantic_version=PipResolver._get_specifier(child), 96 | source=PipResolver(), 97 | ) 98 | for child in dist_or_requirements_txt_path.children 99 | ) 100 | elif isinstance(dist_or_requirements_txt_path, str): 101 | dist_or_requirements_txt_path = Path(dist_or_requirements_txt_path) 102 | with open(dist_or_requirements_txt_path / "requirements.txt", "r") as f: 103 | return filter( 104 | lambda d: d is not None, 105 | ( 106 | PipResolver.parse_requirements_txt_line(line) # type: ignore 107 | for line in f.readlines() 108 | ), 109 | ) 110 | 111 | @staticmethod 112 | def get_version(version_str: str, none_default: Optional[Version] = None) -> Optional[Version]: 113 | if version_str == "none": 114 | # this will happen if the dist is for a local wheel: 115 | return none_default 116 | else: 117 | try: 118 | return Version.coerce(version_str) 119 | except ValueError: 120 | components = version_str.split(".") 121 | if len(components) == 4: 122 | try: 123 | # assume the version component after the last period is the release 124 | return Version( 125 | major=int(components[0]), 126 | minor=int(components[1]), 127 | patch=int(components[2]), 128 | prerelease=components[3], 129 | ) 130 | except ValueError: 131 | pass 132 | # TODO: Figure out a better way to handle invalid version strings 133 | return None 134 | 135 | def resolve_dist( 136 | self, 137 | dist: JohnnyDist, 138 | recurse: bool = True, 139 | version: SemanticVersion = SimpleSpec("*"), 140 | ) -> Iterable[Package]: 141 | queue = [(dist, version)] 142 | packages: List[Package] = [] 143 | while queue: 144 | dist, sem_version = queue.pop() 145 | if dist.version_installed is not None: 146 | none_default = Version.coerce(dist.version_installed) 147 | else: 148 | none_default = None 149 | for version in sem_version.filter( 150 | filter( 151 | lambda v: v is not None, 152 | ( 153 | PipResolver.get_version(v_str, none_default=none_default) 154 | for v_str in dist.versions_available 155 | ), 156 | ) 157 | ): 158 | package = Package( 159 | name=dist.name, 160 | version=version, 161 | dependencies=self.get_dependencies(dist), 162 | source=self, 163 | ) 164 | packages.append(package) 165 | if not recurse: 166 | break 167 | queue.extend((child, self._get_specifier(child)) for child in dist.children) 168 | return packages 169 | 170 | def resolve(self, dependency: Dependency) -> Iterator[Package]: 171 | log.debug(f"Resolving {dependency}") 172 | try: 173 | return iter( 174 | self.resolve_dist( 175 | JohnnyDist(f"{dependency.package}"), 176 | version=dependency.semantic_version, 177 | recurse=False, 178 | ) 179 | ) 180 | except ValueError as e: 181 | log.warning(str(e)) 182 | return iter(()) 183 | 184 | 185 | class PipSourcePackage(SourcePackage): 186 | @staticmethod 187 | def from_dist(dist: JohnnyDist, source_path: Path) -> "PipSourcePackage": 188 |
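# johnnydep reports a pinned requirement as a specifier such as "==1.2.3"; strip the leading operator to recover the bare version string: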
version_str = dist.specifier 189 | if version_str.startswith("=="): 190 | version_str = version_str[2:] 191 | return PipSourcePackage( 192 | name=dist.name, 193 | version=PipResolver.get_version(version_str), 194 | dependencies=PipResolver.get_dependencies(dist), 195 | source_repo=SourceRepository(source_path), 196 | source="pip", 197 | ) 198 | 199 | @staticmethod 200 | def from_repo(repo: SourceRepository) -> "PipSourcePackage": 201 | if (repo.path / "setup.py").exists(): 202 | with TemporaryDirectory() as tmp_dir: 203 | try: 204 | _ = sys.stderr.fileno() 205 | stderr = sys.stderr 206 | except io.UnsupportedOperation: 207 | stderr = None 208 | subprocess.check_call( 209 | [ 210 | sys.executable, 211 | "-m", 212 | "pip", 213 | "wheel", 214 | "--no-deps", 215 | "-w", 216 | tmp_dir, 217 | str(repo.path.absolute()), 218 | ], 219 | stdout=stderr, 220 | ) 221 | wheel = None 222 | for whl in Path(tmp_dir).glob("*.whl"): 223 | if wheel is not None: 224 | raise ValueError( 225 | f"`pip wheel --no-deps {repo.path!s}` produced multiple wheel files!" 226 | ) 227 | wheel = whl 228 | if wheel is None: 229 | raise ValueError( 230 | f"`pip wheel --no-deps {repo.path!s}` did not produce a wheel file!" 231 | ) 232 | dist = JohnnyDist(str(wheel)) 233 | # force JohnnyDist to read the dependencies before deleting the wheel: 234 | _ = dist.children 235 | return PipSourcePackage.from_dist(dist, repo.path) 236 | elif (repo.path / "requirements.txt").exists(): 237 | # We just have a requirements.txt and no setup.py 238 | # Use the directory name as the package name 239 | name = repo.path.absolute().name 240 | if (repo.path / "VERSION").exists(): 241 | with open(repo.path / "VERSION", "r") as f: 242 | version = PipResolver.get_version(f.read().strip()) 243 | else: 244 | version = PipResolver.get_version("0.0.0") 245 | log.info(f"Could not detect {repo.path} version. 
Using: {version}") 246 | return PipSourcePackage( 247 | name=name, 248 | version=version, 249 | dependencies=PipResolver.get_dependencies(repo.path), 250 | source_repo=repo, 251 | source="pip", 252 | ) 253 | else: 254 | raise ValueError(f"{repo.path} neither has a setup.py nor a requirements.txt") 255 | -------------------------------------------------------------------------------- /it_depends/resolver.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from collections import defaultdict 3 | from logging import getLogger 4 | from typing import Dict, FrozenSet, Iterable, Iterator, List, Optional, Set, Tuple 5 | 6 | from semantic_version.base import AllOf, BaseSpec 7 | 8 | from .dependencies import Dependency, Package, PackageCache 9 | from .sbom import SBOM 10 | 11 | logger = getLogger(__name__) 12 | 13 | 14 | class CompoundSpec(BaseSpec): 15 | def __init__(self, *to_combine: BaseSpec): 16 | super(CompoundSpec, self).__init__(",".join(s.expression for s in to_combine)) 17 | self.clause = AllOf(*(s.clause for s in to_combine)) 18 | 19 | @classmethod 20 | def _parse_to_clause(cls, expression): 21 | """Converts an expression to a clause.""" 22 | # Placeholder: self.clause is set directly in __init__ 23 | return None 24 | 25 | 26 | class PackageSet: 27 | def __init__(self): 28 | self._packages: Dict[Tuple[str, str], Package] = {} 29 | self._unsatisfied: Dict[Tuple[str, str], Dict[Dependency, Set[Package]]] = \ 30 | defaultdict(lambda: defaultdict(set)) 31 | self.is_valid: bool = True 32 | self.is_complete: bool = True 33 | 34 | def __eq__(self, other): 35 | return isinstance(other, PackageSet) and frozenset(self._packages.values()) == frozenset(other._packages.values()) 36 | 37 | def __hash__(self): 38 | return hash(frozenset(self._packages.values())) 39 | 40 | def __len__(self): 41 | return len(self._packages) 42 | 43 | def __iter__(self) -> Iterator[Package]: 44 | yield from self._packages.values() 45 | 46 | def __contains__(self, package: Package) -> bool: 47 | pkg_spec = (package.name, package.source) 48 | return pkg_spec in self._packages and self._packages[pkg_spec] == package 49 | 50 | def unsatisfied_dependencies(self) -> Iterator[Tuple[Dependency, FrozenSet[Package]]]: 51 | for (pkg_name, pkg_source), deps in sorted( 52 | # try the dependencies with the fewest distinct requirements first 53 | self._unsatisfied.items(), 54 | key=lambda x: (len(x[1]), x[0]) 55 | ): 56 | if len(deps) == 0: 57 | continue 58 | elif len(deps) == 1: 59 | dep, packages = next(iter(deps.items())) 60 | else: 61 | # there are multiple requirements for the same dependency 62 | spec = CompoundSpec(*(d.semantic_version for d in deps.keys())) 63 | dep = Dependency(pkg_name, pkg_source, spec) 64 | packages = { 65 | p 66 | for packages in deps.values() 67 | for p in packages 68 | } 69 | 70 | yield dep, frozenset(packages) 71 | 72 | def copy(self) -> "PackageSet": 73 | ret = PackageSet() 74 | ret._packages = self._packages.copy() 75 | ret._unsatisfied = defaultdict(lambda: defaultdict(set)) 76 | for dep_spec, deps in self._unsatisfied.items(): 77 | ret._unsatisfied[dep_spec] = defaultdict(set) 78 | for dep, packages in deps.items(): 79 | ret._unsatisfied[dep_spec][dep] = set(packages) 80 | assert all(p in ret for p in packages) 81 | ret.is_valid = self.is_valid 82 | ret.is_complete = self.is_complete 83 | return ret 84 | 85 | def add(self, package: Package): 86 | pkg_spec = (package.name, package.source) 87 | if pkg_spec in self._packages and self._packages[pkg_spec].version != package.version:
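# the same (name, source) is already pinned to a different version, so this package set is contradictory: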
88 | self.is_valid = False 89 | if not self.is_valid: 90 | return 91 | self._packages[pkg_spec] = package 92 | if pkg_spec in self._unsatisfied: 93 | # there are some existing packages that have unsatisfied dependencies that could be 94 | # satisfied by this new package 95 | for dep in list(self._unsatisfied[pkg_spec].keys()): 96 | if dep.match(package): 97 | del self._unsatisfied[pkg_spec][dep] 98 | if len(self._unsatisfied[pkg_spec]) == 0: 99 | del self._unsatisfied[pkg_spec] 100 | # add any new unsatisfied dependencies for this package 101 | for dep in package.dependencies: 102 | dep_spec = (dep.package, dep.source) 103 | if dep_spec not in self._packages: 104 | self._unsatisfied[dep_spec][dep].add(package) 105 | elif not dep.match(self._packages[dep_spec]): 106 | self.is_valid = False 107 | break 108 | 109 | self.is_complete = self.is_valid and len(self._unsatisfied) == 0 110 | 111 | 112 | class PartialResolution: 113 | def __init__(self, packages: Iterable[Package] = (), dependencies: Iterable[Package] = (), 114 | parent: Optional["PartialResolution"] = None): 115 | self._packages: FrozenSet[Package] = frozenset(packages) 116 | self._dependencies: FrozenSet[Package] = frozenset(dependencies) 117 | self.parent: Optional[PartialResolution] = parent 118 | if self.parent is not None: 119 | self.packages: PackageSet = self.parent.packages.copy() 120 | else: 121 | self.packages = PackageSet() 122 | for package in self._packages: 123 | self.packages.add(package) 124 | if not self.is_valid: 125 | break 126 | if self.is_valid: 127 | for dep in self._dependencies: 128 | self.packages.add(dep) 129 | if not self.is_valid: 130 | break 131 | 132 | @property 133 | def is_valid(self) -> bool: 134 | return self.packages.is_valid 135 | 136 | @property 137 | def is_complete(self) -> bool: 138 | return self.packages.is_complete 139 | 140 | def __contains__(self, package: Package) -> bool: 141 | return package in self.packages 142 | 143 | def add(self, packages: Iterable[Package], depends_on: Package) -> "PartialResolution": 144 | return PartialResolution(packages, (depends_on,), parent=self) 145 | 146 | def packages(self) -> Iterator[Package]: 147 | yield from self.packages 148 | 149 | __iter__ = packages 150 | 151 | def dependencies(self) -> Iterator[Tuple[Package, Package]]: 152 | pr: Optional[PartialResolution] = self 153 | while pr is not None: 154 | for depends_on in sorted(pr._dependencies): 155 | for package in pr._packages: 156 | yield package, depends_on 157 | pr = pr.parent 158 | 159 | def __len__(self) -> int: 160 | return len(self.packages) 161 | 162 | def __eq__(self, other): 163 | return isinstance(other, PartialResolution) and self.packages == other.packages 164 | 165 | def __hash__(self): 166 | return hash(self.packages) 167 | 168 | 169 | def resolve_sbom(root_package: Package, packages: PackageCache, order_ascending: bool = True) -> Iterator[SBOM]: 170 | if not root_package.dependencies: 171 | yield SBOM((), (root_package,)) 172 | return 173 | 174 | logger.info(f"Resolving the {['newest', 'oldest'][order_ascending]} possible SBOM for {root_package.name}") 175 | 176 | stack: List[PartialResolution] = [ 177 | PartialResolution(packages=(root_package,)) 178 | ] 179 | 180 | history: Set[PartialResolution] = { 181 | pr for pr in stack 182 | if pr.is_valid 183 | } 184 | 185 | while stack: 186 | pr = stack.pop() 187 | if pr.is_complete: 188 | yield SBOM(pr.dependencies(), root_packages=(root_package,)) 189 | continue 190 | elif not pr.is_valid: 191 | continue 192 | 193 | for dep, required_by in 
pr.packages.unsatisfied_dependencies(): 194 | if not PartialResolution(packages=required_by, parent=pr).is_valid: 195 | continue 196 | for match in sorted( 197 | packages.match(dep), 198 | key=lambda p: p.version, 199 | reverse=order_ascending 200 | ): 201 | next_pr = pr.add(required_by, match) 202 | if next_pr.is_valid and next_pr not in history: 203 | history.add(next_pr) 204 | stack.append(next_pr) 205 | -------------------------------------------------------------------------------- /it_depends/sbom.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, FrozenSet, Iterable, List, Optional, Tuple, Type, TypeVar 2 | 3 | from cyclonedx.builder.this import this_component as cdx_lib_component 4 | from cyclonedx.model import XsUri 5 | from cyclonedx.model.bom import Bom 6 | from cyclonedx.model.component import Component, ComponentType 7 | from cyclonedx.model.contact import OrganizationalEntity 8 | from cyclonedx.output.json import JsonV1Dot5 9 | 10 | from . import version 11 | from .dependencies import Package 12 | 13 | __all__ = "cyclonedx_to_json", "SBOM" 14 | 15 | 16 | S = TypeVar("S", bound="SBOM") 17 | 18 | 19 | class SBOM: 20 | def __init__(self, dependencies: Iterable[Tuple[Package, Package]] = (), root_packages: Iterable[Package] = ()): 21 | self.dependencies: FrozenSet[Tuple[Package, Package]] = frozenset(dependencies) 22 | self.root_packages: FrozenSet[Package] = frozenset(root_packages) 23 | 24 | @property 25 | def packages(self) -> FrozenSet[Package]: 26 | return self.root_packages | { 27 | p 28 | for deps in self.dependencies 29 | for p in deps 30 | } 31 | 32 | def __str__(self): 33 | return ", ".join(p.full_name for p in sorted(self.packages)) 34 | 35 | def to_cyclonedx(self) -> Bom: 36 | bom = Bom() 37 | 38 | expanded: Dict[Package, Component] = {} 39 | 40 | root_component: Optional[Component] = None 41 | 42 | for root_package in sorted( 43 | self.root_packages, 44 | key=lambda package: package.full_name, 45 | reverse=True 46 | ): 47 | root_component = Component( 48 | name=root_package.name, 49 | type=ComponentType.APPLICATION, 50 | version=str(root_package.version), 51 | bom_ref=root_package.full_name, 52 | ) 53 | bom.components.add(root_component) 54 | expanded[root_package] = root_component 55 | 56 | bom.metadata.tools.components.add(cdx_lib_component()) 57 | bom.metadata.tools.components.add(Component( 58 | name="it-depends", 59 | supplier=OrganizationalEntity( 60 | name="Trail of Bits", 61 | urls=[XsUri("https://www.trailofbits.com/")] 62 | ), 63 | type=ComponentType.APPLICATION, 64 | version=version(), 65 | )) 66 | 67 | if root_component is not None: 68 | bom.metadata.component = root_component 69 | 70 | for pkg, depends_on in self.dependencies: 71 | if pkg not in expanded: 72 | component = Component( 73 | name=pkg.name, 74 | type=ComponentType.LIBRARY, 75 | version=str(pkg.version), 76 | bom_ref=f"{pkg.full_name}@{pkg.version!s}" 77 | ) 78 | bom.components.add(component) 79 | else: 80 | component = expanded[pkg] 81 | if depends_on not in expanded: 82 | d_component = Component( 83 | name=depends_on.name, 84 | type=ComponentType.LIBRARY, 85 | version=str(depends_on.version), 86 | bom_ref=f"{depends_on.full_name}@{depends_on.version!s}" 87 | ) 88 | bom.components.add(d_component) 89 | else: 90 | d_component = expanded[depends_on] 91 | bom.register_dependency(component, [d_component]) 92 | 93 | return bom 94 | 95 | def __or__(self, other: "SBOM") -> "SBOM": 96 | return SBOM(self.dependencies | other.dependencies, 
self.root_packages | other.root_packages) 97 | 98 | def __hash__(self): 99 | return hash((self.root_packages, self.dependencies)) 100 | 101 | def __eq__(self, other): 102 | return isinstance(other, SBOM) and self.root_packages == other.root_packages \ 103 | and self.dependencies == other.dependencies 104 | 105 | 106 | def cyclonedx_to_json(bom: Bom, indent: int = 2) -> str: 107 | return JsonV1Dot5(bom).output_as_string(indent=indent) 108 | -------------------------------------------------------------------------------- /it_depends/ubuntu/__init__.py: -------------------------------------------------------------------------------- 1 | # Load the resolver so it auto-registers itself 2 | from . import resolver 3 | -------------------------------------------------------------------------------- /it_depends/ubuntu/apt.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import gzip 3 | from pathlib import Path 4 | import re 5 | import logging 6 | from threading import Lock 7 | from typing import Dict, List, Optional, Set, Tuple 8 | from urllib import request 9 | 10 | from ..it_depends import APP_DIRS 11 | from .docker import run_command 12 | 13 | logger = logging.getLogger(__name__) 14 | all_packages: Optional[Tuple[str, ...]] = None 15 | _APT_LOCK: Lock = Lock() 16 | 17 | 18 | def get_apt_packages() -> Tuple[str, ...]: 19 | with _APT_LOCK: 20 | global all_packages 21 | if all_packages is None: 22 | logger.info("Rebuilding global apt package list.") 23 | raw_packages = run_command("apt", "list").decode("utf-8") 24 | all_packages = tuple(x.split("/")[0] for x in raw_packages.splitlines() if x) 25 | 26 | logger.info(f"Global apt package count: {len(all_packages)}") 27 | return all_packages 28 | 29 | 30 | def search_package(package: str) -> str: 31 | found_packages: List[str] = [] 32 | for apt_package in get_apt_packages(): 33 | if package.lower() not in apt_package: 34 | continue 35 | if re.match( 36 | rf"^(lib)*{re.escape(package.lower())}(\-*([0-9]*)(\.*))*(\-dev)*$", 37 | apt_package, 38 | ): 39 | found_packages.append(apt_package) 40 | found_packages.sort(key=len, reverse=True) 41 | if not found_packages: 42 | raise ValueError(f"Package {package} not found in apt package list.") 43 | logger.info(f"Found {len(found_packages)} matching packages; choosing {found_packages[0]}") 44 | return found_packages[0] 45 | 46 | 47 | contents_db: Dict[str, List[str]] = {} 48 | _loaded_dbs: Set[Path] = set() 49 | 50 | 51 | @functools.lru_cache(maxsize=5242880) 52 | def _file_to_package_contents(filename: str, arch: str = "amd64"): 53 | """ 54 | Downloads and uses the apt-file Contents database directly 55 | # http://security.ubuntu.com/ubuntu/dists/focal-security/Contents-amd64.gz 56 | # http://security.ubuntu.com/ubuntu/dists/focal-security/Contents-i386.gz 57 | """ 58 | if arch not in ("amd64", "i386"): 59 | raise ValueError("Only amd64 and i386 supported") 60 | selected = None 61 | 62 | dbfile = Path(APP_DIRS.user_cache_dir) / f"Contents-{arch}.gz" 63 | if not dbfile.exists(): 64 | request.urlretrieve( 65 | f"http://security.ubuntu.com/ubuntu/dists/focal-security/Contents-{arch}.gz", 66 | dbfile, 67 | ) 68 | if dbfile not in _loaded_dbs: 69 | logger.info("Rebuilding contents db") 70 | with gzip.open(str(dbfile), "rt") as contents: 71 | for line in contents.readlines(): 72 | filename_i, *packages_i = re.split(r"\s+", line[:-1]) 73 | assert len(packages_i) > 0 74 | contents_db.setdefault(filename_i, []).extend(packages_i) 75 | _loaded_dbs.add(dbfile) 76 | 77 | regex
= re.compile("(.*/)+" + filename + "$") 78 | matches = 0 79 | for (filename_i, packages_i) in contents_db.items(): 80 | if regex.match(filename_i): 81 | matches += 1 82 | for package_i in packages_i: 83 | if selected is None or len(selected[0]) > len(filename_i): 84 | selected = filename_i, package_i 85 | if selected: 86 | logger.info(f"Found {matches} matching files for {filename}. Choosing package {selected[1]}") 87 | else: 88 | raise ValueError(f"{filename} not found in Contents database") 89 | return selected[1] 90 | 91 | 92 | @functools.lru_cache(maxsize=5242880) 93 | def file_to_packages(filename: str, arch: str = "amd64") -> List[str]: 94 | if arch not in ("amd64", "i386"): 95 | raise ValueError("Only amd64 and i386 supported") 96 | logger.debug(f'Running [{" ".join(["apt-file", "-x", "search", filename])}]') 97 | contents = run_command("apt-file", "-x", "search", filename).decode("utf-8") 98 | selected: List[str] = [] 99 | for line in contents.split("\n"): 100 | if not line: 101 | continue 102 | package_i, _ = line.split(": ") 103 | selected.append(package_i) 104 | return sorted(selected) 105 | 106 | 107 | def file_to_package(filename: str, arch: str = "amd64") -> str: 108 | packages = file_to_packages(filename, arch) 109 | if packages: 110 | _, result = min((len(pkg), pkg) for pkg in packages) 111 | logger.info(f"Found {len(packages)} matching packages for {filename}. Choosing {result}") 112 | return result 113 | else: 114 | raise ValueError(f"{filename} not found in apt-file") 115 | 116 | 117 | def cached_file_to_package( 118 | pattern: str, file_to_package_cache: Optional[List[Tuple[str, str]]] = None 119 | ) -> str: 120 | # file_to_package_cache contains all the files that are provided by previous 121 | # dependencies. If a file pattern is already satisfied by the current files, 122 | # use the package already included as a dependency 123 | if file_to_package_cache is not None: 124 | regex = re.compile("(.*/)+" + pattern + "$") 125 | for package_i, filename_i in file_to_package_cache: 126 | if regex.match(filename_i): 127 | return package_i 128 | 129 | package = file_to_package(pattern) 130 | 131 | # a new package was chosen; add all the files it provides to our cache 132 | # (uses the `apt-file` command line tool) 133 | if file_to_package_cache is not None: 134 | contents = run_command("apt-file", "list", package).decode("utf-8") 135 | for line in contents.split("\n"): 136 | if ":" not in line: 137 | break 138 | package_i, filename_i = line.split(": ") 139 | file_to_package_cache.append((package_i, filename_i)) 140 | 141 | return package 142 | -------------------------------------------------------------------------------- /it_depends/ubuntu/docker.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | from pathlib import Path 3 | import shutil 4 | import subprocess 5 | import logging 6 | import re 7 | from threading import Lock 8 | from typing import Optional, Pattern 9 | 10 | from ..docker import DockerContainer, InMemoryDockerfile 11 | 12 | _container: Optional[DockerContainer] = None 13 | _UBUNTU_LOCK: Lock = Lock() 14 | 15 | _UBUNTU_NAME_MATCH: Pattern[str] = re.compile(r"^\s*name\s*=\s*\"ubuntu\"\s*$", flags=re.IGNORECASE) 16 | _VERSION_ID_MATCH: Pattern[str] = re.compile( 17 | r"^\s*version_id\s*=\s*\"([^\"]+)\"\s*$", flags=re.IGNORECASE 18 | ) 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | @lru_cache(maxsize=4) 24 | def is_running_ubuntu(check_version: Optional[str] = None) -> bool: 25 | """ 26 | Tests
whether the current system is running Ubuntu 27 | 28 | If `check_version` is not None, the specific version of Ubuntu is also tested. 29 | """ 30 | os_release_path = Path("/etc/os-release") 31 | if not os_release_path.exists(): 32 | return False 33 | is_ubuntu = False 34 | version: Optional[str] = None 35 | with open(os_release_path, "r") as f: 36 | for line in f.readlines(): 37 | line = line.strip() 38 | is_ubuntu = is_ubuntu or bool(_UBUNTU_NAME_MATCH.match(line)) 39 | if check_version is None: 40 | if is_ubuntu: 41 | return True 42 | elif version is None: 43 | m = _VERSION_ID_MATCH.match(line) 44 | if m: 45 | version = m.group(1) 46 | else: 47 | break 48 | return is_ubuntu and (check_version is None or version == check_version) 49 | 50 | 51 | def run_command(*args: str) -> bytes: 52 | """ 53 | Runs the given command in an Ubuntu 20.04 Docker container 54 | 55 | The container is built once (with apt-file preinstalled) and reused for subsequent commands. 56 | 57 | """ 58 | with _UBUNTU_LOCK: 59 | global _container 60 | if _container is None: 61 | with InMemoryDockerfile( 62 | """FROM ubuntu:20.04 63 | 64 | RUN apt-get update && apt-get install -y apt-file && apt-file update 65 | """ 66 | ) as dockerfile: 67 | _container = DockerContainer("trailofbits/it-depends-apt", dockerfile=dockerfile) 68 | _container.rebuild() 69 | logger.debug(f"running {' '.join(args)} in Docker") 70 | p = _container.run( 71 | *args, 72 | interactive=False, 73 | stdout=subprocess.PIPE, 74 | stderr=subprocess.DEVNULL, 75 | rebuild=False, 76 | ) 77 | if p.returncode != 0: 78 | raise subprocess.CalledProcessError(p.returncode, cmd=f"{' '.join(args)}") 79 | return p.stdout 80 | -------------------------------------------------------------------------------- /it_depends/ubuntu/resolver.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | import shutil 3 | import subprocess 4 | import logging 5 | import re 6 | from typing import Iterable, Iterator, Optional 7 | 8 | from .apt import file_to_packages 9 | from .docker import is_running_ubuntu, run_command 10 | from ..dependencies import ( 11 | Dependency, 12 | DependencyResolver, 13 | Dict, 14 | List, 15 | Package, 16 | PackageCache, 17 | ResolverAvailability, 18 | SimpleSpec, 19 | SourcePackage, 20 | SourceRepository, 21 | Tuple, 22 | Version, 23 | ) 24 | from ..native import get_native_dependencies 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | class UbuntuResolver(DependencyResolver): 30 | name = "ubuntu" 31 | description = "expands dependencies based upon Ubuntu package dependencies" 32 | 33 | _pattern = re.compile(r" *(?P<package>[^ ]*)( *\((?P<version>.*)\))?
 *") 34 | _ubuntu_version = re.compile("([0-9]+:)*(?P<version>[^-]*)(-.*)*") 35 | 36 | @staticmethod 37 | @lru_cache(maxsize=2048) 38 | def ubuntu_packages(package_name: str) -> Iterable[Package]: 39 | """Iterates over all of the package versions available for a package name""" 40 | # Parses the dependencies of dependency.package out of the `apt show` command 41 | logger.debug(f"Running `apt show -a {package_name}`") 42 | try: 43 | contents = run_command("apt", "show", "-a", package_name).decode("utf8") 44 | except subprocess.CalledProcessError as e: 45 | if e.returncode == 100: 46 | contents = "" 47 | else: 48 | raise 49 | 50 | # Possibly means that the package does not appear in Ubuntu under that exact name 51 | if not contents: 52 | logger.warning(f"Package {package_name} not found in the installed Ubuntu apt sources") 53 | return () 54 | 55 | # Example depends line: 56 | # Depends: libc6 (>= 2.29), libgcc-s1 (>= 3.4), libstdc++6 (>= 9) 57 | version: Optional[Version] = None 58 | packages: Dict[Tuple[str, Version], List[List[Dependency]]] = {} 59 | for line in contents.split("\n"): 60 | if line.startswith("Version: "): 61 | matched = UbuntuResolver._ubuntu_version.match(line[len("Version: ") :]) 62 | if matched: 63 | # FIXME: Ubuntu versions can include "~", which the semantic_version library does not like 64 | # So hack a fix by simply dropping everything after the tilde: 65 | raw_version = matched.group("version").split("~", maxsplit=1)[0] 66 | version = Version.coerce(raw_version) 67 | if (package_name, version) not in packages: 68 | packages[(package_name, version)] = [] 69 | else: 70 | logger.warning(f"Failed to parse package {package_name} {line}") 71 | elif version is not None and line.startswith("Depends: "): 72 | deps = [] 73 | for dep in line[9:].split(","): 74 | for or_segment in dep.split("|"): 75 | # FIXME: For now, treat each ORed dependency as a separate ANDed dependency 76 | matched = UbuntuResolver._pattern.match(or_segment) 77 | if not matched: 78 | raise ValueError( 79 | f"Invalid dependency line in apt output for {package_name}: {line!r}" 80 | ) 81 | dep_package = matched.group("package") 82 | dep_version = matched.group("version") 83 | try: 84 | # remove trailing ubuntu versions like "-10ubuntu4": 85 | dep_version = dep_version.split("-", maxsplit=1)[0] 86 | dep_version = dep_version.replace(" ", "") 87 | SimpleSpec(dep_version)  # validate that the spec parses 88 | except Exception: 89 | dep_version = "*"  # FIXME: fall back to any version for specs SimpleSpec cannot parse, e.g. '= 1:7.0.1-12' 90 | 91 | deps.append((dep_package, dep_version)) 92 | 93 | packages[(package_name, version)].append( 94 | [ 95 | Dependency( 96 | package=pkg, 97 | semantic_version=SimpleSpec(ver), 98 | source=UbuntuResolver(), 99 | ) 100 | for pkg, ver in deps 101 | ] 102 | ) 103 | version = None 104 | 105 | # Sometimes `apt show` will return multiple packages with the same version but different dependencies.
106 | # For example: `apt show -a dkms` 107 | # Currently, we do a union over their dependencies 108 | # TODO: Figure out a better way to handle this 109 | return [ 110 | Package( 111 | name=pkg_name, 112 | version=version, 113 | source=UbuntuResolver(), 114 | dependencies=set().union(*duplicates), # type: ignore 115 | ) 116 | for (pkg_name, version), duplicates in packages.items() 117 | ] 118 | 119 | def resolve(self, dependency: Dependency) -> Iterator[Package]: 120 | if dependency.source != "ubuntu": 121 | raise ValueError( 122 | f"{self} can not resolve dependencies from other sources ({dependency})" 123 | ) 124 | 125 | if dependency.package.startswith("/"): 126 | # this is a file path, likely produced from native.py 127 | try: 128 | deps = [] 129 | for pkg_name in file_to_packages(dependency.package): 130 | deps.append(Dependency(package=pkg_name, source=UbuntuResolver.name)) 131 | if deps: 132 | yield Package( 133 | name=dependency.package, 134 | source=dependency.source, 135 | version=Version.coerce("0"), 136 | dependencies=deps, 137 | ) 138 | except (ValueError, subprocess.CalledProcessError): 139 | pass 140 | else: 141 | for package in UbuntuResolver.ubuntu_packages(dependency.package): 142 | if package.version in dependency.semantic_version: 143 | yield package 144 | 145 | def __lt__(self, other): 146 | """Make sure that the Ubuntu Classifier runs last""" 147 | return False 148 | 149 | def is_available(self) -> ResolverAvailability: 150 | if shutil.which("docker") is None: 151 | return ResolverAvailability( 152 | False, 153 | "`Ubuntu` classifier needs to have Docker installed. Try apt install docker.io.", 154 | ) 155 | return ResolverAvailability(True) 156 | 157 | def can_resolve_from_source(self, repo: SourceRepository) -> bool: 158 | return False 159 | 160 | def resolve_from_source( 161 | self, repo: SourceRepository, cache: Optional[PackageCache] = None 162 | ) -> Optional[SourcePackage]: 163 | return None 164 | 165 | def can_update_dependencies(self, package: Package) -> bool: 166 | return package.source != UbuntuResolver.name 167 | 168 | def update_dependencies(self, package: Package) -> Package: 169 | native_deps = get_native_dependencies(package) 170 | package.dependencies = package.dependencies.union(frozenset(native_deps)) 171 | return package 172 | -------------------------------------------------------------------------------- /it_depends/vcs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions to automatically download source repositories from various VCS systems and providers. 
3 | Logic largely taken from the implementation of `go get`: 4 | 5 | https://golang.org/src/cmd/go/internal/vcs/vcs.go 6 | 7 | """ 8 | import sys 9 | from dataclasses import dataclass 10 | import os 11 | import re 12 | from re import Pattern 13 | import subprocess 14 | from typing import Callable, cast, Dict, Iterable, List, Optional, Type, TypeVar 15 | 16 | 17 | class VCSResolutionError(ValueError): 18 | pass 19 | 20 | 21 | class GoVCSConfigError(VCSResolutionError): 22 | pass 23 | 24 | 25 | T = TypeVar("T") 26 | 27 | 28 | class VCS: 29 | _DEFAULT_INSTANCE: "VCS" 30 | 31 | def __init__(self, name: str, cmd: str, scheme: Iterable[str], ping_cmd: Iterable[str]): 32 | self.name: str = name 33 | self.cmd: str = cmd 34 | self.scheme: List[str] = list(scheme) 35 | self.ping_cmd: List[str] = list(ping_cmd) 36 | 37 | def __init_subclass__(cls, **kwargs): 38 | setattr(cls, "_DEFAULT_INSTANCE", cls()) 39 | 40 | @classmethod 41 | def default_instance(cls: Type[T]) -> T: 42 | return cast(T, getattr(cls, "_DEFAULT_INSTANCE")) 43 | 44 | def ping(self, repo: str) -> Optional[str]: 45 | env = {"GIT_TERMINAL_PROMPT": "0"} 46 | if os.environ.get("GIT_SSH", "") == "" and os.environ.get("GIT_SSH_COMMAND", "") == "": 47 | # disable any ssh connection pooling by git 48 | env["GIT_SSH_COMMAND"] = "ssh -o ControlMaster=no" 49 | for scheme in self.scheme: 50 | cmd = [self.cmd] + [ 51 | c.replace("{scheme}", scheme).replace("{repo}", repo) for c in self.ping_cmd 52 | ] 53 | if ( 54 | subprocess.call(cmd, stdout=subprocess.DEVNULL, stdin=subprocess.DEVNULL, env=env) 55 | == 0 56 | ): 57 | return scheme 58 | return None 59 | 60 | def __hash__(self): 61 | return hash(self.name) 62 | 63 | def __eq__(self, other): 64 | return isinstance(other, VCS) and self.name == other.name 65 | 66 | 67 | class Git(VCS): 68 | def __init__(self): 69 | super().__init__( 70 | name="Git", 71 | cmd="git", 72 | scheme=("git", "https", "http", "git+ssh", "ssh"), 73 | ping_cmd=("ls-remote", "{scheme}://{repo}"), 74 | ) 75 | 76 | 77 | VCSes: List[VCS] = [vcs.default_instance() for vcs in (Git,)] 78 | 79 | # VCS_MOD is a stub for the "mod" scheme. It's returned by 80 | # repoRootForImportPathDynamic, but is otherwise not treated as a VCS command. 81 | VCS_MOD = VCS(name="mod", cmd="", scheme=(), ping_cmd=()) 82 | 83 | 84 | @dataclass 85 | class Match: 86 | prefix: str 87 | import_path: str 88 | repo: str = "" 89 | vcs: str = "" 90 | root: Optional[str] = None 91 | 92 | def expand(self, s: str) -> str: 93 | for key, value in self.__dict__.items(): 94 | if not key.startswith("_"): 95 | s = s.replace(f"{{{key}}}", value) 96 | return s 97 | 98 | 99 | if sys.version_info >= (3, 9): 100 | REGEXP_TYPE = Pattern[str] 101 | else: 102 | REGEXP_TYPE = Pattern 103 | 104 | 105 | @dataclass 106 | class VCSPath: 107 | regexp: REGEXP_TYPE 108 | repo: str = "" 109 | path_prefix: str = "" 110 | check: Optional[Callable[[Match], None]] = None 111 | vcs: Optional[str] = None 112 | schemeless_repo: bool = False 113 | 114 | 115 | class VCSMatchError(VCSResolutionError): 116 | pass 117 | 118 | 119 | def no_vcs_suffix(match: Match): 120 | """ 121 | checks that the repository name does not end in .foo for any version control system foo. 122 | The usual culprit is ".git". 
123 | 124 | """ 125 | repo = match.repo 126 | for vcs in VCSes: 127 | if repo.endswith(f".{vcs.cmd}"): 128 | raise VCSMatchError(f"Invalid version control suffix in {match.prefix!r} path") 129 | 130 | 131 | VCS_PATHS: List[VCSPath] = [] 132 | 133 | 134 | def _register(path: VCSPath) -> VCSPath: 135 | VCS_PATHS.append(path) 136 | return path 137 | 138 | 139 | GITHUB = _register( 140 | VCSPath( 141 | path_prefix="github.com", 142 | regexp=re.compile( 143 | r"^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$" 144 | ), 145 | vcs="git", 146 | repo="https://{root}", 147 | check=no_vcs_suffix, 148 | ) 149 | ) 150 | 151 | 152 | GENERAL_REPO = _register( 153 | VCSPath( 154 | regexp=re.compile( 155 | r"(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\." 156 | r"(?P<vcs>bzr|fossil|git|hg|svn))(/~?[A-Za-z0-9_.\-]+)*$" 157 | ), 158 | schemeless_repo=True, 159 | ) 160 | ) 161 | 162 | 163 | @dataclass 164 | class Repository: 165 | repo: str 166 | root: str 167 | vcs: VCS 168 | is_custom: bool = False 169 | 170 | 171 | def vcs_by_cmd(cmd: str) -> Optional[VCS]: 172 | """vcsByCmd returns the version control system for the given command name (hg, git, svn, bzr).""" 173 | for vcs in VCSes: 174 | if cmd == vcs.cmd: 175 | return vcs 176 | return None 177 | 178 | 179 | @dataclass 180 | class GoVCSRule: 181 | pattern: str 182 | allowed: List[str] 183 | 184 | 185 | DEFAULT_GO_VCS: List[GoVCSRule] = [ 186 | GoVCSRule("private", ["all"]), 187 | GoVCSRule("public", ["git", "hg"]), 188 | ] 189 | 190 | 191 | GO_VCS_RULES: Optional[List[GoVCSRule]] = None 192 | 193 | 194 | def parse_go_vcs(s: str) -> Optional[List[GoVCSRule]]: 195 | s = s.strip() 196 | if not s: 197 | return None 198 | rules: List[GoVCSRule] = [] 199 | have: Dict[str, str] = {} 200 | for item in s.split(","): 201 | item = item.strip() 202 | if not item: 203 | raise GoVCSConfigError("Empty entry in GOVCS") 204 | i = item.find(":") 205 | if i < 0: 206 | raise GoVCSConfigError(f"Malformed entry in GOVCS (missing colon): {item!r}") 207 | pattern, vcs_list = item[:i].strip(), item[i + 1 :].strip() 208 | if not pattern: 209 | raise GoVCSConfigError(f"Empty pattern in GOVCS: {item!r}") 210 | if not vcs_list: 211 | raise GoVCSConfigError(f"Empty VCS list in GOVCS: {item!r}") 212 | if not os.path.isabs(pattern): 213 | raise GoVCSConfigError(f"Relative pattern not allowed in GOVCS: {pattern!r}") 214 | if have.get(pattern, ""): 215 | raise GoVCSConfigError( 216 | f"Unreachable pattern in GOVCS: {item!r} after {have[pattern]!r}" 217 | ) 218 | have[pattern] = item 219 | allowed = [a.strip() for a in vcs_list.split("|")] 220 | if any(not a for a in allowed): 221 | raise GoVCSConfigError(f"Empty VCS name in GOVCS: {item!r}") 222 | rules.append(GoVCSRule(pattern=pattern, allowed=allowed)) 223 | return rules 224 | 225 | 226 | def check_go_vcs(vcs: VCS, root: str): 227 | if vcs == VCS_MOD: 228 | return 229 | global GO_VCS_RULES 230 | if GO_VCS_RULES is None: 231 | GO_VCS_RULES = parse_go_vcs(os.getenv("GOVCS", "")) 232 | if GO_VCS_RULES is None: 233 | GO_VCS_RULES = [] 234 | GO_VCS_RULES.extend(DEFAULT_GO_VCS) 235 | # TODO: Eventually consider implementing this GOVCS check: 236 | # private := module.MatchPrefixPatterns(cfg.GOPRIVATE, root) 237 | # if !govcs.allow(root, private, vcs.Cmd) { 238 | # what := "public" 239 | # if private { 240 | # what = "private" 241 | # } 242 | # return fmt.Errorf("GOVCS disallows using %s for %s %s; see 'go help vcs'", vcs.Cmd, what, root) 243 | # } 244 | 245 | 246 | def resolve(path: str) ->
Repository: 247 | for service in VCS_PATHS: 248 | if not path.startswith(service.path_prefix): 249 | continue 250 | m = service.regexp.match(path) 251 | if m is None: 252 | if service.path_prefix: 253 | raise VCSMatchError(f"Invalid {service.path_prefix} import path {path!r}") 254 | match = Match(prefix=f"{service.path_prefix}/", import_path=path) 255 | if m: 256 | for name, value in m.groupdict().items(): 257 | if name and value: 258 | setattr(match, name, value) 259 | if service.vcs is not None: 260 | match.vcs = match.expand(service.vcs) 261 | if service.repo: 262 | match.repo = match.expand(service.repo) 263 | if service.check is not None: 264 | service.check(match) 265 | vcs = vcs_by_cmd(match.vcs) 266 | if vcs is None: 267 | raise VCSResolutionError(f"unknown version control system {match.vcs!r}") 268 | elif match.root is None: 269 | raise VCSResolutionError(f"{match!r} was expected to have a non-None root!") 270 | check_go_vcs(vcs, match.root) 271 | if not service.schemeless_repo: 272 | repo_url: str = match.repo 273 | else: 274 | scheme = vcs.ping(match.repo) 275 | if scheme is None: 276 | scheme = vcs.scheme[0] 277 | repo_url = f"{scheme}://{match.repo}" 278 | return Repository(repo=repo_url, root=match.root, vcs=vcs) 279 | raise VCSResolutionError(f"Unable to resolve repository for {path!r}") 280 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.isort] 2 | line_length = 100 3 | multi_line_output = 3 4 | known_first_party = "it_depends" 5 | include_trailing_comma = true 6 | 7 | [tool.black] 8 | line-length = 100 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | SETUP_DIR = os.path.dirname(os.path.realpath(__file__)) 5 | README_PATH = os.path.join(SETUP_DIR, "README.md") 6 | 7 | with open(README_PATH, "r") as readme: 8 | README = readme.read() 9 | 10 | setup( 11 | name="it-depends", 12 | description="A software dependency analyzer", 13 | long_description=README, 14 | long_description_content_type="text/markdown", 15 | license="LGPL-3.0-or-later", 16 | url="https://github.com/trailofbits/it-depends", 17 | author="Trail of Bits", 18 | version="0.1.3", 19 | packages=find_packages(exclude=["test"]), 20 | python_requires=">=3.7", 21 | install_requires=[ 22 | "appdirs>=1.4.4", 23 | "cyclonedx-python-lib >= 5,< 9", 24 | "docker>=4.4.0", 25 | "graphviz>=0.14.1", 26 | "johnnydep>=1.8", 27 | "networkx>=2.4", 28 | "parse_cmake>=0.4.1", 29 | "semantic_version~=2.8.5", 30 | "sqlalchemy>=1.3", 31 | "tqdm>=4.48.0", 32 | # Indirect dependencies for which we pin a minimum version to mitigate vulnerabilities: 33 | "requests>=2.20.0", # CVE-2018-18074 34 | "urllib3>=1.26.5", # CVE-2021-33503 35 | ], 36 | extras_require={ 37 | "dev": ["flake8", "pytest", "twine", "mypy>=0.812", "types-setuptools", "types-requests"] 38 | }, 39 | entry_points={ 40 | "console_scripts": [ 41 | "it-depends = it_depends.__main__:main" 42 | ] 43 | }, 44 | classifiers=[ 45 | "Development Status :: 4 - Beta", 46 | "Environment :: Console", 47 | "Intended Audience :: Science/Research", 48 | "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)", 49 | "Programming Language :: Python :: 3 :: Only", 50 | "Topic :: Utilities" 51 | ] 52 | ) 53 | 
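The `entry_points` stanza above is what wires the `it-depends` console command to `it_depends.__main__:main`. For orientation, a minimal, hedged usage sketch follows: the shell flags mirror those used by `test/rebuild_expected_output.py`, and the programmatic half assumes only the names the tests import from `it_depends.dependencies` (`resolve`, `SourceRepository`, and an iterable package cache), so treat it as illustrative rather than a documented API.

# Illustrative sketch (not part of the package):
#
# Shell, via the console script installed by setup.py:
#   it-depends path/to/repo -f json -o deps.json --force
#
# Programmatic, assuming the API surface the tests import:
from it_depends.dependencies import SourceRepository, resolve

package_cache = resolve(SourceRepository("path/to/repo"))  # path is illustrative
for package in package_cache:  # package caches are iterated this way in test_audit.py
    print(package.full_name)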
-------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trailofbits/it-depends/cdb9a1a04bfb3ee3d61c10e30157e8ce0cb38223/test/__init__.py -------------------------------------------------------------------------------- /test/rebuild_expected_output.py: -------------------------------------------------------------------------------- 1 | """ 2 | Rebuilds repos/*.expected.json by running the tests in a Docker container to match how they would be run in CI 3 | """ 4 | 5 | from pathlib import Path 6 | from typing import Optional 7 | 8 | from it_depends.docker import DockerContainer, Dockerfile 9 | 10 | from test_smoke import IT_DEPENDS_DIR, SmokeTest, SMOKE_TESTS 11 | 12 | 13 | CI_TEST_PATH: Path = Path(__file__).parent.parent / ".github" / "workflows" / "tests.yml" 14 | _CONTAINER: Optional[DockerContainer] = None 15 | 16 | 17 | def container_type() -> str: 18 | """Returns the Docker container name used in GitHub CI""" 19 | if not CI_TEST_PATH.exists(): 20 | raise ValueError(f"GitHub action file {CI_TEST_PATH!s} does not exist!") 21 | with open(CI_TEST_PATH, "r") as f: 22 | for line in f.readlines(): 23 | line = line.strip() 24 | if line.startswith("runs-on:"): 25 | github_name = line[len("runs-on:"):].lstrip() 26 | hyphen_index = github_name.find("-") 27 | if hyphen_index < 0: 28 | raise ValueError(f"Unknown runs-on: container type {github_name!r} in {CI_TEST_PATH}") 29 | return f"{github_name[:hyphen_index]}:{github_name[hyphen_index+1:]}" 30 | raise ValueError(f"Did not find `runs-on: ...` line in {CI_TEST_PATH}") 31 | 32 | 33 | def get_container() -> DockerContainer: 34 | global _CONTAINER 35 | if _CONTAINER is None: 36 | dockerfile = Dockerfile(IT_DEPENDS_DIR / "Dockerfile") 37 | dockerfile_existed = dockerfile.exists() 38 | try: 39 | if not dockerfile_existed: 40 | with open(dockerfile.path, "w") as f: 41 | f.write(f"""FROM {container_type()} 42 | 43 | RUN DEBIAN_FRONTEND=noninteractive apt-get update && \\ 44 | DEBIAN_FRONTEND=noninteractive apt-get install -y python3 python3-dev python3-pip docker.io \\ 45 | cmake autoconf golang cargo npm clang \\ 46 | && mkdir -p /it-depends 47 | # this is required for cargo: 48 | ENV USER=root 49 | COPY . /it-depends 50 | WORKDIR /it-depends 51 | RUN pip3 install . 
52 | """) 53 | _CONTAINER = DockerContainer("trailofbits/it-depends", dockerfile=dockerfile, tag="latest") 54 | _CONTAINER.rebuild() 55 | finally: 56 | if not dockerfile_existed and dockerfile.exists(): 57 | dockerfile.path.unlink() 58 | return _CONTAINER 59 | 60 | 61 | def rebuild(test: SmokeTest): 62 | print(f"Rebuilding {test.expected_json!s}") 63 | container = get_container() 64 | if container.run( 65 | "it-depends", str(test.snapshot_folder.relative_to(IT_DEPENDS_DIR)), "-f", "json", 66 | "-o", str(test.expected_json.relative_to(IT_DEPENDS_DIR)), "--force", 67 | cwd=IT_DEPENDS_DIR, 68 | check_existence=False, rebuild=False, mounts=( 69 | (test.expected_json.parent, "/it-depends/test/repos"), 70 | ("/var/run/docker.sock", "/var/run/docker.sock"), 71 | ), 72 | privileged=True 73 | ) != 0: 74 | raise ValueError(f"it-depends exited with non-zero status for {test.snapshot_folder}!") 75 | print(f"Updated {test.expected_json!s}") 76 | 77 | 78 | if __name__ == "__main__": 79 | for t in sorted(SMOKE_TESTS, key=lambda st: st.repo_name): 80 | rebuild(t) 81 | -------------------------------------------------------------------------------- /test/repos/.gitignore: -------------------------------------------------------------------------------- 1 | *.zip 2 | *-* 3 | *.actual.json -------------------------------------------------------------------------------- /test/repos/cvedb.expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "ubuntu:/lib/x86_64-linux-gnu/libnss_files.so.2": { 3 | "0.0.0": { 4 | "dependencies": { 5 | "ubuntu:libc6": "*" 6 | }, 7 | "source": "ubuntu" 8 | } 9 | }, 10 | "ubuntu:libc6": { 11 | "2.31.0": { 12 | "dependencies": { 13 | "ubuntu:libgcc-s1": "*", 14 | "ubuntu:libcrypt1": "*" 15 | }, 16 | "source": "ubuntu" 17 | } 18 | }, 19 | "pip:cvss": { 20 | "2.2.0": { 21 | "dependencies": { 22 | "ubuntu:/lib/x86_64-linux-gnu/libdl.so.2": "*", 23 | "ubuntu:/lib/x86_64-linux-gnu/libnss_files.so.2": "*", 24 | "ubuntu:/lib/x86_64-linux-gnu/libc.so.6": "*", 25 | "ubuntu:/lib/x86_64-linux-gnu/libtinfo.so.6": "*" 26 | }, 27 | "source": "pip" 28 | } 29 | }, 30 | "ubuntu:/lib/x86_64-linux-gnu/libdl.so.2": { 31 | "0.0.0": { 32 | "dependencies": { 33 | "ubuntu:libc6": "*" 34 | }, 35 | "source": "ubuntu" 36 | } 37 | }, 38 | "ubuntu:libtinfo6": { 39 | "6.2.0": { 40 | "dependencies": { 41 | "ubuntu:libc6": ">=2.16" 42 | }, 43 | "source": "ubuntu" 44 | } 45 | }, 46 | "pip:six": { 47 | "1.5.0": { 48 | "dependencies": { 49 | "ubuntu:/lib/x86_64-linux-gnu/libdl.so.2": "*", 50 | "ubuntu:/lib/x86_64-linux-gnu/libnss_files.so.2": "*", 51 | "ubuntu:/lib/x86_64-linux-gnu/libc.so.6": "*", 52 | "ubuntu:/lib/x86_64-linux-gnu/libtinfo.so.6": "*" 53 | }, 54 | "source": "pip" 55 | } 56 | }, 57 | "ubuntu:libcrypt1": { 58 | "4.4.10": { 59 | "dependencies": { 60 | "ubuntu:libc6": ">=2.25" 61 | }, 62 | "source": "ubuntu" 63 | } 64 | }, 65 | "ubuntu:/lib/x86_64-linux-gnu/libc.so.6": { 66 | "0.0.0": { 67 | "dependencies": { 68 | "ubuntu:libc6": "*" 69 | }, 70 | "source": "ubuntu" 71 | } 72 | }, 73 | "ubuntu:libgcc-s1": { 74 | "10.3.0": { 75 | "dependencies": { 76 | "ubuntu:libc6": ">=2.14", 77 | "ubuntu:gcc-10-base": "*" 78 | }, 79 | "source": "ubuntu" 80 | }, 81 | "10.0.0": { 82 | "dependencies": { 83 | "ubuntu:libc6": ">=2.14", 84 | "ubuntu:gcc-10-base": "*" 85 | }, 86 | "source": "ubuntu" 87 | } 88 | }, 89 | "ubuntu:/lib/x86_64-linux-gnu/libtinfo.so.6": { 90 | "0.0.0": { 91 | "dependencies": { 92 | "ubuntu:libtinfo6": "*" 93 | }, 94 | "source": "ubuntu" 95 | } 96 | 
}, 97 | "pip:python-dateutil": { 98 | "2.8.1": { 99 | "dependencies": { 100 | "ubuntu:/lib/x86_64-linux-gnu/libdl.so.2": "*", 101 | "pip:six": ">=1.5", 102 | "ubuntu:/lib/x86_64-linux-gnu/libtinfo.so.6": "*", 103 | "ubuntu:/lib/x86_64-linux-gnu/libnss_files.so.2": "*", 104 | "ubuntu:/lib/x86_64-linux-gnu/libc.so.6": "*" 105 | }, 106 | "source": "pip" 107 | } 108 | }, 109 | "pip:cvedb": { 110 | "0.0.4": { 111 | "dependencies": { 112 | "ubuntu:/lib/x86_64-linux-gnu/libdl.so.2": "*", 113 | "ubuntu:/lib/x86_64-linux-gnu/libc.so.6": "*", 114 | "pip:cvss": "~=2.2", 115 | "ubuntu:/lib/x86_64-linux-gnu/libtinfo.so.6": "*", 116 | "pip:tqdm": "~=4.48.0", 117 | "ubuntu:/lib/x86_64-linux-gnu/libnss_files.so.2": "*", 118 | "pip:python-dateutil": "~=2.8.1" 119 | }, 120 | "source": "pip", 121 | "is_source_package": true 122 | } 123 | }, 124 | "pip:tqdm": { 125 | "4.48.0": { 126 | "dependencies": { 127 | "ubuntu:/lib/x86_64-linux-gnu/libdl.so.2": "*", 128 | "ubuntu:/lib/x86_64-linux-gnu/libnss_files.so.2": "*", 129 | "ubuntu:/lib/x86_64-linux-gnu/libc.so.6": "*", 130 | "ubuntu:/lib/x86_64-linux-gnu/libtinfo.so.6": "*" 131 | }, 132 | "source": "pip" 133 | } 134 | } 135 | } -------------------------------------------------------------------------------- /test/repos/pe-parse.expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "ubuntu:libboost-filesystem1.71-dev": { 3 | "1.71.0": { 4 | "dependencies": { 5 | "ubuntu:libboost-filesystem1.71.0": "=1.71.0-6ubuntu6", 6 | "ubuntu:libboost1.71-dev": "=1.71.0-6ubuntu6", 7 | "ubuntu:libboost-system1.71-dev": "=1.71.0-6ubuntu6" 8 | }, 9 | "source": "ubuntu" 10 | } 11 | }, 12 | "pip:pepy": { 13 | "1.3.0": { 14 | "dependencies": {}, 15 | "source": "pip", 16 | "is_source_package": true 17 | } 18 | }, 19 | "cmake:dump-pe": { 20 | "0.0.0": { 21 | "dependencies": { 22 | "ubuntu:libboost-filesystem1.71-dev": "*" 23 | }, 24 | "source": "cmake", 25 | "is_source_package": true 26 | } 27 | } 28 | } -------------------------------------------------------------------------------- /test/test_apt.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from it_depends.ubuntu.apt import file_to_packages 4 | 5 | 6 | class TestAPT(TestCase): 7 | def test_file_to_package(self): 8 | self.assertEqual(file_to_packages("/usr/bin/python3"), [ 9 | 'python3-activipy', 10 | 'python3-breathe', 11 | 'python3-coverage', 12 | 'python3-coverage', 13 | 'python3-cymruwhois', 14 | 'python3-dbg', 15 | 'python3-dbg', 16 | 'python3-dbg', 17 | 'python3-dbg', 18 | 'python3-dev', 19 | 'python3-future', 20 | 'python3-future', 21 | 'python3-memory-profiler', 22 | 'python3-minimal', 23 | 'python3-pbr', 24 | 'python3-petname', 25 | 'python3-pyroute2', 26 | 'python3-pyroute2', 27 | 'python3-pysaml2', 28 | 'python3-pysaml2', 29 | 'python3-pysaml2', 30 | 'python3-pysaml2', 31 | 'python3-qrcode', 32 | 'python3-stem', 33 | 'python3-unidiff', 34 | 'python3-unittest2', 35 | 'python3-websocket', 36 | 'python3.8-dbg', 37 | 'python3.8-dbg', 38 | 'python3.8-dbg', 39 | 'python3.8-dbg', 40 | 'python3.8-dbg', 41 | 'python3.8-dbg', 42 | 'python3.8-dbg', 43 | 'python3.8-dbg', 44 | 'python3.8-dev', 45 | 'python3.8-minimal', 46 | 'python3.9-dbg', 47 | 'python3.9-dbg', 48 | 'python3.9-dbg', 49 | 'python3.9-dbg', 50 | 'python3.9-dbg', 51 | 'python3.9-dbg', 52 | 'python3.9-dbg', 53 | 'python3.9-dbg', 54 | 'python3.9-dev', 55 | 'python3.9-minimal' 56 | ]) 57 | 
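For context on how a candidate list like the one asserted above is consumed: `file_to_package` in `it_depends/ubuntu/apt.py` collapses the `file_to_packages` result to a single package by preferring the shortest name, with ties broken lexicographically. A minimal sketch of that selection rule, over a hypothetical subset of the candidates:

# Sketch of file_to_package's selection rule (see it_depends/ubuntu/apt.py);
# the candidates below are an illustrative subset, not real apt-file output:
candidates = ["python3-minimal", "python3-dev", "python3.8-minimal"]
_, chosen = min((len(pkg), pkg) for pkg in candidates)
assert chosen == "python3-dev"  # the shortest package name wins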
-------------------------------------------------------------------------------- /test/test_audit.py: -------------------------------------------------------------------------------- 1 | import threading 2 | from it_depends.dependencies import InMemoryPackageCache, Package, Vulnerability 3 | from it_depends import audit 4 | 5 | import logging 6 | import random 7 | import string 8 | import time 9 | from unittest import TestCase 10 | from unittest.mock import Mock, patch 11 | 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | def _rand_str(n): 17 | """Returns a random string of length n (upper, lower and digits)""" 18 | return ''.join(random.choice(string.ascii_lowercase + 19 | string.ascii_uppercase + string.digits) 20 | for i in range(n)) 21 | 22 | 23 | def _version_str(): 24 | """Returns a typical version string (x.y.z)""" 25 | return f"{random.randint(0, 30)}.{random.randint(0,5)}." \ 26 | f"{random.randint(0, 9)}" 27 | 28 | 29 | def _random_package(): 30 | """Returns a package of random name, version and source""" 31 | return Package(_rand_str(10), _version_str(), _rand_str(5)) 32 | 33 | 34 | def _random_packages(num_packages): 35 | """Returns a PackageCache populated with num_packages random Packages""" 36 | packages = InMemoryPackageCache() 37 | list(map(packages.add, [_random_package() for i in range(num_packages)])) 38 | return packages 39 | 40 | 41 | def _random_vulnerability(): 42 | """Create a random vulnerability""" 43 | return Vulnerability(_rand_str(10), 44 | [_rand_str(3) for i in range(random.randint(0, 7)) if 45 | random.randint(0, 100) < 90], 46 | _rand_str(random.randint(0, 10))) 47 | 48 | 49 | def _random_vulnerabilities(max_count): 50 | """Return up to max_count vulnerabilities""" 51 | return [_random_vulnerability() for x in range(random.randint(0, max_count))] 52 | 53 | 54 | class TestAudit(TestCase): 55 | def setUp(self): 56 | # To be able to repeat a failing test, the random seed is logged 57 | seed = int(time.time()) 58 | random.seed(seed) 59 | logger.warning(f"Using seed: {seed}") 60 | 61 | @patch('it_depends.audit.post') 62 | def test_nopackages_no_requests(self, mock_post): 63 | packages = _random_packages(0) 64 | ret = audit.vulnerabilities(packages) 65 | self.assertEqual(ret, packages) 66 | mock_post.assert_not_called() 67 | 68 | @patch('it_depends.audit.post') 69 | def test_valid_limited_info_response(self, mock_post): 70 | """Ensures that a single vuln with the minimum amount of info we require works""" 71 | packages = _random_packages(1) 72 | mock_post().json.return_value = {"vulns": [{"id": "123"}]} 73 | ret = audit.vulnerabilities(packages) 74 | 75 | pkg = next(p for p in ret) 76 | vuln = next(v for v in pkg.vulnerabilities) # Assume one vulnerability 77 | self.assertEqual(vuln.id, "123") 78 | self.assertEqual(len(vuln.aliases), 0) 79 | self.assertEqual(vuln.summary, "N/A") 80 | 81 | @patch('it_depends.audit.post') 82 | def test_no_vulns_can_be_handled(self, mock_post): 83 | """No vulnerability info can still be handled""" 84 | packages = _random_packages(1) 85 | mock_post().json.return_value = {} 86 | ret = audit.vulnerabilities(packages) 87 | self.assertTrue(all(map(lambda p: len(p.vulnerabilities) == 0, ret))) 88 | 89 | @patch('it_depends.audit.post') 90 | def test_handles_ten_thousand_requests(self, mock_post): 91 | """Constructs ten thousand random packages and maps random vulnerabilities to the packages.
92 | Ensures that the vulnerability information received from OSV is reflected in the Packages""" 93 | 94 | # Create 10k random packages (name, version, source) 95 | packages = _random_packages(10000) 96 | 97 | # For each of the packages, map 0 or more vulnerabilities 98 | package_vuln = {(pkg.name, str(pkg.version)): _random_vulnerabilities(10) for pkg in packages} 99 | 100 | # Mocks the JSON request to OSV; returns whatever info is in the package_vuln map 101 | def _osv_response(_, json): 102 | m = Mock() 103 | key = (json["package"]["name"], json["version"]) 104 | if key in package_vuln: 105 | m.json.return_value = {"vulns": list(map(lambda x: x.to_obj(), package_vuln[key]))} 106 | else: 107 | m.json.return_value = {} 108 | return m 109 | 110 | mock_post.side_effect = _osv_response 111 | 112 | # Query all packages for vulnerabilities; ensure that each package received vulnerability 113 | # info as stated in the package_vuln map created earlier. 114 | for pkg in audit.vulnerabilities(packages): 115 | pkgvuln = sorted(pkg.vulnerabilities) 116 | expectedvuln = sorted(package_vuln[(pkg.name, str(pkg.version))]) 117 | 118 | self.assertListEqual(pkgvuln, expectedvuln) 119 | 120 | @patch('it_depends.audit.post') 121 | def test_exceptions_are_logged_and_isolated(self, mock_post): 122 | """Ensure that exceptions raised during vulnerability querying do not kill execution. 123 | They are still logged.""" 124 | packages = _random_packages(100) 125 | lock = threading.Lock() 126 | counter = 0 127 | 128 | def _osv_response(_, json): 129 | nonlocal counter 130 | m = Mock() 131 | m.json.return_value = {} 132 | with lock: 133 | counter += 1 134 | if counter % 2 == 0: 135 | raise Exception("Ouch.") 136 | return m 137 | mock_post.side_effect = _osv_response 138 | 139 | self.assertEqual(len(audit.vulnerabilities(packages)), 100) 140 | -------------------------------------------------------------------------------- /test/test_db.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from it_depends.db import DBPackageCache 4 | from it_depends.dependencies import Dependency, DependencyResolver, Package, ResolverAvailability, SimpleSpec, Version, resolvers, resolver_by_name 5 | 6 | 7 | class TestDB(TestCase): 8 | def setUp(self) -> None: 9 | class UnusedResolver(DependencyResolver): 10 | name: str = "unknown" 11 | description: str = "Used for testing" 12 | 13 | def is_available(self) -> ResolverAvailability: 14 | return ResolverAvailability(False, "Unused resolver") 15 | 16 | def can_resolve_from_source(self, repo) -> bool: 17 | return False 18 | 19 | def resolve_from_source(self, repo, cache=None): 20 | raise NotImplementedError() 21 | 22 | self.unknown = UnusedResolver 23 | del UnusedResolver 24 | 25 | def tearDown(self) -> None: 26 | del self.unknown 27 | resolvers.cache_clear() 28 | resolver_by_name.cache_clear() 29 | # force collection of the UnusedResolver subclass so it is removed from the global resolver set 30 | import gc 31 | gc.collect() 32 | gc.collect() 33 | 34 | def test_db(self): 35 | with DBPackageCache() as cache: 36 | UnusedResolver = self.unknown 37 | pkg = Package(name="package", version=Version.coerce("1.0.0"), source=UnusedResolver(), 38 | dependencies=(Dependency(package="dep", semantic_version=SimpleSpec(">3.0"), 39 | source=UnusedResolver()),)) 40 | cache.add(pkg) 41 | self.assertIn(pkg, cache) 42 | self.assertEqual(len(cache), 1) 43 | # re-adding the package should be a NO-OP 44 | cache.add(pkg) 45 | self.assertEqual(len(cache), 1) 46 | # try
adding the package again, but with fewer dependencies: 47 | smaller_pkg = Package(name="package", version=Version.coerce("1.0.0"), source=UnusedResolver()) 48 | self.assertRaises(ValueError, cache.add, smaller_pkg) 49 | 50 | -------------------------------------------------------------------------------- /test/test_go.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from it_depends.go import GoModule, GoSpec, GoVersion 4 | 5 | 6 | EXAMPLE_MOD = """ 7 | module github.com/btcsuite/btcd 8 | 9 | require ( 10 | \tgithub.com/aead/siphash v1.0.1 // indirect 11 | \tgithub.com/btcsuite/btclog v0.0.0-20170628155309-84c8d2346e9f 12 | \tgithub.com/btcsuite/btcutil v0.0.0-20190425235716-9e5f4b9a998d 13 | \tgithub.com/btcsuite/go-socks v0.0.0-20170105172521-4720035b7bfd 14 | \tgithub.com/btcsuite/goleveldb v0.0.0-20160330041536-7834afc9e8cd 15 | \tgithub.com/btcsuite/snappy-go v0.0.0-20151229074030-0bdef8d06723 // indirect 16 | \tgithub.com/btcsuite/websocket v0.0.0-20150119174127-31079b680792 17 | \tgithub.com/btcsuite/winsvc v1.0.0 18 | \tgithub.com/davecgh/go-spew v0.0.0-20171005155431-ecdeabc65495 19 | \tgithub.com/jessevdk/go-flags v0.0.0-20141203071132-1679536dcc89 20 | \tgithub.com/jrick/logrotate v1.0.0 21 | \tgithub.com/kkdai/bstream v0.0.0-20161212061736-f391b8402d23 // indirect 22 | \tgithub.com/onsi/ginkgo v1.7.0 // indirect 23 | \tgithub.com/onsi/gomega v1.4.3 // indirect 24 | \tgolang.org/x/crypto v0.0.0-20170930174604-9419663f5a44 25 | ) 26 | 27 | go 1.12 28 | """ 29 | 30 | 31 | class TestGo(TestCase): 32 | def test_load_from_github(self): 33 | GoModule.from_git("github.com/golang/protobuf", "https://github.com/golang/protobuf", tag="v1.4.3") 34 | 35 | def test_parsing(self): 36 | module = GoModule.parse_mod(EXAMPLE_MOD) 37 | self.assertEqual(module.name, "github.com/btcsuite/btcd") 38 | self.assertEqual(len(module.dependencies), 15) 39 | self.assertIn(("github.com/btcsuite/websocket", "v0.0.0-20150119174127-31079b680792"), module.dependencies) 40 | 41 | def test_version_parsing(self): 42 | for _, version in GoModule.parse_mod(EXAMPLE_MOD).dependencies: 43 | self.assertEqual(str(GoVersion(version)), version) 44 | self.assertEqual(str(GoSpec(version)), version) 45 | -------------------------------------------------------------------------------- /test/test_graphs.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from it_depends.graphs import RootedDiGraph 4 | 5 | class Node(int): 6 | pass 7 | 8 | 9 | class Root(Node): 10 | pass 11 | 12 | 13 | class TestGraphs(unittest.TestCase): 14 | def test_single_root(self): 15 | graph: RootedDiGraph[Node, Root] = RootedDiGraph() 16 | graph.root_type = Root 17 | nodes = [Root(0)] + [Node(i) for i in range(1, 5)] 18 | graph.add_node(nodes[0]) 19 | graph.add_edge(nodes[0], nodes[1]) 20 | graph.add_edge(nodes[0], nodes[2]) 21 | graph.add_edge(nodes[1], nodes[3]) 22 | graph.add_edge(nodes[2], nodes[4]) 23 | self.assertEqual(0, graph.shortest_path_from_root(nodes[0])) 24 | self.assertEqual(1, graph.shortest_path_from_root(nodes[1])) 25 | self.assertEqual(1, graph.shortest_path_from_root(nodes[2])) 26 | self.assertEqual(2, graph.shortest_path_from_root(nodes[3])) 27 | self.assertEqual(2, graph.shortest_path_from_root(nodes[4])) 28 | 29 | def test_two_roots(self): 30 | graph: RootedDiGraph[Node, Root] = RootedDiGraph() 31 | graph.root_type = Root 32 | nodes = [Root(0), Root(1)] + [Node(i) for i in range(2, 5)] 33 | 
graph.add_node(nodes[0]) 34 | graph.add_node(nodes[1]) 35 | graph.add_edge(nodes[0], nodes[2]) 36 | graph.add_edge(nodes[0], nodes[3]) 37 | graph.add_edge(nodes[1], nodes[3]) 38 | graph.add_edge(nodes[3], nodes[4]) 39 | self.assertEqual(0, graph.shortest_path_from_root(nodes[0])) 40 | self.assertEqual(0, graph.shortest_path_from_root(nodes[1])) 41 | self.assertEqual(1, graph.shortest_path_from_root(nodes[2])) 42 | self.assertEqual(1, graph.shortest_path_from_root(nodes[3])) 43 | self.assertEqual(2, graph.shortest_path_from_root(nodes[4])) 44 | -------------------------------------------------------------------------------- /test/test_native.py: -------------------------------------------------------------------------------- 1 | from platform import machine 2 | from unittest import TestCase 3 | 4 | from it_depends.dependencies import Package, Version 5 | from it_depends.native import get_native_dependencies 6 | 7 | 8 | def arch_string() -> str: 9 | """Returns an architecture-dependent string for filenames. 10 | Current support is only arm64/x86_64.""" 11 | # TODO (hbrodin): Make more general. 12 | return "aarch64" if machine() == "arm64" else "x86_64" 13 | 14 | 15 | class TestNative(TestCase): 16 | def test_native(self): 17 | deps = {dep.package for dep in get_native_dependencies(Package( 18 | name="numpy", 19 | version=Version.coerce("1.19.4"), 20 | source="pip" 21 | ))} 22 | arch = arch_string() 23 | self.assertEqual({ 24 | f'/lib/{arch}-linux-gnu/libtinfo.so.6', f'/lib/{arch}-linux-gnu/libnss_files.so.2', 25 | f'/lib/{arch}-linux-gnu/libc.so.6', f'/lib/{arch}-linux-gnu/libdl.so.2' 26 | }, deps) 27 | -------------------------------------------------------------------------------- /test/test_resolver.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from it_depends.resolver import resolve_sbom 4 | from it_depends.sbom import cyclonedx_to_json 5 | 6 | from .test_smoke import SmokeTest 7 | 8 | 9 | class TestResolver(TestCase): 10 | def test_resolve(self): 11 | test = SmokeTest("trailofbits", "it-depends", "3db3d191ce04fb8a19bcc5c000ce84dbb3243f31") 12 | packages = test.run() 13 | for package in packages.source_packages: 14 | for sbom in resolve_sbom(package, packages, order_ascending=True): 15 | # print(str(sbom)) 16 | print(cyclonedx_to_json(sbom.to_cyclonedx())) 17 | break 18 | -------------------------------------------------------------------------------- /test/test_smoke.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from unittest import TestCase 3 | from pathlib import Path 4 | import os 5 | import json 6 | from typing import Set 7 | import urllib.request  # urlretrieve is used in SmokeTest.snapshot_folder below 8 | import zipfile 9 | 10 | from it_depends.dependencies import ( 11 | Dependency, InMemoryPackageCache, List, Optional, Package, PackageRepository, resolve, resolver_by_name, resolvers, 12 | SimpleSpec, SourceRepository, Tuple, Union 13 | ) 14 | 15 | IT_DEPENDS_DIR: Path = Path(__file__).absolute().parent.parent 16 | TESTS_DIR: Path = Path(__file__).absolute().parent 17 | REPOS_FOLDER = TESTS_DIR / "repos" 18 | 19 | 20 | class TestResolvers(TestCase): 21 | maxDiff = None 22 | 23 | def test_resolvers(self): 24 | """We see all known resolvers. 25 | caveat: if an unknown resolver was defined by another test, it will appear here 26 | """ 27 | resolver_names = {resolver.name for resolver in resolvers()} 28 | self.assertSetEqual(resolver_names, {'cargo', 'ubuntu', 'autotools', 'go', 'cmake', 'npm', 
'pip'}) 29 | self.assertSetEqual(resolvers(), {resolver_by_name(name) for name in resolver_names}) 30 | 31 | def test_objects(self): 32 | # To/From string for nicer output and ergonomics 33 | self.assertEqual(str(Dependency.from_string("pip:cvedb@*")), "pip:cvedb@*") 34 | self.assertEqual(str(Package.from_string("pip:cvedb@0.0.1")), "pip:cvedb@0.0.1") 35 | 36 | # Basic Dependency object handling 37 | dep = Dependency.from_string("pip:cvedb@*") 38 | self.assertEqual(dep.source, "pip") 39 | self.assertEqual(dep.package, "cvedb") 40 | self.assertTrue(dep.semantic_version == SimpleSpec("*")) 41 | self.assertTrue(Dependency(source="pip", package="cvedb", semantic_version=SimpleSpec("*")) == 42 | dep) 43 | 44 | # Dependency match 45 | solution = Package(source="pip", name="cvedb", version="0.0.1") 46 | self.assertTrue(dep.match(solution)) 47 | dep2 = Dependency.from_string("pip:cvedb@<0.2.1") 48 | self.assertTrue(dep2.match(Package.from_string("pip:cvedb@0.2.0"))) 49 | self.assertFalse(dep2.match(Package.from_string("pip:cvedb@0.2.1"))) 50 | 51 | def _test_resolver(self, resolver, dep): 52 | dep = Dependency.from_string(dep) 53 | resolver = resolver_by_name(resolver) 54 | self.assertIs(dep.resolver, resolver) 55 | 56 | solutions = tuple(resolver.resolve(dep)) 57 | self.assertGreater(len(solutions), 0) 58 | for package in solutions: 59 | self.assertEqual(package.source, dep.source) 60 | self.assertEqual(package.name, dep.package) 61 | self.assertTrue(dep.semantic_version.match(package.version)) 62 | self.assertTrue(dep.match(package)) 63 | return solutions 64 | 65 | def test_determinism(self): 66 | """Test if a resolver gives the same solution multiple times in a row. 67 | 68 | Half of the attempts will be without a cache, and the second half will use the same cache. 
69 | 70 | """ 71 | cache = InMemoryPackageCache() 72 | to_test: List[Tuple[Union[Dependency, SourceRepository], int]] = [ 73 | (Dependency.from_string(dep_name), 5) for dep_name in ( 74 | "pip:cvedb@*", "ubuntu:libc6@*", "cargo:rand_core@0.6.2", "npm:crypto-js@4.0.0" 75 | ) 76 | ] 77 | to_test.extend([ 78 | (smoke_test.source_repo, 3) for smoke_test in SMOKE_TESTS if smoke_test.repo_name in ( 79 | "bitcoin", 80 | "pe-parse" 81 | ) 82 | ]) 83 | for dep, num_attempts in to_test: 84 | with self.subTest(msg=f"Testing the determinism of {dep}", dep=dep): 85 | first_result: Set[Package] = set() 86 | for i in range(num_attempts): 87 | if i < num_attempts // 2: 88 | attempt_cache: Optional[InMemoryPackageCache] = None 89 | else: 90 | attempt_cache = cache 91 | result = set(resolve(dep, cache=attempt_cache)) 92 | if i == 0: 93 | first_result = result 94 | else: 95 | self.assertEqual(first_result, result, 96 | msg=f"Results differed on attempt {i + 1} at resolving {dep}") 97 | 98 | def test_pip(self): 99 | self._test_resolver("pip", "pip:cvedb@*") 100 | 101 | def test_ubuntu(self): 102 | self._test_resolver("ubuntu", "ubuntu:libc6@*") 103 | 104 | def test_cargo(self): 105 | self._test_resolver("cargo", "cargo:rand_core@0.6.2") 106 | 107 | def test_npm(self): 108 | self._test_resolver("npm", "npm:crypto-js@4.0.0") 109 | 110 | 111 | class SmokeTest: 112 | def __init__(self, user_name: str, repo_name: str, commit: str): 113 | self.user_name: str = user_name 114 | self.repo_name: str = repo_name 115 | self.commit: str = commit 116 | 117 | self.url: str = f"https://github.com/{user_name}/{repo_name}/archive/{commit}.zip" 118 | self._snapshot_folder: Path = REPOS_FOLDER / (repo_name + "-" + commit) 119 | self._snapshot_zip: Path = self._snapshot_folder.with_suffix(".zip") 120 | 121 | self.expected_json: Path = REPOS_FOLDER / f"{repo_name}.expected.json" 122 | self.actual_json: Path = REPOS_FOLDER / f"{repo_name}.actual.json" 123 | 124 | @property 125 | def snapshot_folder(self) -> Path: 126 | if not self._snapshot_folder.exists(): 127 | urllib.request.urlretrieve(self.url, self._snapshot_zip) 128 | with zipfile.ZipFile(self._snapshot_zip, "r") as zip_ref: 129 | zip_ref.extractall(REPOS_FOLDER) 130 | return self._snapshot_folder 131 | 132 | @property 133 | def source_repo(self) -> SourceRepository: 134 | return SourceRepository(self.snapshot_folder) 135 | 136 | def run(self) -> PackageRepository: 137 | return resolve(self.source_repo) 138 | 139 | def __hash__(self): 140 | return hash((self.user_name, self.repo_name, self.commit)) 141 | 142 | def __eq__(self, other): 143 | return ( 144 | isinstance(other, SmokeTest) and self.user_name == other.user_name and self.repo_name == other.repo_name 145 | and self.commit == other.commit 146 | ) 147 | 148 | 149 | SMOKE_TESTS: Set[SmokeTest] = set() 150 | 151 | 152 | def gh_smoke_test(user_name: str, repo_name: str, commit: str): 153 | smoke_test = SmokeTest(user_name, repo_name, commit) 154 | SMOKE_TESTS.add(smoke_test) 155 | 156 | def do_smoke_test(func): 157 | @wraps(func) 158 | def wrapper(self: TestCase): 159 | package_list = smoke_test.run() 160 | result_it_depends = package_list.to_obj() 161 | with open(smoke_test.actual_json, "w") as f: 162 | f.write(json.dumps(result_it_depends, indent=4, sort_keys=True)) 163 | 164 | if not smoke_test.expected_json.exists(): 165 | raise ValueError(f"File {smoke_test.expected_json.absolute()} needs to be created! 
See " 166 | f"{smoke_test.actual_json.absolute()} for the output of the most recent run.") 167 | with open(smoke_test.expected_json, "r") as f: 168 | expected = json.load(f) 169 | if result_it_depends != expected: 170 | print(f"See {smoke_test.actual_json.absolute()} for the result of this run.") 171 | self.assertEqual(result_it_depends, expected) 172 | 173 | return func(self, package_list) 174 | 175 | return wrapper 176 | 177 | return do_smoke_test 178 | 179 | 180 | class TestSmoke(TestCase): 181 | maxDiff = None 182 | 183 | def setUp(self) -> None: 184 | if not os.path.exists(REPOS_FOLDER): 185 | os.makedirs(REPOS_FOLDER) 186 | 187 | @gh_smoke_test("trailofbits", "cvedb", "7441dc0e238e31829891f85fd840d9e65cb629d8") 188 | def __test_pip(self, package_list): 189 | pass 190 | 191 | @gh_smoke_test("trailofbits", "siderophile", "7bca0f5a73da98550c29032f6a2a170f472ea241") 192 | def __test_cargo(self, package_list): 193 | pass 194 | 195 | @gh_smoke_test("bitcoin", "bitcoin", "4a267057617a8aa6dc9793c4d711725df5338025") 196 | def __test_autotools(self, package_list): 197 | pass 198 | 199 | @gh_smoke_test("brix", "crypto-js", "971c31f0c931f913d22a76ed488d9216ac04e306") 200 | def __test_npm(self, package_list): 201 | pass 202 | 203 | # @gh_smoke_test("lifting-bits", "rellic", "9cf73b288a3d0c51d5de7e1060cba8656538596f") 204 | @gh_smoke_test("trailofbits", "pe-parse", "94bd12ac539382c303896f175a1ab16352e65a8f") 205 | def __test_cmake(self, package_list): 206 | pass 207 | -------------------------------------------------------------------------------- /test/test_ubuntu.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from unittest.mock import patch 3 | from it_depends.dependencies import Dependency 4 | from it_depends.ubuntu.resolver import UbuntuResolver 5 | 6 | 7 | class TestUbuntu(TestCase): 8 | def test_ubuntu(self): 9 | contents = """Package: dkms 10 | Version: 2.8.1-5ubuntu2 11 | Priority: optional 12 | Section: admin 13 | Origin: Ubuntu 14 | Maintainer: Ubuntu Developers 15 | Original-Maintainer: Dynamic Kernel Modules Support Team 16 | Bugs: https://bugs.launchpad.net/ubuntu/+filebug 17 | Installed-Size: 296 kB 18 | Pre-Depends: lsb-release 19 | Depends: kmod | kldutils, gcc | c-compiler, dpkg-dev, make | build-essential, coreutils (>= 7.3), patch, dctrl-tools 20 | Recommends: fakeroot, sudo, linux-headers-686-pae | linux-headers-amd64 | linux-headers-generic | linux-headers 21 | Suggests: menu, e2fsprogs 22 | Breaks: shim-signed (<< 1.34~) 23 | Homepage: https://github.com/dell-oss/dkms 24 | Download-Size: 66,8 kB 25 | APT-Manual-Installed: no 26 | APT-Sources: http://ar.archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages 27 | Description: Dynamic Kernel Module Support Framework 28 | DKMS is a framework designed to allow individual kernel modules to be upgraded 29 | without changing the whole kernel. It is also very easy to rebuild modules as 30 | you upgrade kernels. 
31 | 32 | Package: dkms 33 | Version: 2.8.1-5ubuntu1 34 | Priority: optional 35 | Section: admin 36 | Origin: Ubuntu 37 | Maintainer: Ubuntu Developers 38 | Original-Maintainer: Dynamic Kernel Modules Support Team 39 | Bugs: https://bugs.launchpad.net/ubuntu/+filebug 40 | Installed-Size: 296 kB 41 | Pre-Depends: lsb-release 42 | Depends: kmod | kldutils, gcc | c-compiler, dpkg-dev, make | build-essential, coreutils (>= 7.5), patch 43 | Recommends: fakeroot, sudo, linux-headers-686-pae | linux-headers-amd64 | linux-headers-generic | linux-headers 44 | Suggests: menu, e2fsprogs 45 | Breaks: shim-signed (<< 1.34~) 46 | Homepage: https://github.com/dell-oss/dkms 47 | Download-Size: 66,6 kB 48 | APT-Sources: http://ar.archive.ubuntu.com/ubuntu focal/main amd64 Packages 49 | Description: Dynamic Kernel Module Support Framework 50 | DKMS is a framework designed to allow individual kernel modules to be upgraded 51 | without changing the whole kernel. It is also very easy to rebuild modules as 52 | you upgrade kernels. 53 | 54 | """ 55 | with patch('it_depends.ubuntu.docker.run_command') as mock: 56 | mock.return_value = contents.encode() 57 | deps = tuple(UbuntuResolver().resolve(dependency=Dependency(package="dkms", source="ubuntu"))) 58 | self.assertEqual(len(deps), 1) 59 | self.assertEqual(str(deps[0]), 'ubuntu:dkms@2.8.1[ubuntu:build-essential@*,ubuntu:c-compiler@*,' 60 | 'ubuntu:coreutils@>=7.4,ubuntu:dctrl-tools@*,ubuntu:dpkg-dev@*,' 61 | 'ubuntu:gcc@*,ubuntu:kldutils@*,ubuntu:kmod@*,ubuntu:make@*,ubuntu:patch@*]') 62 | -------------------------------------------------------------------------------- /test/test_vcs.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from it_depends.vcs import resolve 4 | 5 | 6 | class TestVCS(TestCase): 7 | def test_resolve(self): 8 | repo = resolve("github.com/trailofbits/graphtage") 9 | self.assertEqual(repo.vcs.name, "Git") 10 | --------------------------------------------------------------------------------