├── .dockerignore
├── .github
│   └── workflows
│       ├── pip-audit.yml
│       ├── pythonpublish.yml
│       └── tests.yml
├── CODEOWNERS
├── LICENSE
├── README.md
├── hooks
│   ├── README.md
│   ├── pre-commit
│   └── pre-push
├── it_depends
│   ├── __init__.py
│   ├── __main__.py
│   ├── audit.py
│   ├── autotools.py
│   ├── cargo.py
│   ├── cli.py
│   ├── cmake.py
│   ├── db.py
│   ├── dependencies.py
│   ├── docker.py
│   ├── go.py
│   ├── graphs.py
│   ├── html.py
│   ├── it_depends.py
│   ├── native.py
│   ├── npm.py
│   ├── pip.py
│   ├── resolver.py
│   ├── sbom.py
│   ├── ubuntu
│   │   ├── __init__.py
│   │   ├── apt.py
│   │   ├── docker.py
│   │   └── resolver.py
│   └── vcs.py
├── pyproject.toml
├── setup.py
└── test
    ├── __init__.py
    ├── rebuild_expected_output.py
    ├── repos
    │   ├── .gitignore
    │   ├── bitcoin.expected.json
    │   ├── cvedb.expected.json
    │   ├── pe-parse.expected.json
    │   └── siderophile.expected.json
    ├── test_apt.py
    ├── test_audit.py
    ├── test_db.py
    ├── test_go.py
    ├── test_graphs.py
    ├── test_native.py
    ├── test_resolver.py
    ├── test_smoke.py
    ├── test_ubuntu.py
    └── test_vcs.py
/.dockerignore:
--------------------------------------------------------------------------------
1 | test/repos
2 |
--------------------------------------------------------------------------------
/.github/workflows/pip-audit.yml:
--------------------------------------------------------------------------------
1 | name: Scan dependencies for vulnerabilities with pip-audit
2 |
3 | on:
4 | push:
5 | branches: [ "master" ]
6 | pull_request:
7 | branches: [ "master" ]
8 | schedule:
9 | - cron: "0 12 * * *"
10 |
11 | jobs:
12 | pip-audit:
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - name: Checkout repository
17 | uses: actions/checkout@v3
18 |
19 | - name: Install Python
20 | uses: actions/setup-python@v4
21 | with:
22 | python-version: "3.x"
23 |
24 | - name: Install project
25 | run: |
26 | python -m venv /tmp/pip-audit-env
27 | source /tmp/pip-audit-env/bin/activate
28 |
29 | python -m pip install --upgrade pip
30 | python -m pip install .
31 |
32 |
33 | - name: Run pip-audit
34 | uses: pypa/gh-action-pip-audit@v1.0.8
35 | with:
36 | virtual-environment: /tmp/pip-audit-env
37 |
38 |
--------------------------------------------------------------------------------
/.github/workflows/pythonpublish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Upload Python Package
5 |
6 | on:
7 | release:
8 | types: [published]
9 |
10 | jobs:
11 | deploy:
12 |
13 | runs-on: ubuntu-20.04
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 | - name: Set up Python
18 | uses: actions/setup-python@v1
19 | with:
20 | python-version: '3.x'
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install setuptools wheel twine
25 | - name: Build and publish
26 | env:
27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 | run: |
30 | python setup.py sdist bdist_wheel
31 | twine upload dist/*
32 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: tests
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | pull_request:
10 | branches: [ master ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-20.04
16 | strategy:
17 | matrix:
18 | python-version: ["3.9", "3.10", "3.11", "3.12"]
19 |
20 | steps:
21 | - uses: actions/checkout@v2
22 | - name: Set up Python ${{ matrix.python-version }}
23 | uses: actions/setup-python@v1
24 | with:
25 | python-version: ${{ matrix.python-version }}
26 | - name: Install dependencies
27 | run: |
28 | sudo apt-get update -y
29 | sudo apt-get install -y apt-file cmake autoconf golang cargo npm clang
30 | sudo apt-file update
31 | python -m pip install --upgrade pip
32 | pip install setuptools
33 | pip install .[dev]
34 | - name: Lint with flake8
35 | run: |
36 | # stop the build if there are Python syntax errors or undefined names
37 | flake8 it_depends test --exclude test/repos --count --select=E9,F63,F7,F82 --show-source --statistics
38 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
39 | flake8 it_depends test --exclude test/repos --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
40 | - name: Test with pytest
41 | run: |
42 | pytest test --ignore test/repos
43 |
--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @ESultanik
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # It-Depends
3 | [](https://badge.fury.io/py/it-depends)
4 | [](https://github.com/trailofbits/it-depends/actions)
5 | [](https://slack.empirehacking.nyc)
6 |
7 | It-Depends is a tool to automatically build a dependency graph and Software Bill of Materials (SBOM) for packages and arbitrary source code repositories. You can use it to enumerate all third-party dependencies for a software package, map those dependencies to known security vulnerabilities, and compare the similarity of two packages based on their dependencies.
8 |
9 | To the best of our knowledge, It-Depends is the only such tool with the following features:
10 | * Support for C/C++ projects (both autotools and cmake)
11 | * Automated resolution of native library dependencies partially based on dynamic analysis (_e.g._, the Python package `pytz` depends on the native library `libtinfo.so.6`)
12 | * Enumeration of _all possible_ dependency resolutions, not just a _single feasible_ resolution
13 | * A comparison metric for the similarity between two packages based on their dependency graph
14 |
15 | ## Features ⭐
16 | * Supports Go, JavaScript, Rust, Python, and C/C++ projects.
17 | * Accepts source code repositories or package specifications like `pip:it-depends`
18 | * Extracts dependencies from cmake/autotools repositories without building them
19 | * Finds native dependencies for high-level languages like Python or JavaScript
20 | * Provides visualization based on vis.js or dot
21 | * Matches dependencies against known CVEs
22 | * Exports Software Bills of Materials (SBOMs)
23 | * Machine-intelligible JSON output
24 | * Support for the SPDX standard is [in active development](https://github.com/trailofbits/it-depends/tree/dev/spdx)
25 |
26 | ### Can It-Depends Do It? It Depends. 🍋
27 | * It-Depends does not detect vendored or copy/pasted dependencies
28 | * Results from build systems like autotools and cmake that entail arbitrary computation at install time are
29 | best-effort
30 | * Resolution of native dependencies is best-effort
31 | * Some native dependencies are resolved through dynamic analysis
32 | * Native dependencies are inferred by cross-referencing file requirements against paths provided by the Ubuntu
33 | package repository; dependencies may be different across other Linux distributions or Ubuntu versions
34 | * It-Depends attempts to resolve *all* possible package versions that satisfy a dependency
35 | * It-Depends *does not* find a single satisfying package resolution
36 | * The list of resolved packages is intended to be a superset of the packages required by the installation of
37 | a package on any system
38 | * The `--audit` feature may discover vulnerabilities in upstream dependencies that are either not exploitable in the
39 | target package or are in a package version that cannot exist in any valid dependency resolution of the target
40 | package
41 | * It-Depends caches data that it expects to be immutable in a local database
42 | * If a package is ever deleted or yanked from a package repository after it was already cached, It-Depends will
43 | continue to use the cached data unless the cache is cleared with `--clear-cache`
44 |
45 |
46 | ## Quickstart 🚀
47 | ```commandline
48 | $ pip3 install it-depends
49 | ```
50 |
51 | ### Running it 🏃
52 | Run `it-depends` in the root of the source repository you would like to analyze:
53 | ```console
54 | $ cd /path/to/project
55 | $ it-depends
56 | ```
57 | or alternatively point it to the path directly:
58 | ```console
59 | $ it-depends /path/to/project
60 | ```
61 | or alternatively specify a package from a public package repository:
62 | ```console
63 | $ it-depends pip:numpy
64 | $ it-depends apt:libc6@2.31
65 | $ it-depends npm:lodash@>=4.17.0
66 | ```
67 |
68 | It-Depends will output the full dependency hierarchy in JSON format. Additional output formats such
69 | as Graphviz/Dot are available via the `--output-format` option.
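For example, to render the dependency graph with Graphviz (the project path and output
file name here are illustrative):
```console
$ it-depends /path/to/project --output-format dot --output-file deps.dot
$ dot -Tsvg -o deps.svg deps.dot
```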
70 |
71 | It-Depends can automatically try to match packages against the [OSV vulnerability database](https://osv.dev/) with the
72 | `--audit` option. This is a best-effort matching, as it is based on package names, which might not always be consistent.
73 | Any discovered vulnerabilities are added to the JSON output.
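For example (the package specifier is illustrative):
```console
$ it-depends pip:pyyaml --audit
```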
74 |
75 | It-Depends attempts to parallelize as much of its effort as possible. To limit the maximum number of parallel tasks, use
76 | the `--max-workers` option.
77 |
78 | By default, It-Depends recursively resolves all packages' dependencies to construct a complete dependency graph. The
79 | depth of the recursion can be limited using the `--depth-limit` option. For example,
80 | ```console
81 | $ it-depends pip:graphtage --depth-limit 1
82 | ```
83 | will only enumerate the direct dependencies of Graphtage.
84 |
85 | ### Examples 🧑‍🏫
86 |
87 | Here is an example of running It-Depends on its own source repository:
88 | 
89 |
90 | This is the resulting [JSON](https://gist.github.com/feliam/2bdec76f7aa50602869059bfa14df156)
91 | with all the discovered dependencies.
92 | This is the resulting [Graphviz dot file](https://gist.github.com/feliam/275951f5788c23a477bc7cf758a32cc2),
93 | which produces this graph:
94 | 
95 |
96 | This is the resulting dependency graph:
97 | 
98 |
99 | ### It-Depends’ Dependencies 🎭
100 |
101 | JavaScript requires `npm`\
102 | Rust requires `cargo`\
103 | Python requires `pip`\
104 | C/C++ requires `autotools` and/or `cmake`\
105 | Several native dependencies are resolved using Ubuntu’s file-to-path database `apt-file`; on other
106 | distributions and operating systems this is handled seamlessly through an Ubuntu `docker` container\
107 | Currently `docker` is required to resolve native dependencies
108 |
109 | ## Development 👷
110 | ```commandline
111 | $ git clone https://github.com/trailofbits/it-depends
112 | $ cd it-depends
113 | $ python3 -m venv venv # Optional virtualenv
114 | $ source venv/bin/activate  # Optional virtualenv
115 | $ pip3 install -e '.[dev]'
116 | $ git config core.hooksPath ./hooks # Optionally enable git commit hooks for linting
117 | ```
118 |
119 | ## License and Acknowledgements 📃️
120 |
121 | This research was developed by [Trail of Bits](https://www.trailofbits.com/) based upon work supported by DARPA under Contract No. HR001120C0084 (Distribution Statement **A**, Approved for Public Release: Distribution Unlimited). Any opinions, findings and conclusions or recommendations expressed in this material are those of the author(s) and do not necessarily reflect the views of the United States Government or DARPA.
122 |
123 | [Felipe Manzano](https://github.com/feliam) and [Evan Sultanik](https://github.com/ESultanik) are
124 | the active maintainers, but [Alessandro Gario](https://github.com/alessandrogario),
125 | [Eric Kilmer](https://github.com/ekilmer), [Alexander Remie](https://github.com/rmi7), and [Henrik Brodin](https://github.com/hbrodin) all made significant
126 | contributions to the tool’s inception and development.
127 |
128 | It-Depends is licensed under the [GNU Lesser General Public License v3.0](LICENSE). [Contact us](mailto:opensource@trailofbits.com) if you’re looking for an exception to the terms.
129 |
130 | © 2021, Trail of Bits.
131 |
--------------------------------------------------------------------------------
/hooks/README.md:
--------------------------------------------------------------------------------
1 | # Default Git Hooks for it-depends Development
2 |
3 | To enable these hooks, developers must run this from within the repo after cloning:
4 | ```bash
5 | $ git config core.hooksPath ./hooks
6 | ```
7 |
--------------------------------------------------------------------------------
/hooks/pre-commit:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | if git rev-parse --verify HEAD >/dev/null 2>&1
4 | then
5 | against=HEAD
6 | else
7 | # Initial commit: diff against an empty tree object
8 | against=$(git hash-object -t tree /dev/null)
9 | fi
10 |
11 | # If you want to allow non-ASCII filenames set this variable to true.
12 | allownonascii=$(git config --bool hooks.allownonascii)
13 |
14 | # Redirect output to stderr.
15 | exec 1>&2
16 |
17 | # Cross platform projects tend to avoid non-ASCII filenames; prevent
18 | # them from being added to the repository. We exploit the fact that the
19 | # printable range starts at the space character and ends with tilde.
20 | if [ "$allownonascii" != "true" ] &&
21 | # Note that the use of brackets around a tr range is ok here, (it's
22 | # even required, for portability to Solaris 10's /usr/bin/tr), since
23 | # the square bracket bytes happen to fall in the designated range.
24 | test $(git diff --cached --name-only --diff-filter=A -z $against |
25 | LC_ALL=C tr -d '[ -~]\0' | wc -c) != 0
26 | then
27 | cat <<\EOF
28 | Error: Attempt to add a non-ASCII file name.
29 |
30 | This can cause problems if you want to work with people on other platforms.
31 |
32 | To be portable it is advisable to rename the file.
33 |
34 | If you know what you are doing you can disable this check using:
35 |
36 | git config hooks.allownonascii true
37 | EOF
38 | exit 1
39 | fi
40 |
41 | which flake8 >/dev/null 2>/dev/null
42 | RESULT=$?
43 | if [ $RESULT -ne 0 ]; then
44 | cat << \EOF
45 | flake8 is not installed! Run this from the root of the it-depends repo:
46 |
47 | pip3 install -e .[dev]
48 | EOF
49 | exit 1
50 | fi
51 |
52 | echo Linting Python code...
53 | flake8 it_depends test --exclude test/repos --count --select=E9,F63,F7,F82 --show-source --statistics 1>/dev/null 2>/dev/null
54 | RESULT=$?
55 | if [ $RESULT -ne 0 ]; then
56 | cat <<\EOF
57 | Failed Python lint:
58 |
59 | flake8 it_depends test --exclude test/repos --count --select=E9,F63,F7,F82 --show-source --statistics
60 |
61 | EOF
62 | flake8 it_depends test --exclude test/repos --count --select=E9,F63,F7,F82 --show-source --statistics
63 | exit 1
64 | fi
65 |
66 | flake8 it_depends test --exclude test/repos --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 1>/dev/null 2>/dev/null
67 | RESULT=$?
68 | if [ $RESULT -ne 0 ]; then
69 | cat <<\EOF
70 | Failed Python lint:
71 |
72 | flake8 it_depends test --exclude test/repos --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
73 |
74 | EOF
75 | flake8 it_depends test --exclude test/repos --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
76 | exit 1
77 | fi
78 |
79 | which mypy >/dev/null 2>/dev/null
80 | RESULT=$?
81 | if [ $RESULT -ne 0 ]; then
82 | cat << \EOF
83 | mypy is not installed! Run this from the root of the it-depends repo:
84 |
85 | pip3 install -e .[dev]
86 | EOF
87 | exit 1
88 | fi
89 |
90 | echo Type-checking Python code...
91 | mypy --exclude "test/repos/.*" --ignore-missing-imports it_depends test
92 | RESULT=$?
93 | if [ $RESULT -ne 0 ]; then
94 | exit $RESULT
95 | fi
96 |
97 | # If there are whitespace errors, print the offending file names and fail.
98 | git diff-index --check --cached $against --
99 |
--------------------------------------------------------------------------------
/hooks/pre-push:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | which pytest >/dev/null 2>/dev/null
4 | RESULT=$?
5 | if [ $RESULT -ne 0 ]; then
6 | cat << \EOF
7 | pytest is not installed! Run this from the root of the it-depends repo:
8 |
9 | pip3 install -e .[dev]
10 | EOF
11 | exit 1
12 | fi
13 |
14 | pytest test --ignore test/repos
15 | exit $?
16 |
--------------------------------------------------------------------------------
/it_depends/__init__.py:
--------------------------------------------------------------------------------
1 | from importlib import import_module
2 | from pkgutil import iter_modules
3 | from pathlib import Path
4 |
5 | from .it_depends import *
6 |
7 | # Automatically load all modules in the `it_depends` package,
8 | # so all DependencyClassifiers will auto-register themselves:
9 | package_dir = Path(__file__).resolve().parent
10 | for (_, module_name, _) in iter_modules([str(package_dir)]): # type: ignore
11 | # import the module and iterate through its attributes
12 | if module_name != "__main__":
13 | module = import_module(f"{__name__}.{module_name}")
14 |
--------------------------------------------------------------------------------
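A consequence of the auto-import in `__init__.py` above, sketched under the assumption that
it-depends is installed: importing the top-level package is enough for every resolver to
register itself, which is how the CLI's `--list` enumerates them via `resolvers()` (see
`cli.py` below).

```python
# Sketch: importing it_depends triggers the module auto-loading shown above,
# so every resolver has registered itself by the time we enumerate them.
# `resolvers()` is the same registry accessor that cli.py uses for --list.
import it_depends  # noqa: F401  (imported for its registration side effect)
from it_depends.dependencies import resolvers

for resolver in sorted(resolvers(), key=lambda r: r.name):
    print(resolver.name, "-", resolver.description)
```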
/it_depends/__main__.py:
--------------------------------------------------------------------------------
1 | from .cli import main
2 |
3 | if __name__ == "__main__":
4 | exit(main())
5 |
--------------------------------------------------------------------------------
/it_depends/audit.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from concurrent.futures import ThreadPoolExecutor, as_completed
3 | import logging
4 | from requests import post
5 | from tqdm import tqdm
6 | from typing import Dict, FrozenSet, Iterable, List, Union, Tuple
7 |
8 | from .dependencies import Package, PackageRepository, Vulnerability
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
13 | class OSVVulnerability(Vulnerability):
14 | """Represents a vulnerability from the OSV project"""
15 |
16 | """Additional keys available from the OSV Vulnerability db."""
17 | EXTRA_KEYS = [
18 | "published",
19 | "modified",
20 | "withdrawn",
21 | "related",
22 | "package",
23 | "details",
24 | "affects",
25 | "affected",
26 | "references",
27 | "severity",
28 | "database_specific",
29 | "ecosystem_specific",
30 | ]
31 |
32 | def __init__(self, osv_dict: Dict):
33 | # Get the first available information as summary (N/A if none)
34 | summary = osv_dict.get("summary", "") or osv_dict.get("details", "") or "N/A"
35 | super().__init__(osv_dict["id"], osv_dict.get("aliases", []), summary)
36 |
37 | # Inherit all other attributes
38 | for k in OSVVulnerability.EXTRA_KEYS:
39 | setattr(self, k, osv_dict.get(k, None))
40 |
41 | @classmethod
42 | def from_osv_dict(cls, d: Dict):
43 | return OSVVulnerability(d)
44 |
45 |
46 | class VulnerabilityProvider(ABC):
47 | """Interface of a vulnerability provider."""
48 |
49 | def query(self, pkg: Package) -> Iterable[Vulnerability]:
50 | """Queries the vulnerability provider for vulnerabilities in pkg"""
51 | raise NotImplementedError()
52 |
53 |
54 | class OSVProject(VulnerabilityProvider):
55 | """OSV project vulnerability provider"""
56 |
57 | QUERY_URL = "https://api.osv.dev/v1/query"
58 |
59 | def query(self, pkg: Package) -> Iterable[OSVVulnerability]:
60 | """Queries the OSV project for vulnerabilities in Package pkg"""
61 | q = {"version": str(pkg.version), "package": {"name": pkg.name}}
62 | r = post(OSVProject.QUERY_URL, json=q).json()
63 | return map(OSVVulnerability.from_osv_dict, r.get("vulns", []))
64 |
65 |
66 | def vulnerabilities(repo: PackageRepository, nworkers=None) -> PackageRepository:
67 | def _get_vulninfo(pkg: Package) -> Tuple[Package, FrozenSet[Vulnerability]]:
68 | """Enrich a Package with vulnerability information"""
69 | ret = OSVProject().query(pkg)
70 | # Do not modify pkg here to ensure no concurrent
71 | # modifications, instead return and let the main
72 | # thread handle the updates.
73 |         return (pkg, frozenset(ret))
74 |
75 | with ThreadPoolExecutor(max_workers=nworkers) as executor, tqdm(
76 | desc="Checking for vulnerabilities", leave=False, unit=" packages"
77 | ) as t:
78 | futures = {executor.submit(_get_vulninfo, pkg): pkg for pkg in repo}
79 | t.total = len(futures)
80 |
81 | for future in as_completed(futures):
82 | try:
83 | t.update(1)
84 | pkg, vulns = future.result()
85 | except Exception as exc:
86 |                 logger.error(
87 |                     f"Failed to retrieve vulnerability information. Exception: {exc}"
88 |                 )
89 | else:
90 | pkg.update_vulnerabilities(vulns)
91 |
92 | return repo
93 |
--------------------------------------------------------------------------------
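For reference, the OSV lookup that `OSVProject.query` performs can be reproduced standalone.
This is a minimal sketch mirroring the request shape in `audit.py`; the package name and
version below are illustrative only:

```python
# Minimal standalone sketch of the OSV query performed by OSVProject.query
# in audit.py; the package name/version are illustrative only.
from requests import post

QUERY_URL = "https://api.osv.dev/v1/query"


def osv_vulns(name: str, version: str) -> list:
    """Return the raw OSV vulnerability records for a package version."""
    response = post(QUERY_URL, json={"version": version, "package": {"name": name}})
    response.raise_for_status()
    return response.json().get("vulns", [])


if __name__ == "__main__":
    for vuln in osv_vulns("lodash", "4.17.0"):
        print(vuln["id"], vuln.get("summary", "N/A"))
```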
/it_depends/autotools.py:
--------------------------------------------------------------------------------
1 | import os
2 | import functools
3 | import re
4 | import itertools
5 | import shutil
6 | import subprocess
7 | import logging
8 | import tempfile
9 | from typing import List, Optional, Tuple
10 |
11 | from it_depends.ubuntu.apt import cached_file_to_package as file_to_package
12 |
13 | from .dependencies import (
14 | Dependency,
15 | DependencyResolver,
16 | PackageCache,
17 | ResolverAvailability,
18 | SimpleSpec,
19 | SourcePackage,
20 | SourceRepository,
21 | Version,
22 | )
23 |
24 | logger = logging.getLogger(__name__)
25 |
26 |
27 | class AutotoolsResolver(DependencyResolver):
28 | """This attempts to parse configure.ac in an autotool based repo.
29 | It supports the following macros:
30 | AC_INIT, AC_CHECK_HEADER, AC_CHECK_LIB, PKG_CHECK_MODULES
31 |
32 | BUGS:
33 | does not handle boost deps
34 | assumes ubuntu host
35 | """
36 |
37 | name = "autotools"
38 | description = "classifies the dependencies of native/autotools packages parsing configure.ac"
39 |
40 | def is_available(self) -> ResolverAvailability:
41 | if shutil.which("autoconf") is None:
42 | return ResolverAvailability(
43 | False,
44 | "`autoconf` does not appear to be installed! "
45 | "Make sure it is installed and in the PATH.",
46 | )
47 | return ResolverAvailability(True)
48 |
49 | def can_resolve_from_source(self, repo: SourceRepository) -> bool:
50 | return bool(self.is_available()) and (repo.path / "configure.ac").exists()
51 |
52 | @staticmethod
53 | def _ac_check_header(header_file, file_to_package_cache=None):
54 | """
55 | Macro: AC_CHECK_HEADER
56 | Checks if the system header file header-file is compilable.
57 | https://www.gnu.org/software/autoconf/manual/autoconf-2.67/html_node/Generic-Headers.html
58 | """
59 | logger.info(f"AC_CHECK_HEADER {header_file}")
60 | package_name = file_to_package(
61 | f"{re.escape(header_file)}", file_to_package_cache=file_to_package_cache
62 | )
63 | return Dependency(package=package_name, semantic_version=SimpleSpec("*"), source="ubuntu")
64 |
65 | @staticmethod
66 | def _ac_check_lib(function, file_to_package_cache=None):
67 | """
68 | Macro: AC_CHECK_LIB
69 | Checks for the presence of certain C, C++, or Fortran library archive files.
70 | https://www.gnu.org/software/autoconf/manual/autoconf-2.67/html_node/Libraries.html#Libraries
71 | """
72 | lib_file, function_name = function.split(".")
73 | logger.info(f"AC_CHECK_LIB {lib_file}")
74 | package_name = file_to_package(
75 | f"lib{re.escape(lib_file)}(.a|.so)",
76 | file_to_package_cache=file_to_package_cache,
77 | )
78 | return Dependency(package=package_name, semantic_version=SimpleSpec("*"), source="ubuntu")
79 |
80 | @staticmethod
81 | def _pkg_check_modules(module_name, version=None, file_to_package_cache=None):
82 | """
83 | Macro: PKG_CHECK_MODULES
84 | The main interface between autoconf and pkg-config.
85 | Provides a very basic and easy way to check for the presence of a
86 | given package in the system.
87 | """
88 | if not version:
89 | version = "*"
90 | module_file = re.escape(module_name + ".pc")
91 | logger.info(f"PKG_CHECK_MODULES {module_file}, {version}")
92 | package_name = file_to_package(module_file, file_to_package_cache=file_to_package_cache)
93 | return Dependency(
94 | package=package_name, semantic_version=SimpleSpec(version), source="ubuntu"
95 | )
96 |
97 | @staticmethod
98 | @functools.lru_cache(maxsize=128)
99 | def _replace_variables(token: str, configure: str):
100 | """
101 | Search all variable occurrences in token and then try to find
102 | bindings for them in the configure script.
103 | """
104 | if "$" not in token:
105 | return token
106 | variable_list = re.findall(r"\$([a-zA-Z_0-9]+)|\${([_a-zA-Z0-9]+)}", token)
107 | variables = set(
108 | var for var in itertools.chain(*variable_list) if var
109 | ) # remove dups and empty
110 | for var in variables:
111 | logger.info(f"Trying to find bindings for {var} in configure")
112 |
113 | # This tries to find a single assign to the variable in question
114 | # ... var= "SOMETHING"
115 |         # We ignore the fact that variables could also appear in other constructs
116 |         # For example:
117 |         #    for var in THIS THAT ;
118 |         # TODO/CHALLENGE Merge these two \/
119 | solutions = re.findall(f'{var}=\\s*"([^"]*)"', configure)
120 | solutions += re.findall(f"{var}=\\s*'([^']*)'", configure)
121 | if len(solutions) > 1:
122 | logger.warning(f"Found several solutions for {var}: {solutions}")
123 | if len(solutions) == 0:
124 | logger.warning(f"No solution found for binding {var}")
125 | continue
126 | logger.info(f"Found a solution {solutions}")
127 |             # `solutions` is guaranteed to be non-empty at this point
128 |             # (the empty case was handled above with `continue`), so
129 |             # take the first binding found and substitute both the
130 |             # `$var` and `${var}` spellings.
131 |             sol = solutions[0]
132 |             token = token.replace(f"${var}", sol).replace(
133 |                 f"${{{var}}}", sol
134 |             )
135 | if "$" in token:
136 | raise ValueError(f"Could not find a binding for variable/s in {token}")
137 | return token
138 |
139 | def resolve_from_source(
140 | self, repo: SourceRepository, cache: Optional[PackageCache] = None
141 | ) -> Optional[SourcePackage]:
142 | if not self.can_resolve_from_source(repo):
143 | return None
144 | logger.info(f"Getting dependencies for autotool repo {repo.path.absolute()}")
145 | with tempfile.NamedTemporaryFile() as tmp:
146 | # builds a temporary copy of configure.ac containing aclocal env
147 | subprocess.check_output(("aclocal", f"--output={tmp.name}"), cwd=repo.path)
148 | with open(tmp.name, "ab") as tmp2:
149 | with open(repo.path / "configure.ac", "rb") as conf:
150 | tmp2.write(conf.read())
151 |
152 | trace = subprocess.check_output(
153 | (
154 | "autoconf",
155 | "-t",
156 | "AC_CHECK_HEADER:$n:$1",
157 | "-t",
158 | "AC_CHECK_LIB:$n:$1.$2",
159 | "-t",
160 | "PKG_CHECK_MODULES:$n:$2",
161 | "-t",
162 | "PKG_CHECK_MODULES_STATIC:$n",
163 | tmp.name,
164 | ),
165 | cwd=repo.path,
166 | ).decode("utf8")
167 | configure = subprocess.check_output(["autoconf", tmp.name], cwd=repo.path).decode(
168 | "utf8"
169 | )
170 |
171 | file_to_package_cache: List[Tuple[str]] = []
172 | deps = []
173 | for macro in trace.split("\n"):
174 | logger.debug(f"Handling: {macro}")
175 | macro, *arguments = macro.split(":")
176 | try:
177 | arguments = tuple(self._replace_variables(arg, configure) for arg in arguments) # type: ignore
178 | except Exception as e:
179 | logger.info(str(e))
180 | continue
181 | try:
182 | if macro == "AC_CHECK_HEADER":
183 | deps.append(
184 | self._ac_check_header(
185 | header_file=arguments[0],
186 | file_to_package_cache=file_to_package_cache,
187 | )
188 | )
189 | elif macro == "AC_CHECK_LIB":
190 | deps.append(
191 | self._ac_check_lib(
192 | function=arguments[0],
193 | file_to_package_cache=file_to_package_cache,
194 | )
195 | )
196 | elif macro == "PKG_CHECK_MODULES":
197 | module_name, *version = arguments[0].split(" ")
198 | deps.append(
199 | self._pkg_check_modules(
200 | module_name=module_name,
201 | version="".join(version),
202 | file_to_package_cache=file_to_package_cache,
203 | )
204 | )
205 | else:
206 | logger.error("Macro not supported %r", macro)
207 | except Exception as e:
208 | logger.error(str(e))
209 | continue
210 |
211 | """
212 | # Identity of this package.
213 | PACKAGE_NAME='Bitcoin Core'
214 | PACKAGE_TARNAME='bitcoin'
215 | PACKAGE_VERSION='21.99.0'
216 | PACKAGE_STRING='Bitcoin Core 21.99.0'
217 | PACKAGE_BUGREPORT='https://github.com/bitcoin/bitcoin/issues'
218 | PACKAGE_URL='https://bitcoincore.org/"""
219 | try:
220 | package_name = self._replace_variables("$PACKAGE_NAME", configure)
221 | except ValueError as e:
222 | logger.error(str(e))
223 | package_name = os.path.basename(repo.path)
224 |
225 | try:
226 | package_version = self._replace_variables("$PACKAGE_VERSION", configure)
227 | except ValueError as e:
228 | logger.error(str(e))
229 | package_version = "0.0.0"
230 |
231 | return SourcePackage(
232 | name=package_name,
233 | version=Version.coerce(package_version),
234 | source=self.name,
235 | dependencies=deps,
236 | source_repo=repo,
237 | )
238 |
--------------------------------------------------------------------------------
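The resolver's core mechanism, autoconf macro tracing, can be reproduced by hand. A sketch,
assuming an autotools project in the current directory; the traced output lines are
illustrative:

```console
$ aclocal --output=/tmp/combined.m4
$ cat configure.ac >> /tmp/combined.m4
$ autoconf -t 'AC_CHECK_HEADER:$n:$1' -t 'AC_CHECK_LIB:$n:$1.$2' /tmp/combined.m4
AC_CHECK_HEADER:zlib.h
AC_CHECK_LIB:z.inflateInit
```

Each traced line is then split on `:` exactly as `resolve_from_source` does, and each argument
is mapped to an Ubuntu package via the `apt-file`-backed `file_to_package`.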
/it_depends/cargo.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import json
3 | import tempfile
4 | import shutil
5 | import subprocess
6 | import logging
7 | from typing import Iterator, Optional, Type, Union, Dict
8 |
9 | from semantic_version.base import Always, BaseSpec
10 |
11 | from .dependencies import (
12 | Dependency,
13 | DependencyResolver,
14 | Package,
15 | PackageCache,
16 | ResolverAvailability,
17 | SimpleSpec,
18 | SourcePackage,
19 | SourceRepository,
20 | Version,
21 | InMemoryPackageCache,
22 | )
23 |
24 | logger = logging.getLogger(__name__)
25 |
26 |
27 | @BaseSpec.register_syntax
28 | class CargoSpec(SimpleSpec):
29 | SYNTAX = "cargo"
30 |
31 | class Parser(SimpleSpec.Parser):
32 | @classmethod
33 | def parse(cls, expression):
34 | # The only difference here is that cargo clauses can have whitespace, so we need to strip each block:
35 | blocks = [b.strip() for b in expression.split(",")]
36 | clause = Always()
37 | for block in blocks:
38 | if not cls.NAIVE_SPEC.match(block):
39 | raise ValueError("Invalid simple block %r" % block)
40 | clause &= cls.parse_block(block)
41 |
42 | return clause
43 |
44 | def __str__(self):
45 | # remove the whitespace to canonicalize the spec
46 | return ",".join(b.strip() for b in self.expression.split(","))
47 |
48 | def __or__(self, other):
49 | return CargoSpec(f"{self.expression},{other.expression}")
50 |
51 |
52 | def get_dependencies(
53 | repo: SourceRepository,
54 | check_for_cargo: bool = True,
55 | cache: Optional[PackageCache] = None,
56 | ) -> Iterator[Package]:
57 | if check_for_cargo and shutil.which("cargo") is None:
58 | raise ValueError(
59 | "`cargo` does not appear to be installed! Make sure it is installed and in the PATH."
60 | )
61 |
62 | metadata = json.loads(
63 | subprocess.check_output(["cargo", "metadata", "--format-version", "1"], cwd=repo.path)
64 | )
65 |
66 | if "workspace_members" in metadata:
67 | workspace_members = {member[: member.find(" ")] for member in metadata["workspace_members"]}
68 | else:
69 | workspace_members = set()
70 |
71 | for package in metadata["packages"]:
72 | if package["name"] in workspace_members:
73 | _class: Type[Union[SourcePackage, Package]] = SourcePackage
74 | kwargs = {"source_repo": repo}
75 | else:
76 | _class = Package
77 | kwargs = {}
78 |
79 | dependencies: Dict[str, Dependency] = {}
80 | for dep in package["dependencies"]:
81 | if dep["kind"] is not None:
82 | continue
83 | if dep["name"] in dependencies:
84 | dependencies[dep["name"]].semantic_version = dependencies[
85 | dep["name"]
86 | ].semantic_version | CargoResolver.parse_spec(dep["req"])
87 | else:
88 | dependencies[dep["name"]] = Dependency(
89 | package=dep["name"],
90 | semantic_version=CargoResolver.parse_spec(dep["req"]),
91 | source=CargoResolver(),
92 | )
93 |
94 | yield _class( # type: ignore
95 | name=package["name"],
96 | version=Version.coerce(package["version"]),
97 | source="cargo",
98 | dependencies=dependencies.values(),
99 | vulnerabilities=(),
100 | **kwargs,
101 | )
102 |
103 |
104 | class CargoResolver(DependencyResolver):
105 | name = "cargo"
106 | description = "classifies the dependencies of Rust packages using `cargo metadata`"
107 |
108 | def is_available(self) -> ResolverAvailability:
109 | if shutil.which("cargo") is None:
110 | return ResolverAvailability(
111 | False,
112 | "`cargo` does not appear to be installed! "
113 | "Make sure it is installed and in the PATH.",
114 | )
115 | return ResolverAvailability(True)
116 |
117 | @classmethod
118 | def parse_spec(cls, spec: str) -> CargoSpec:
119 | return CargoSpec(spec)
120 |
121 | def can_resolve_from_source(self, repo: SourceRepository) -> bool:
122 | return bool(self.is_available()) and (repo.path / "Cargo.toml").exists()
123 |
124 | def resolve_from_source(
125 | self, repo: SourceRepository, cache: Optional[PackageCache] = None
126 | ) -> Optional[SourcePackage]:
127 | if not self.can_resolve_from_source(repo):
128 | return None
129 | result = None
130 | for package in get_dependencies(repo, check_for_cargo=False):
131 | if isinstance(package, SourcePackage):
132 | result = package
133 | else:
134 | if cache is not None:
135 | cache.add(package)
136 |                     for dep in package.dependencies:
137 |                         if not cache.was_resolved(dep):
138 |                             cache.set_resolved(dep)
139 | return result
140 |
141 | def resolve(self, dependency: Dependency) -> Iterator[Package]:
142 | """search_result = subprocess.check_output(["cargo", "search", "--limit", "100", str(dependency.package)]).decode()
143 | for line in search_result.splitlines():
144 | pkgid = (line.split("#", 1)[0].strip())
145 | if pkgid.startswith(f"{dependency.package}"):
146 | break
147 | else:
148 | return
149 | """
150 | pkgid = dependency.package
151 |
152 | # Need to translate a semantic version into a cargo semantic version
153 | # https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#caret-requirements
154 | # caret requirement
155 | semantic_version = str(dependency.semantic_version)
156 | semantic_versions = semantic_version.split(",")
157 | cache = InMemoryPackageCache()
158 | with cache:
159 | for semantic_version in map(str.strip, semantic_versions):
160 | if semantic_version[0].isnumeric():
161 | semantic_version = "=" + semantic_version
162 | pkgid = f'{pkgid.split("=")[0].strip()} = "{semantic_version}"'
163 |
164 | logger.debug(f"Found {pkgid} for {dependency} in crates.io")
165 | with tempfile.TemporaryDirectory() as tmpdir:
166 | subprocess.check_output(["cargo", "init"], cwd=tmpdir)
167 | with open(Path(tmpdir) / "Cargo.toml", "a") as f:
168 | f.write(f"{pkgid}\n")
169 | self.resolve_from_source(SourceRepository(path=tmpdir), cache)
170 | cache.set_resolved(dependency)
171 | # TODO: propagate up any other info we have in this cache
172 | return cache.match(dependency)
173 |
--------------------------------------------------------------------------------
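A small usage sketch of `CargoSpec`, assuming it-depends is importable: it shows how
comma-separated, whitespace-padded cargo requirements are combined and canonicalized by the
custom `Parser` before being handed to `SimpleSpec`'s block parser.

```python
# Illustrative use of CargoSpec from it_depends/cargo.py. Cargo requirement
# strings may pad commas with whitespace; the custom Parser strips each block
# before delegating to SimpleSpec's block parser.
from semantic_version import Version

from it_depends.cargo import CargoSpec

spec = CargoSpec(">=1.2.0, <2.0.0")
print(Version("1.4.1") in spec)  # True: satisfies both clauses
print(Version("2.0.0") in spec)  # False: excluded by the upper bound
print(str(spec))                 # ">=1.2.0,<2.0.0" (whitespace canonicalized)
```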
/it_depends/cli.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from contextlib import contextmanager
3 | import json
4 | from pathlib import Path
5 | import sys
6 | from typing import Iterator, Optional, Sequence, TextIO, Union
7 | import webbrowser
8 |
9 | from sqlalchemy.exc import OperationalError
10 |
11 | from .audit import vulnerabilities
12 | from .db import DEFAULT_DB_PATH, DBPackageCache
13 | from .dependencies import Dependency, resolvers, resolve, SourceRepository
14 | from .it_depends import version as it_depends_version
15 | from .html import graph_to_html
16 | from .resolver import resolve_sbom
17 | from .sbom import cyclonedx_to_json
18 |
19 |
20 | @contextmanager
21 | def no_stdout() -> Iterator[TextIO]:
22 | """A context manager that redirects STDOUT to STDERR"""
23 | saved_stdout = sys.stdout
24 | sys.stdout = sys.stderr
25 | try:
26 | yield saved_stdout
27 | finally:
28 | sys.stdout = saved_stdout
29 |
30 |
31 | def parse_path_or_package_name(
32 | path_or_name: str,
33 | ) -> Union[SourceRepository, Dependency]:
34 | repo_path = Path(path_or_name)
35 | try:
36 | dependency: Optional[Dependency] = Dependency.from_string(path_or_name)
37 | except ValueError as e:
38 | if str(e).endswith("is not a known resolver") and not repo_path.exists():
39 | raise ValueError(f"Unknown resolver: {path_or_name}")
40 | dependency = None
41 | if dependency is None or repo_path.exists():
42 | return SourceRepository(path_or_name)
43 | else:
44 | return dependency
45 |
46 |
47 | def main(argv: Optional[Sequence[str]] = None) -> int:
48 | if argv is None:
49 | argv = sys.argv
50 |
51 | parser = argparse.ArgumentParser(description="a source code dependency analyzer")
52 |
53 | parser.add_argument(
54 | "PATH_OR_NAME",
55 | nargs="?",
56 | type=str,
57 | default=".",
58 | help="path to the directory to analyze, or a package name in the form of "
59 | "RESOLVER_NAME:PACKAGE_NAME[@OPTIONAL_VERSION], where RESOLVER_NAME is a resolver listed "
60 | 'in `it-depends --list`. For example: "pip:numpy", "apt:libc6@2.31", or '
61 | '"npm:lodash@>=4.17.0".',
62 | )
63 |
64 | parser.add_argument(
65 | "--audit",
66 | "-a",
67 | action="store_true",
68 | help="audit packages for known vulnerabilities using " "Google OSV",
69 | )
70 | parser.add_argument("--list", "-l", action="store_true", help="list available package resolver")
71 | parser.add_argument(
72 | "--database",
73 | "-db",
74 | type=str,
75 | nargs="?",
76 | default=DEFAULT_DB_PATH,
77 | help='alternative path to load/store the database, or ":memory:" to cache all results in '
78 | f"memory rather than reading/writing to disk (default is {DEFAULT_DB_PATH!s})",
79 | )
80 | parser.add_argument(
81 | "--clear-cache",
82 | action="store_true",
83 | help="clears the database specified by `--database` "
84 | "(equivalent to deleting the database file)",
85 | )
86 | parser.add_argument(
87 | "--compare",
88 | "-c",
89 | nargs="?",
90 | type=str,
91 | help="compare PATH_OR_NAME to another package specified according to the same rules as "
92 | "PATH_OR_NAME; this option will override the --output-format option and will instead "
93 | "output a floating point similarity metric. By default, the metric will be in the range"
94 | "[0, ∞), with zero meaning that the dependency graphs are identical. For a metric in the "
95 | "range [0, 1], see the `--normalize` option.",
96 | )
97 | parser.add_argument(
98 | "--normalize",
99 | "-n",
100 | action="store_true",
101 | help="Used in conjunction with `--compare`, this will change the output metric to be in the "
102 | "range [0, 1] where 1 means the graphs are identical and 0 means the graphs are as "
103 | "different as possible.",
104 | )
105 | parser.add_argument(
106 | "--output-format",
107 | "-f",
108 | choices=("json", "dot", "html", "cyclonedx"),
109 | default="json",
110 | help="how the output should be formatted (default is JSON); note that `cyclonedx` will output a single "
111 | "satisfying dependency resolution rather than the universe of all possible resolutions "
112 | "(see `--newest-resolution`)",
113 | )
114 | parser.add_argument("--latest-resolution", "-lr", action="store_true",
115 | help="by default, the `cyclonedx` output format emits a single satisfying dependency "
116 | "resolution containing the oldest versions of all of the packages possible; this option "
117 | "instead returns the latest latest possible resolution")
118 | parser.add_argument(
119 | "--output-file",
120 | "-o",
121 | type=str,
122 | default=None,
123 | help="path to the output file; default is to write output to STDOUT",
124 | )
125 | parser.add_argument(
126 | "--force",
127 | action="store_true",
128 | help="force overwriting the output file even if it already exists",
129 | )
130 | parser.add_argument(
131 | "--all-versions",
132 | action="store_true",
133 | help="for `--output-format html`, this option will emit all package versions that satisfy each "
134 | "dependency",
135 | )
136 | parser.add_argument(
137 | "--depth-limit",
138 | "-d",
139 | type=int,
140 | default=-1,
141 | help="depth limit for recursively solving dependencies (default is -1 to resolve all "
142 | "dependencies)",
143 | )
144 | parser.add_argument(
145 | "--max-workers",
146 | "-j",
147 | type=int,
148 | default=None,
149 | help="maximum number of jobs to run concurrently" " (default is # of CPUs)",
150 | )
151 | parser.add_argument(
152 | "--version",
153 | "-v",
154 | action="store_true",
155 | help="print it-depends' version and exit",
156 | )
157 |
158 | args = parser.parse_args(argv[1:])
159 |
160 | if args.version:
161 | sys.stderr.write("it-depends version ")
162 | sys.stderr.flush()
163 | version = it_depends_version()
164 | sys.stdout.write(str(version))
165 | sys.stdout.flush()
166 | sys.stderr.write("\n")
167 | return 0
168 |
169 | try:
170 | repo = parse_path_or_package_name(args.PATH_OR_NAME)
171 |
172 | if args.compare is not None:
173 | to_compare: Optional[Union[SourceRepository, Dependency]] = parse_path_or_package_name(
174 | args.compare
175 | )
176 | else:
177 | to_compare = None
178 | except ValueError as e:
179 | sys.stderr.write(str(e))
180 | sys.stderr.write("\n\n")
181 | return 1
182 |
183 | if args.clear_cache:
184 | db_path = Path(args.database)
185 | if db_path.exists():
186 | if sys.stderr.isatty() and sys.stdin.isatty():
187 | while True:
188 | if args.database != DEFAULT_DB_PATH:
189 | sys.stderr.write(f"Cache file: {db_path.absolute()}\n")
190 | sys.stderr.write(
191 | "Deleting the cache will require all past resoltuions to be recalculated, which "
192 | "can be slow.\nAre you sure? [yN] "
193 | )
194 | try:
195 | choice = input("").lower().strip()
196 | except KeyboardInterrupt:
197 | return 1
198 | if choice == "y":
199 | db_path.unlink()
200 | sys.stderr.write("Cache cleared.\n")
201 | break
202 | elif choice == "n" or choice == "":
203 | break
204 | else:
205 | db_path.unlink()
206 | sys.stderr.write("Cache cleared.\n")
207 |
208 | if args.list:
209 | sys.stdout.flush()
210 | if isinstance(repo, SourceRepository):
211 | path = repo.path.absolute()
212 | else:
213 | path = args.PATH_OR_NAME
214 | sys.stderr.write(f"Available resolvers for {path}:\n")
215 | sys.stderr.flush()
216 | for name, classifier in sorted((c.name, c) for c in resolvers()):
217 | sys.stdout.write(name + " " * (12 - len(name)))
218 | sys.stdout.flush()
219 | available = classifier.is_available()
220 | if not available:
221 | sys.stderr.write(f"\tnot available: {available.reason}")
222 | sys.stderr.flush()
223 | elif isinstance(repo, SourceRepository) and not classifier.can_resolve_from_source(
224 | repo
225 | ):
226 | sys.stderr.write("\tincompatible with this path")
227 | sys.stderr.flush()
228 | elif isinstance(repo, Dependency) and repo.source != classifier.name:
229 | sys.stderr.write("\tincompatible with this package specifier")
230 | else:
231 | sys.stderr.write("\tenabled")
232 | sys.stderr.flush()
233 |
234 | sys.stdout.write("\n")
235 | sys.stdout.flush()
236 | return 0
237 |
238 | try:
239 | output_file = None
240 | with no_stdout() as real_stdout:
241 | if args.output_file is None or args.output_file == "-":
242 | output_file = real_stdout
243 | elif not args.force and Path(args.output_file).exists():
244 | sys.stderr.write(
245 | f"{args.output_file} already exists!\nRe-run with `--force` to overwrite the file.\n"
246 | )
247 | return 1
248 | else:
249 | output_file = open(args.output_file, "w")
250 | with DBPackageCache(args.database) as cache:
251 | try:
252 | package_list = resolve(
253 | repo,
254 | cache=cache,
255 | depth_limit=args.depth_limit,
256 | max_workers=args.max_workers,
257 | )
258 | except ValueError as e:
259 | if not args.clear_cache or args.PATH_OR_NAME.strip():
260 | sys.stderr.write(f"{e!s}\n")
261 | return 1
262 | if not package_list:
263 | sys.stderr.write(
264 | f"Try --list to check for available resolvers for {args.PATH_OR_NAME}\n"
265 | )
266 | sys.stderr.flush()
267 |
268 | # TODO: Should the cache be updated instead????
269 | if args.audit:
270 | package_list = vulnerabilities(package_list)
271 |
272 | if to_compare is not None:
273 | to_compare_list = resolve(
274 | to_compare,
275 | cache=cache,
276 | depth_limit=args.depth_limit,
277 | max_workers=args.max_workers,
278 | )
279 | output_file.write(
280 | str(
281 | package_list.to_graph().distance_to(
282 | to_compare_list.to_graph(), normalize=args.normalize
283 | )
284 | )
285 | )
286 | output_file.write("\n")
287 | elif args.output_format == "dot":
288 | output_file.write(cache.to_dot(package_list.source_packages).source)
289 | elif args.output_format == "html":
290 | output_file.write(
291 | graph_to_html(package_list, collapse_versions=not args.all_versions)
292 | )
293 | if output_file is not real_stdout:
294 | output_file.flush()
295 | webbrowser.open(output_file.name)
296 | elif args.output_format == "json":
297 | output_file.write(json.dumps(package_list.to_obj(), indent=4))
298 | elif args.output_format == "cyclonedx":
299 | sbom = None
300 | for p in package_list.source_packages:
301 | for bom in resolve_sbom(p, package_list, order_ascending=not args.latest_resolution):
302 | if sbom is None:
303 | sbom = bom
304 | else:
305 | sbom = sbom | bom
306 | # only get the first resolution
307 | # TODO: Provide a means for enumerating all valid SBOMs
308 | break
309 | output_file.write(cyclonedx_to_json(sbom.to_cyclonedx()))
310 | else:
311 | raise NotImplementedError(f"TODO: Implement output format {args.output_format}")
312 | except OperationalError as e:
313 | sys.stderr.write(
314 | f"Database error: {e!r}\n\nThis can occur if your database was created with an older version "
315 | f"of it-depends and was unable to be updated. If you remove {args.database} or run "
316 | "`it-depends --clear-cache` and try again, the database will automatically be rebuilt from "
317 | "scratch."
318 | )
319 | return 1
320 | finally:
321 | if output_file is not None and output_file != sys.stdout:
322 | sys.stderr.write(f"Output saved to {output_file.name}\n")
323 | output_file.close()
324 |
325 | return 0
326 |
--------------------------------------------------------------------------------
/it_depends/db.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Any, Dict, FrozenSet, Iterable, Iterator, Optional, Tuple, Union
3 |
4 | from semantic_version import Version
5 | from sqlalchemy import (
6 | Column,
7 | create_engine,
8 | distinct,
9 | ForeignKey,
10 | Integer,
11 | String,
12 | UniqueConstraint,
13 | )
14 | from sqlalchemy.ext.declarative import declarative_base
15 | from sqlalchemy.ext.hybrid import hybrid_property
16 | from sqlalchemy.orm import relationship, sessionmaker
17 |
18 | from .dependencies import (
19 | resolver_by_name,
20 | Dependency,
21 | DependencyResolver,
22 | Package,
23 | SemanticVersion,
24 | PackageCache,
25 | )
26 | from .it_depends import APP_DIRS
27 |
28 | DEFAULT_DB_PATH = Path(APP_DIRS.user_cache_dir) / "dependencies.sqlite"
29 |
30 | Base = declarative_base()
31 |
32 |
33 | class Resolution(Base): # type: ignore
34 | __tablename__ = "resolutions"
35 |
36 | id = Column(Integer, primary_key=True)
37 | package = Column(String, nullable=False)
38 | version = Column(String, nullable=True)
39 | source = Column(String, nullable=True)
40 |
41 | __table_args__ = (
42 | UniqueConstraint("package", "version", "source", name="resolution_unique_constraint"),
43 | )
44 |
45 |
46 | class Updated(Base): # type: ignore
47 | __tablename__ = "updated"
48 |
49 | id = Column(Integer, primary_key=True)
50 | package = Column(String, nullable=False)
51 | version = Column(String, nullable=True)
52 | source = Column(String, nullable=True)
53 | resolver = Column(String, nullable=True)
54 |
55 | __table_args__ = (
56 | UniqueConstraint(
57 | "package", "version", "source", "resolver", name="updated_unique_constraint"
58 | ),
59 | )
60 |
61 |
62 | class DBDependency(Base, Dependency): # type: ignore
63 | __tablename__ = "dependencies"
64 |
65 | id = Column(Integer, primary_key=True)
66 | from_package_id = Column(Integer, ForeignKey("packages.id"))
67 | from_package = relationship("DBPackage", back_populates="raw_dependencies")
68 | source = Column(String, nullable=False)
69 | package = Column(String, nullable=False)
70 | semantic_version_string = Column("semantic_version", String, nullable=True)
71 |
72 | __table_args__ = (
73 | UniqueConstraint(
74 | "from_package_id",
75 | "package",
76 | "semantic_version",
77 | name="dependency_unique_constraint",
78 | ),
79 | )
80 |
81 | def __init__(self, package: "DBPackage", dep: Dependency):
82 | # We intentionally skip calling super().__init__()
83 | self.from_package_id = package.id
84 | self.source = dep.source
85 | self.package = dep.package
86 | self.semantic_version = dep.semantic_version # type: ignore
87 |
88 | @hybrid_property # type: ignore
89 | def semantic_version(self) -> SemanticVersion:
90 | resolver = resolver_by_name(self.source)
91 | return resolver.parse_spec(self.semantic_version_string)
92 |
93 | @semantic_version.setter # type: ignore
94 | def semantic_version(self, new_version: Union[SemanticVersion, str]):
95 | self.semantic_version_string = str(new_version)
96 |
97 |
98 | class DependencyMapping:
99 | def __init__(self, package: "DBPackage"):
100 | super().__init__()
101 | self._deps: Dict[str, Dependency] = {
102 | dep.package: Dependency(
103 | package=dep.package,
104 | source=dep.source,
105 | semantic_version=dep.semantic_version,
106 | )
107 | for dep in package.raw_dependencies
108 | }
109 |
110 | def items(self) -> Iterator[Tuple[str, Dependency]]:
111 | yield from self._deps.items()
112 |
113 | def keys(self) -> Iterable[str]:
114 | return self._deps.keys()
115 |
116 | def values(self) -> Iterable[Dependency]:
117 | return self._deps.values()
118 |
119 | def __setitem__(self, dep_name: str, dep: Dependency):
120 | self._deps[dep_name] = dep
121 |
122 | def __delitem__(self, dep_name: str):
123 |         pass  # deletions are intentionally a no-op: the mapping is rebuilt from the database on each access
124 |
125 | def __getitem__(self, package_name: str) -> Dependency:
126 | return self._deps[package_name]
127 |
128 | def __len__(self) -> int:
129 | return len(self._deps)
130 |
131 | def __iter__(self) -> Iterator[str]:
132 | return iter(self._deps)
133 |
134 |
135 | class DBPackage(Base, Package): # type: ignore
136 | __tablename__ = "packages"
137 |
138 | id = Column(Integer, primary_key=True)
139 | name = Column(String, nullable=False)
140 | version_str = Column("version", String, nullable=False)
141 | source = Column("source", String, nullable=False)
142 |
143 | __table_args__ = (
144 | UniqueConstraint("name", "version", "source", name="package_unique_constraint"),
145 | )
146 |
147 | raw_dependencies = relationship(
148 | "DBDependency",
149 | back_populates="from_package",
150 | cascade="all, delete, delete-orphan",
151 | )
152 |
153 | def __init__(self, package: Package):
154 | # We intentionally skip calling super().__init__()
155 | self.name = package.name
156 | self.version = package.version
157 | self.source = package.source
158 |
159 | @property
160 | def resolver(self) -> DependencyResolver:
161 | return resolver_by_name(self.source)
162 |
163 | @staticmethod
164 | def from_package(package: Package, session) -> "DBPackage":
165 | if not isinstance(package, DBPackage):
166 | dep_pkg = package
167 | package = DBPackage(package)
168 | session.add(package)
169 | session.flush()
170 | session.add_all([DBDependency(package, dep) for dep in dep_pkg.dependencies])
171 | else:
172 | session.add(package)
173 | return package
174 |
175 | def to_package(self) -> Package:
176 | return Package(
177 | source=self.source,
178 | name=self.name,
179 | version=self.version,
180 | dependencies=(
181 | Dependency(
182 | package=dep.package,
183 | semantic_version=dep.semantic_version,
184 | source=dep.source,
185 | )
186 | for dep in self.raw_dependencies
187 | ),
188 | )
189 |
190 | @property
191 | def version(self) -> Version:
192 | return self.resolver.parse_version(self.version_str)
193 |
194 | @version.setter
195 | def version(self, new_version: Union[Version, str]):
196 | self.version_str = str(new_version)
197 |
198 | @property
199 | def dependencies(self) -> DependencyMapping: # type: ignore
200 | return DependencyMapping(self)
201 |
202 |
203 | class SourceFilteredPackageCache(PackageCache):
204 | def __init__(self, source: Optional[str], parent: "DBPackageCache"):
205 | super().__init__()
206 | self.source: Optional[str] = source
207 | self.parent: DBPackageCache = parent
208 |
209 | def __len__(self):
210 | return (
211 | self.parent.session.query(DBPackage)
212 |             .filter(DBPackage.source.like(self.source))
213 | .count()
214 | )
215 |
216 | def __iter__(self) -> Iterator[Package]:
217 | yield from [
218 | p.to_package()
219 | for p in self.parent.session.query(DBPackage)
220 |             .filter(DBPackage.source.like(self.source))
221 | .all()
222 | ]
223 |
224 | def was_resolved(self, dependency: Dependency) -> bool:
225 | return self.parent.was_resolved(dependency)
226 |
227 | def set_resolved(self, dependency: Dependency):
228 | self.parent.set_resolved(dependency)
229 |
230 | def from_source(self, source: Optional[str]) -> "PackageCache":
231 | return SourceFilteredPackageCache(source, self.parent)
232 |
233 | def package_versions(self, package_name: str) -> Iterator[Package]:
234 | yield from [
235 | p.to_package()
236 | for p in self.parent.session.query(DBPackage)
237 | .filter(
238 | DBPackage.name.like(package_name),
239 |                 DBPackage.source.like(self.source),
240 | )
241 | .all()
242 | ]
243 |
244 | def package_full_names(self) -> FrozenSet[str]:
245 |         return frozenset(
246 |             name for (name,) in self.parent.session.query(distinct(DBPackage.name))
247 |             .filter(DBPackage.source.like(self.source))
248 |             .all()
249 |         )
250 |
251 | def match(self, to_match: Union[str, Package, Dependency]) -> Iterator[Package]:
252 | return self.parent.match(to_match)
253 |
254 | def add(self, package: Package):
255 | return self.parent.add(package)
256 |
257 | def set_updated(self, package: Package, resolver: str):
258 | return self.parent.set_updated(package, resolver)
259 |
260 | def was_updated(self, package: Package, resolver: str) -> bool:
261 | return self.parent.was_updated(package, resolver)
262 |
263 | def updated_by(self, package: Package) -> FrozenSet[str]:
264 | return self.parent.updated_by(package)
265 |
266 |
267 | class DBPackageCache(PackageCache):
268 | def __init__(self, db: Union[str, Path] = ":memory:"):
269 | super().__init__()
270 | if db == ":memory:":
271 | db = "sqlite:///:memory:"
272 | elif db == "sqlite:///:memory:":
273 | pass
274 | elif isinstance(db, str):
275 | if db.startswith("sqlite:///"):
276 | db = db[len("sqlite:///") :]
277 | db = Path(db)
278 | if isinstance(db, Path):
279 | db.parent.mkdir(parents=True, exist_ok=True)
280 | db = f"sqlite:///{db.absolute()!s}?check_same_thread=False"
281 | self.db: str = db
282 | self._session = None
283 |
284 | def open(self):
285 | if isinstance(self.db, str):
286 | db = create_engine(self.db)
287 | else:
288 | db = self.db
289 | self._session = sessionmaker(bind=db)()
290 | Base.metadata.create_all(db)
291 |
292 | def close(self):
293 | self._session = None
294 |
295 | @property
296 | def session(self):
297 | return self._session
298 |
299 | def add(self, package: Package):
300 | self.extend((package,))
301 |
302 | def extend(self, packages: Iterable[Package]):
303 | for package in packages:
304 | for existing in self.match(package):
305 | if len(existing.dependencies) > len(package.dependencies):
306 | raise ValueError(
307 | f"Package {package!s} has already been resolved with more dependencies: "
308 | f"{existing!s}"
309 | )
310 |                 elif existing.dependencies != package.dependencies:
311 |                     existing.dependencies = package.dependencies
312 |                     self.session.commit()
313 |                 found_existing = True  # a match with identical dependencies also counts as existing
314 |                 break
315 |             else:
316 |                 found_existing = False
317 | if found_existing:
318 | continue
319 | if isinstance(package, DBPackage):
320 | self.session.add(package)
321 | else:
322 | _ = DBPackage.from_package(package, self.session)
323 | self.session.commit()
324 |
325 | def __len__(self):
326 | return self.session.query(DBPackage).count()
327 |
328 | def __iter__(self) -> Iterator[Package]:
329 | yield from self.session.query(DBPackage).all()
330 |
331 | def from_source(self, source: Optional[str]) -> SourceFilteredPackageCache:
332 | return SourceFilteredPackageCache(source, self)
333 |
334 | def package_versions(self, package_full_name: str) -> Iterator[Package]:
335 | yield from [
336 | p.to_package()
337 | for p in self.session.query(DBPackage)
338 | .filter(DBPackage.name.like(package_full_name))
339 | .all()
340 | ]
341 |
342 | def package_full_names(self) -> FrozenSet[str]:
343 | return frozenset(
344 | f"{result[0]}:{result[1]}"
345 | for result in self.session.query(
346 | distinct(DBPackage.source), distinct(DBPackage.name)
347 | ).all()
348 | )
349 |
350 | def _make_query(self, to_match: Union[str, Package], source: Optional[str] = None):
351 | if source is None and isinstance(to_match, Package):
352 | source = to_match.source
353 | if source is not None:
354 | filters: Tuple[Any, ...] = (DBPackage.source.like(source),)
355 | else:
356 | filters = ()
357 | if isinstance(to_match, Package):
358 | return self.session.query(DBPackage).filter(
359 | DBPackage.name.like(to_match.name),
360 | DBPackage.version_str.like(str(to_match.version)),
361 | *filters,
362 | )
363 | else:
364 | return self.session.query(DBPackage).filter(DBPackage.name.like(to_match), *filters)
365 |
366 | def match(self, to_match: Union[str, Package, Dependency]) -> Iterator[Package]:
367 | if isinstance(to_match, Dependency):
368 | for package in self._make_query(to_match.package, source=to_match.source):
369 | if package.version in to_match.semantic_version:
370 | yield package.to_package()
371 | else:
372 | if isinstance(to_match, Package):
373 | source: Optional[str] = to_match.source
374 | else:
375 | source = None
376 | # we intentionally build a list before yielding so that we don't keep the session query lingering
377 | yield from [
378 | package.to_package() for package in self._make_query(to_match, source=source).all()
379 | ]
380 |
381 | def was_resolved(self, dependency: Dependency) -> bool:
382 | return (
383 | self.session.query(Resolution)
384 | .filter(
385 | Resolution.package.like(dependency.package),
386 | Resolution.version == str(dependency.semantic_version),
387 | Resolution.source.like(dependency.source),
388 | )
389 | .limit(1)
390 | .count()
391 | > 0
392 | )
393 |
394 | def set_resolved(self, dependency: Dependency):
395 | if self.was_resolved(dependency):
396 | return
397 | self.session.add(
398 | Resolution(
399 | package=dependency.package,
400 | version=str(dependency.semantic_version),
401 | source=dependency.source,
402 | )
403 | )
404 | self.session.commit()
405 |
406 | def updated_by(self, package: Package) -> FrozenSet[str]:
407 | return frozenset(
408 | u.resolver
409 | for u in self.session.query(Updated).filter(
410 | Updated.source.like(package.source),
411 | Updated.package.like(package.name),
412 | Updated.version == str(package.version),
413 | )
414 | )
415 |
416 | def was_updated(self, package: Package, resolver: str) -> bool:
417 | if package.source == resolver:
418 | return True
419 | return (
420 | self.session.query(Updated)
421 | .filter(
422 | Updated.source.like(package.source),
423 | Updated.package.like(package.name),
424 | Updated.version.like(str(package.version)),
425 | Updated.resolver.like(resolver),
426 | )
427 | .limit(1)
428 | .count()
429 | > 0
430 | )
431 |
432 | def set_updated(self, package: Package, resolver: str):
433 | if self.was_updated(package, resolver):
434 | return
435 | self.session.add(
436 | Updated(
437 | package=package.name,
438 | version=str(package.version),
439 | source=package.source,
440 | resolver=resolver,
441 | )
442 | )
443 | self.session.commit()
444 |
--------------------------------------------------------------------------------
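
A minimal usage sketch of `DBPackageCache`; the constructor arguments mirror those used by `to_package()` above, the concrete package names are illustrative, and the `pip` resolver is assumed to be registered (as it is in an installed it-depends):

```python
from semantic_version import Version

from it_depends.db import DBPackageCache
from it_depends.dependencies import Dependency, Package, SimpleSpec

cache = DBPackageCache(":memory:")  # or a Path / "sqlite:///..." string for an on-disk database
cache.open()
try:
    pkg = Package(
        source="pip",
        name="example-package",  # illustrative
        version=Version("1.0.0"),
        dependencies=[
            Dependency(package="requests", source="pip", semantic_version=SimpleSpec(">=2.0")),
        ],
    )
    cache.add(pkg)
    print(len(cache))  # 1
    for match in cache.match("example-package"):
        print(match.name, match.version)
finally:
    cache.close()
```
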
/it_depends/docker.py:
--------------------------------------------------------------------------------
1 | import json
2 | import re
3 | import shutil
4 | import subprocess
5 | import sys
6 | import os
7 | from pathlib import Path
8 | from tempfile import mkdtemp
9 | from tqdm import tqdm
10 | from typing import Dict, Iterable, List, Optional, Tuple, Union
11 |
12 | import docker
13 | from docker.errors import NotFound as ImageNotFound, DockerException
14 | from docker.models.images import Image
15 |
16 | from . import version as it_depends_version
17 |
18 |
19 | def _discover_podman_socket():
20 | """Try to discover a Podman socket.
21 |
22 | Discovery is performed in this order:
23 |
24 | * If the user is non-root, rootless Podman
25 | * If the user is root, rooted Podman
26 | """
27 |
28 | euid = os.geteuid()
29 | if euid != 0:
30 | # Non-root: use XDG_RUNTIME_DIR to try and find the user's Podman socket,
31 | # falling back on the systemd-enforced default.
32 | # Ref: https://docs.podman.io/en/latest/markdown/podman-system-service.1.html
33 | runtime_dir = Path(os.environ.get("XDG_RUNTIME_DIR", f"/run/user/{euid}/"))
34 | if not runtime_dir.is_dir():
35 | return None
36 |
37 | sock_path = runtime_dir / "podman/podman.sock"
38 | else:
39 | # Root: check for /run/podman/podman.sock and nothing else.
40 | sock_path = Path("/run/podman/podman.sock")
41 |
42 | if not sock_path.is_socket():
43 | return None
44 |
45 | return f"unix://{sock_path}"
46 |
47 |
48 | class Dockerfile:
49 | def __init__(self, path: Path):
50 | self._path: Path = path
51 | self._len: Optional[int] = None
52 | self._line_offsets: Dict[int, int] = {}
53 |
54 | @property
55 | def path(self) -> Path:
56 | return self._path
57 |
58 | @path.setter
59 | def path(self, new_path: Path):
60 | self._path = new_path
61 | self._len = None
62 | self._line_offsets = {}
63 |
64 | def __enter__(self) -> "Dockerfile":
65 | return self
66 |
67 | def __exit__(self, exc_type, exc_val, exc_tb):
68 | pass
69 |
70 | def exists(self) -> bool:
71 | return self.path.exists()
72 |
73 | def dir(self) -> Path:
74 | return self.path.parent
75 |
76 | def __len__(self) -> int:
77 | """Returns the number of lines in the file"""
78 | if self._len is None:
79 | self._len = 0
80 | self._line_offsets[0] = 0 # line 0 starts at offset 0
81 | offset = 0
82 | with open(self.path, "rb") as f:
83 | while True:
84 | chunk = f.read(1)
85 | if len(chunk) == 0:
86 | break
87 | elif chunk == b"\n":
88 | self._len += 1
89 | self._line_offsets[self._len] = offset + 1
90 | offset += 1
91 | return self._len
92 |
93 | def get_line(self, step_command: str, starting_line: int = 0) -> Optional[int]:
94 | """Returns the line number of the associated step command"""
95 | if self._len is None:
96 | # we need to call __len__ to set self._line_offsets
97 | _ = len(self)
98 | if starting_line not in self._line_offsets:
99 | return None
100 | with open(self.path, "r") as f:
101 | f.seek(self._line_offsets[starting_line])
102 | line_offset = 0
103 | while True:
104 | line = f.readline()
105 | if line == "":
106 | break
107 |                 elif line.rstrip("\n") == step_command:  # readline() keeps the trailing newline
108 | return starting_line + line_offset
109 | line_offset += 1
110 | return None
111 |
112 |
113 | class InMemoryFile:
114 | def __init__(self, filename: str, content: bytes):
115 | self.filename: str = filename
116 | self.content: bytes = content
117 |
118 |
119 | class InMemoryDockerfile(Dockerfile):
120 | def __init__(self, content: str, local_files: Iterable[InMemoryFile] = ()):
121 | super().__init__(None) # type: ignore
122 | self.content: str = content
123 | self.local_files: List[InMemoryFile] = list(local_files)
124 | self._entries: int = 0
125 | self._tmpdir: Optional[Path] = None
126 |
127 | @Dockerfile.path.getter # type: ignore
128 | def path(self) -> Path:
129 | path = super().path
130 | if path is None:
131 | raise ValueError(
132 | "InMemoryDockerfile only has a valid path when inside of its context manager"
133 | )
134 | return path
135 |
136 | def __enter__(self) -> "InMemoryDockerfile":
137 | self._entries += 1
138 | if self._entries == 1:
139 | self._tmpdir = Path(mkdtemp())
140 | for file in self.local_files:
141 | with open(self._tmpdir / file.filename, "wb") as f:
142 | f.write(file.content)
143 | self.path = self._tmpdir / "Dockerfile"
144 | with open(self.path, "w") as d:
145 | d.write(self.content)
146 | return self
147 |
148 | def __exit__(self, exc_type, exc_val, exc_tb):
149 | self._entries -= 1
150 | if self._entries == 0:
151 | self.path.unlink()
152 | shutil.rmtree(self._tmpdir)
153 | self.path = None # type: ignore
154 |
155 |
156 | class DockerContainer:
157 | def __init__(
158 | self,
159 | image_name: str,
160 | dockerfile: Optional[Dockerfile] = None,
161 | tag: Optional[str] = None,
162 | ):
163 | self.image_name: str = image_name
164 | if tag is None:
165 | self.tag: str = it_depends_version()
166 | else:
167 | self.tag = tag
168 | self._client: Optional[docker.DockerClient] = None
169 | self.dockerfile: Optional[Dockerfile] = dockerfile
170 |
171 | def run(
172 | self,
173 | *args: str,
174 | check_existence: bool = True,
175 | rebuild: bool = True,
176 | build_if_necessary: bool = True,
177 | remove: bool = True,
178 | interactive: bool = True,
179 | mounts: Optional[Iterable[Tuple[Union[str, Path], Union[str, Path]]]] = None,
180 | privileged: bool = False,
181 | env: Optional[Dict[str, str]] = None,
182 | stdin=None,
183 | stdout=None,
184 | stderr=None,
185 | cwd=None,
186 | ):
187 | if rebuild:
188 | self.rebuild()
189 | elif check_existence and not self.exists():
190 | if build_if_necessary:
191 | if self.dockerfile is not None and self.dockerfile.exists():
192 | self.rebuild()
193 | else:
194 | self.pull()
195 | if not self.exists():
196 | raise ValueError(f"{self.name} does not exist!")
197 | else:
198 | raise ValueError(
199 | f"{self.name} does not exist! Re-run with `build_if_necessary=True` to automatically "
200 | "build it."
201 | )
202 | if cwd is None:
203 | cwd = str(Path.cwd())
204 |
205 | # Call out to the actual Docker command instead of the Python API because it has better support for interactive
206 | # TTYs
207 |
208 | if interactive and (stdin is not None or stdout is not None or stderr is not None):
209 | raise ValueError(
210 | "if `interactive == True`, all of `stdin`, `stdout`, and `stderr` must be `None`"
211 | )
212 |
213 | cmd_args = [str(Path("/usr") / "bin" / "env"), "docker", "run"]
214 |
215 | if interactive:
216 | cmd_args.append("-it")
217 |
218 | if remove:
219 | cmd_args.append("--rm")
220 |
221 | if mounts is not None:
222 | for source, target in mounts:
223 | cmd_args.append("-v")
224 | if not isinstance(source, Path):
225 | source = Path(source)
226 | source = source.absolute()
227 | cmd_args.append(f"{source!s}:{target!s}:cached")
228 |
229 | if env is not None:
230 | for k, v in env.items():
231 | cmd_args.append("-e")
232 | escaped_value = v.replace('"', '\\"')
233 | cmd_args.append(f"{k}={escaped_value}")
234 |
235 | if privileged:
236 | cmd_args.append("--privileged=true")
237 |
238 | cmd_args.append(self.name)
239 |
240 | cmd_args.extend(args)
241 |
242 | if interactive:
243 | return subprocess.call(cmd_args, cwd=cwd, stdout=sys.stderr)
244 | else:
245 | return subprocess.run(cmd_args, stdin=stdin, stdout=stdout, stderr=stderr, cwd=cwd)
246 |
247 | # self.client.containers.run(self.name, args, remove=remove, mounts=[
248 | # Mount(target=str(target), source=str(source), consistency="cached") for source, target in mounts
249 | # ])
250 |
251 | @property
252 | def name(self) -> str:
253 | return f"{self.image_name}:{self.tag}"
254 |
255 | @property
256 | def client(self) -> docker.DockerClient:
257 | if self._client is None:
258 | self._client = docker.from_env()
259 | return self._client
260 |
261 | def exists(self) -> Optional[Image]:
262 | for image in self.client.images.list():
263 | if self.name in image.tags:
264 | return image
265 | return None
266 |
267 | def pull(self, latest: bool = False) -> Image:
268 | # We could use the Python API to pull, like this:
269 | # return self.client.images.pull(self.image_name, tag=[self.tag, None][latest])
270 | # However, that doesn't include progress bars. So call the `docker` command instead:
271 | name = f"{self.image_name}:{[self.tag, 'latest'][latest]}"
272 | try:
273 | subprocess.check_call(["docker", "pull", name])
274 | for image in self.client.images.list():
275 | if name in image.tags:
276 | return image
277 | except subprocess.CalledProcessError:
278 | pass
279 | raise ImageNotFound(name)
280 |
281 | def rebuild(self, nocache: bool = False):
282 | if self.dockerfile is None:
283 | _ = self.pull()
284 | return
285 | elif not self.dockerfile.exists():
286 | raise ValueError("Could not find the Dockerfile.")
287 | # use the low-level APIClient so we can get streaming build status
288 |         sock = _discover_podman_socket()  # discover outside the try so `sock` is always bound in the error message
289 |         try:
290 |             cli = docker.APIClient(base_url=sock)
291 | except DockerException as e:
292 | raise ValueError(f"Could not connect to socket: sock={sock} {e}") from e
293 | with tqdm(desc="Archiving the build directory", unit=" steps", leave=False) as t:
294 | last_line = 0
295 | last_step = None
296 | for raw_line in cli.build(
297 | path=str(self.dockerfile.dir()),
298 | rm=True,
299 | tag=self.name,
300 | nocache=nocache,
301 | forcerm=True,
302 | ):
303 | t.desc = f"Building {self.name}"
304 | for line in raw_line.split(b"\n"):
305 | try:
306 | line = json.loads(line)
307 | except json.decoder.JSONDecodeError:
308 | continue
309 | if "stream" in line:
310 | m = re.match(
311 | r"^Step\s+(\d+)(/(\d+))?\s+:\s+(.+)$",
312 | line["stream"],
313 | re.MULTILINE,
314 | )
315 | if m:
316 | if m.group(3):
317 | # Docker told us the total number of steps!
318 | total_steps = int(m.group(3))
319 | current_step = int(m.group(1))
320 | if last_step is None:
321 | t.total = total_steps
322 | last_step = 0
323 | t.update(current_step - last_step)
324 | last_step = current_step
325 | else:
326 | # Docker didn't tell us the total number of steps, so infer it from our line
327 | # number in the Dockerfile
328 | t.total = len(self.dockerfile)
329 | new_line = self.dockerfile.get_line(
330 | m.group(4), starting_line=last_line
331 | )
332 | if new_line is not None:
333 | t.update(new_line - last_line)
334 | last_line = new_line
335 | t.write(line["stream"].replace("\n", "").strip())
336 |
--------------------------------------------------------------------------------
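
A sketch of the `InMemoryDockerfile`/`DockerContainer` workflow (the same pattern `native.py` uses later in this repository); the image name, tag, and script are illustrative, and a Docker daemon or Podman socket must be available:

```python
from it_depends.docker import DockerContainer, InMemoryDockerfile, InMemoryFile

script = InMemoryFile("hello.sh", b"#!/bin/sh\necho hello from the container\n")
dockerfile = InMemoryDockerfile(
    """FROM ubuntu:20.04
WORKDIR /workdir
COPY hello.sh .
RUN chmod +x hello.sh
""",
    local_files=[script],
)
with dockerfile:  # the Dockerfile and hello.sh exist on disk only inside this block
    container = DockerContainer("example/hello", dockerfile, tag="demo")  # illustrative name/tag
    container.rebuild()
    container.run("./hello.sh", rebuild=False, interactive=False)
```
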
/it_depends/go.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from datetime import datetime
3 | from html.parser import HTMLParser
4 | from logging import getLogger
5 | import os
6 | from pathlib import Path
7 | import re
8 | from subprocess import check_call, check_output, DEVNULL, CalledProcessError
9 | from tempfile import TemporaryDirectory
10 | from typing import Iterable, Iterator, List, Optional, Tuple, Union
11 | from urllib import request
12 | from urllib.error import HTTPError, URLError
13 |
14 | from semantic_version import Version
15 | from semantic_version.base import BaseSpec, Range, SimpleSpec
16 |
17 | from .dependencies import (
18 | Dependency,
19 | DependencyResolver,
20 | SourcePackage,
21 | SourceRepository,
22 | Package,
23 | PackageCache,
24 | SemanticVersion,
25 | )
26 | from . import vcs
27 |
28 | log = getLogger(__name__)
29 |
30 | GITHUB_URL_MATCH = re.compile(
31 | r"\s*https?://(www\.)?github.com/([^/]+)/(.+?)(\.git)?\s*", re.IGNORECASE
32 | )
33 | REQUIRE_LINE_REGEX = r"\s*([^\s]+)\s+([^\s]+)\s*(//\s*indirect\s*)?"
34 | REQUIRE_LINE_MATCH = re.compile(REQUIRE_LINE_REGEX)
35 | REQUIRE_MATCH = re.compile(rf"\s*require\s+{REQUIRE_LINE_REGEX}")
36 | REQUIRE_BLOCK_MATCH = re.compile(r"\s*require\s+\(\s*")
37 | MODULE_MATCH = re.compile(r"\s*module\s+([^\s]+)\s*")
38 |
39 | GOPATH: Optional[str] = os.environ.get("GOPATH", None)
40 |
41 |
42 | @dataclass(frozen=True, unsafe_hash=True)
43 | class MetaImport:
44 | prefix: str
45 | vcs: str
46 | repo_root: str
47 |
48 |
49 | class MetadataParser(HTMLParser):
50 |     def __init__(self):
51 |         super().__init__()
52 |         self.in_meta: bool = False
53 |         self.metadata: List[MetaImport] = []  # per-instance: a shared class-level list would accumulate across parses
52 |
53 | def error(self, message):
54 | pass
55 |
56 | def handle_starttag(self, tag, attrs):
57 | if tag == "meta":
58 | attrs = dict(attrs)
59 | if attrs.get("name", "") == "go-import":
60 | fields = attrs.get("content", "").split(" ")
61 | if len(fields) == 3:
62 | self.metadata.append(MetaImport(*fields))
63 |
64 |
65 | def git_commit(path: Optional[str] = None) -> Optional[str]:
66 | try:
67 |         return check_output(["git", "rev-parse", "HEAD"], cwd=path, stderr=DEVNULL).decode("utf-8").strip()
68 | except CalledProcessError:
69 | return None
70 |
71 |
72 | class GoVersion:
73 | def __init__(self, go_version_string: str):
74 | self.version_string: str = go_version_string.strip()
75 | if self.version_string.startswith("="):
76 | self.version_string = self.version_string[1:]
77 | self.build: bool = False # This is to appease semantic_version.base.SimpleSpec
78 |
79 | def __lt__(self, other):
80 | return self.version_string < str(other)
81 |
82 | def __eq__(self, other):
83 | return isinstance(other, GoVersion) and self.version_string == other.version_string
84 |
85 | def __hash__(self):
86 | return hash(self.version_string)
87 |
88 | def __str__(self):
89 | return self.version_string
90 |
91 |
92 | @BaseSpec.register_syntax
93 | class GoSpec(SimpleSpec):
94 | SYNTAX = "go"
95 |
96 | class Parser(SimpleSpec.Parser):
97 | @classmethod
98 | def parse(cls, expression):
99 | if expression.startswith("="):
100 | expression = expression[1:]
101 | return Range(operator=Range.OP_EQ, target=GoVersion(expression))
102 |
103 | def __contains__(self, item):
104 | return item == self.clause.target
105 |
106 |
107 | class GoModule:
108 | def __init__(self, name: str, dependencies: Iterable[Tuple[str, str]] = ()):
109 | self.name: str = name
110 | self.dependencies: List[Tuple[str, str]] = list(dependencies)
111 |
112 | @staticmethod
113 | def tag_to_git_hash(tag: str) -> str:
114 | segments = tag.split("-")
115 | if len(segments) == 3:
116 | return segments[-1]
117 | else:
118 | return tag
119 |
120 | @staticmethod
121 | def parse_mod(mod_content: Union[str, bytes]) -> "GoModule":
122 | if isinstance(mod_content, bytes):
123 | mod_content = mod_content.decode("utf-8")
124 | in_require = False
125 | dependencies = []
126 | name = None
127 | for line in mod_content.split("\n"):
128 | if not in_require:
129 | m = REQUIRE_MATCH.match(line)
130 | if m:
131 | dependencies.append((m.group(1), m.group(2)))
132 | else:
133 | if name is None:
134 | m = MODULE_MATCH.match(line)
135 | if m:
136 | name = m.group(1)
137 | continue
138 | in_require = bool(REQUIRE_BLOCK_MATCH.match(line))
139 | elif line.strip() == ")":
140 | in_require = False
141 | else:
142 | m = REQUIRE_LINE_MATCH.match(line)
143 | if m:
144 | dependencies.append((m.group(1), m.group(2)))
145 | if name is None:
146 | raise ValueError("Missing `module` line in go mod specification")
147 | return GoModule(name, dependencies)
148 |
149 | @staticmethod
150 | def from_github(github_org: str, github_repo: str, tag: str):
151 | github_url = f"https://raw.githubusercontent.com/{github_org}/{github_repo}/{tag}/go.mod"
152 | try:
153 | with request.urlopen(github_url) as response:
154 | return GoModule.parse_mod(response.read())
155 | except HTTPError as e:
156 | if e.code == 404:
157 | # Revert to cloning the repo
158 | return GoModule.from_git(
159 | import_path=f"github.com/{github_org}/{github_repo}",
160 | git_url=f"https://github.com/{github_org}/{github_repo}",
161 | tag=tag,
162 | check_for_github=False,
163 | )
164 | raise
165 |
166 | @staticmethod
167 | def from_git(
168 | import_path: str,
169 | git_url: str,
170 | tag: str,
171 | check_for_github: bool = True,
172 | force_clone: bool = False,
173 | ):
174 | if check_for_github:
175 | m = GITHUB_URL_MATCH.fullmatch(git_url)
176 | if m:
177 | return GoModule.from_github(m.group(2), m.group(3), tag)
178 | log.info(f"Attempting to clone {git_url}")
179 | with TemporaryDirectory() as tempdir:
180 | env = {"GIT_TERMINAL_PROMPT": "0"}
181 | if os.environ.get("GIT_SSH", "") == "" and os.environ.get("GIT_SSH_COMMAND", "") == "":
182 | # disable any ssh connection pooling by git
183 | env["GIT_SSH_COMMAND"] = "ssh -o ControlMaster=no"
184 | if tag == "*" or force_clone:
185 | # this will happen if we are resolving a wildcard, typically if the user called something like
186 | # `it-depends go:github.com/ethereum/go-ethereum`
187 | td = Path(tempdir)
188 | check_call(
189 | ["git", "clone", "--depth", "1", git_url, td.name],
190 | cwd=td.parent,
191 | stderr=DEVNULL,
192 | stdout=DEVNULL,
193 | env=env,
194 | )
195 | else:
196 | check_call(["git", "init"], cwd=tempdir, stderr=DEVNULL, stdout=DEVNULL)
197 | check_call(
198 | ["git", "remote", "add", "origin", git_url],
199 | cwd=tempdir,
200 | stderr=DEVNULL,
201 | stdout=DEVNULL,
202 | )
203 | git_hash = GoModule.tag_to_git_hash(tag)
204 | try:
205 | check_call(
206 | ["git", "fetch", "--depth", "1", "origin", git_hash],
207 | cwd=tempdir,
208 | stderr=DEVNULL,
209 | stdout=DEVNULL,
210 | env=env,
211 | )
212 | except CalledProcessError:
213 | # not all git servers support `git fetch --depth 1` on a hash
214 | try:
215 | check_call(
216 | ["git", "fetch", "origin"],
217 | cwd=tempdir,
218 | stderr=DEVNULL,
219 | stdout=DEVNULL,
220 | env=env,
221 | )
222 | except CalledProcessError:
223 | log.error(f"Could not clone {git_url} for {import_path!r}")
224 | return GoModule(import_path)
225 | try:
226 | check_call(
227 | ["git", "checkout", git_hash],
228 | cwd=tempdir,
229 | stderr=DEVNULL,
230 | stdout=DEVNULL,
231 | env=env,
232 | )
233 | except CalledProcessError:
234 | if tag.startswith("="):
235 | return GoModule.from_git(import_path, git_url, tag[1:])
236 | log.warning(
237 | f"Could not checkout tag {tag} of {git_url} for {import_path!r}; "
238 | "reverting to the main branch"
239 | )
240 | return GoModule.from_git(
241 | import_path,
242 | git_url,
243 | tag,
244 | check_for_github=False,
245 | force_clone=True,
246 | )
247 | go_mod_path = Path(tempdir) / "go.mod"
248 | if not go_mod_path.exists():
249 | # the package likely doesn't have any dependencies
250 | return GoModule(import_path)
251 | with open(Path(tempdir) / "go.mod", "r") as f:
252 | return GoModule.parse_mod(f.read())
253 |
254 | @staticmethod
255 | def url_for_import_path(import_path: str) -> str:
256 |         """
257 |         Returns the `?go-get=1` metadata URL for the given Go import path.
258 |
259 |         This mirrors the Go toolchain's import-path resolution logic, except that it
260 |         always uses the https scheme rather than leaving the scheme selection open.
261 |         """
262 | slash = import_path.find("/")
263 | if slash == -1:
264 | raise vcs.VCSResolutionError("import path does not contain a slash")
265 | host, path = import_path[:slash], import_path[slash:]
266 | if "." not in host:
267 | raise vcs.VCSResolutionError("import path does not begin with hostname")
268 | if not path.startswith("/"):
269 | path = f"/{path}"
270 | return f"https://{host}{path}?go-get=1"
271 |
272 | @staticmethod
273 | def meta_imports_for_prefix(import_prefix: str) -> Tuple[str, List[MetaImport]]:
274 | url = GoModule.url_for_import_path(import_prefix)
275 | with request.urlopen(url) as req:
276 | return url, GoModule.parse_meta_go_imports(req.read().decode("utf-8"))
277 |
278 | @staticmethod
279 | def match_go_import(imports: Iterable[MetaImport], import_path: str) -> MetaImport:
280 | match: Optional[MetaImport] = None
281 | for i, m in enumerate(imports):
282 | if not import_path.startswith(m.prefix):
283 | continue
284 | elif match is not None:
285 | if match.vcs == "mod" and m.vcs != "mod":
286 | break
287 | raise ValueError(f"Multiple meta tags match import path {import_path!r}")
288 | match = m
289 | if match is None:
290 | raise ValueError(f"Unable to match import path {import_path!r}")
291 | return match
292 |
293 | @staticmethod
294 | def parse_meta_go_imports(metadata: str) -> List[MetaImport]:
295 | parser = MetadataParser()
296 | parser.feed(metadata)
297 | return parser.metadata
298 |
299 | @staticmethod
300 | def repo_root_for_import_dynamic(import_path: str) -> vcs.Repository:
301 | url = GoModule.url_for_import_path(import_path)
302 | try:
303 | imports = GoModule.parse_meta_go_imports(request.urlopen(url).read().decode("utf-8"))
304 | except (HTTPError, URLError):
305 | raise ValueError(f"Could not download metadata from {url} for import {import_path!s}")
306 | meta_import = GoModule.match_go_import(imports, import_path)
307 | if meta_import.prefix != import_path:
308 | new_url, imports = GoModule.meta_imports_for_prefix(meta_import.prefix)
309 | meta_import2 = GoModule.match_go_import(imports, import_path)
310 | if meta_import != meta_import2:
311 | raise ValueError(
312 | f"{url} and {new_url} disagree about go-import for {meta_import.prefix!r}"
313 | )
314 | # validateRepoRoot(meta_import.RepoRoot)
315 | if meta_import.vcs == "mod":
316 | the_vcs = vcs.VCS_MOD
317 | else:
318 | the_vcs = vcs.vcs_by_cmd(meta_import.vcs) # type: ignore
319 | if the_vcs is None:
320 | raise ValueError(f"{url}: unknown VCS {meta_import.vcs!r}")
321 | vcs.check_go_vcs(the_vcs, meta_import.prefix)
322 | return vcs.Repository(
323 | repo=meta_import.repo_root,
324 | root=meta_import.prefix,
325 | is_custom=True,
326 | vcs=the_vcs,
327 | )
328 |
329 | @staticmethod
330 | def repo_root_for_import_path(import_path: str) -> vcs.Repository:
331 | try:
332 | return vcs.resolve(import_path)
333 | except vcs.VCSResolutionError:
334 | pass
335 | return GoModule.repo_root_for_import_dynamic(import_path)
336 |
337 | @staticmethod
338 | def from_import(import_path: str, tag: str) -> "GoModule":
339 | try:
340 | repo = GoModule.repo_root_for_import_path(import_path)
341 | except ValueError as e:
342 | log.warning(str(e))
343 | return GoModule(import_path)
344 | if repo.vcs.name == "Git":
345 | return GoModule.from_git(import_path, repo.repo, tag)
346 | else:
347 | raise NotImplementedError(f"TODO: add support for VCS type {repo.vcs.name}")
348 |
349 | @staticmethod
350 | def load(name_or_url: str, tag: str = "master") -> "GoModule":
351 | if not name_or_url.startswith("http://") and not name_or_url.startswith("https://"):
352 | return GoModule.from_import(name_or_url, tag)
353 | else:
354 | return GoModule.from_git(name_or_url, name_or_url, tag)
355 |
356 |
357 | class GoResolver(DependencyResolver):
358 | name = "go"
359 |     description = "classifies the dependencies of Go modules using `go.mod`"
360 |
361 | def resolve(self, dependency: Dependency) -> Iterator[Package]:
362 | # assert isinstance(dependency.semantic_version, GoSpec)
363 | version_string = str(dependency.semantic_version)
364 | module = GoModule.from_import(dependency.package, version_string)
365 | yield Package(
366 | name=module.name,
367 | version=GoVersion(version_string), # type: ignore
368 | source=dependency.source,
369 | dependencies=[
370 | Dependency(
371 | package=package,
372 | semantic_version=GoSpec(f"={version}"),
373 | source=dependency.source,
374 | )
375 | for package, version in module.dependencies
376 | ],
377 | )
378 |
379 | @classmethod
380 | def parse_spec(cls, spec: str) -> SemanticVersion:
381 | return GoSpec(spec)
382 |
383 | @classmethod
384 | def parse_version(cls, version_string: str) -> Version:
385 | return GoVersion(version_string) # type: ignore
386 |
387 | def can_resolve_from_source(self, repo: SourceRepository) -> bool:
388 | return bool(self.is_available()) and (repo.path / "go.mod").exists()
389 |
390 | def resolve_from_source(self, repo: SourceRepository, cache: Optional[PackageCache] = None):
391 | if not self.can_resolve_from_source(repo):
392 | return None
393 |
394 | with open(repo.path / "go.mod") as f:
395 | module = GoModule.parse_mod(f.read())
396 | git_hash = git_commit(str(repo.path))
397 | timestamp = datetime.utcnow().strftime("%Y%m%d%H%M%S")
398 | version = f"v0.0.0-{timestamp}-"
399 | if git_hash is None:
400 | version = f"{version}????"
401 | else:
402 | version = f"{version}{git_hash}"
403 | return SourcePackage(
404 | name=module.name,
405 | version=GoVersion(version), # type: ignore
406 | source_repo=repo,
407 | source=self.name,
408 | dependencies=[
409 | Dependency(package=package, semantic_version=GoSpec(f"={version}"), source=self)
410 | for package, version in module.dependencies
411 | ],
412 | )
413 |
--------------------------------------------------------------------------------
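
`GoModule.parse_mod` accepts both single-line `require` directives and `require ( ... )` blocks, which a short example makes concrete:

```python
from it_depends.go import GoModule

module = GoModule.parse_mod("""\
module github.com/example/project

require github.com/pkg/errors v0.9.1

require (
    golang.org/x/sys v0.1.0
    github.com/stretchr/testify v1.8.0 // indirect
)
""")
print(module.name)          # github.com/example/project
print(module.dependencies)  # [('github.com/pkg/errors', 'v0.9.1'), ('golang.org/x/sys', 'v0.1.0'),
                            #  ('github.com/stretchr/testify', 'v1.8.0')]
```
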
/it_depends/graphs.py:
--------------------------------------------------------------------------------
1 | from typing import (
2 | Dict,
3 | Generic,
4 | Iterable,
5 | Iterator,
6 | Optional,
7 | Set,
8 | Tuple,
9 | Type,
10 | TypeVar,
11 | Union,
12 | )
13 |
14 | import networkx as nx
15 |
16 |
17 | T = TypeVar("T")
18 | R = TypeVar("R")
19 |
20 |
21 | class RootedDiGraph(nx.DiGraph, Generic[T, R]):
22 | root_type: Type[R]
23 |
24 | def __init__(self, *args, **kwargs):
25 | super().__init__(*args, **kwargs)
26 | self.roots: Set[R] = set()
27 | self._all_pairs_shortest_paths: Optional[Dict[T, Dict[T, int]]] = None
28 | self._shortest_path_from_root: Optional[Dict[T, int]] = None
29 |
30 | def __init_subclass__(cls, **kwargs):
31 | if not hasattr(cls, "root_type") or getattr(cls, "root_type") is None:
32 | raise TypeError(f"{cls.__name__} must assign a `root_type` class variable")
33 |
34 | def shortest_path_from_root(self, node: T) -> int:
35 | """Returns the shortest path from a root to node.
36 |
37 | If there are no roots in the graph or there is no path from a root, return -1.
38 |
39 | """
40 | if not self.roots:
41 | return -1
42 | if len(self.roots) > 1:
43 | path_lengths = [self.shortest_path_length(root, node) for root in self.roots]
44 |             return min((length for length in path_lengths if length >= 0), default=-1)
45 | elif self._shortest_path_from_root is None:
46 | self._shortest_path_from_root = nx.single_source_shortest_path_length(
47 | self, next(iter(self.roots))
48 | ) # type: ignore
49 | return self._shortest_path_from_root[node]
50 |
51 | def shortest_path_length(self, from_node: Union[T, R], to_node: T) -> int:
52 | if self._all_pairs_shortest_paths is None:
53 | self._all_pairs_shortest_paths = dict(nx.all_pairs_shortest_path_length(self)) # type: ignore
54 | if (
55 | from_node not in self._all_pairs_shortest_paths
56 | or to_node not in self._all_pairs_shortest_paths[from_node] # type: ignore
57 | ): # type: ignore
58 | return -1
59 | return self._all_pairs_shortest_paths[from_node][to_node] # type: ignore
60 |
61 | def _handle_new_node(self, node: T):
62 | if isinstance(node, self.root_type):
63 | self.roots.add(node)
64 |
65 | def _handle_removed_node(self, node: T):
66 | if isinstance(node, self.root_type):
67 | self.roots.remove(node)
68 |
69 | def add_node(self, node_for_adding: T, **attr):
70 | self._handle_new_node(node_for_adding)
71 | return super().add_node(node_for_adding, **attr)
72 |
73 | def add_nodes_from(self, nodes_for_adding: Iterable[T], **attr):
74 | nodes = []
75 | for node in nodes_for_adding:
76 | self._handle_new_node(node)
77 | nodes.append(node)
78 |         return super().add_nodes_from(nodes, **attr)
79 |
80 | def add_edge(self, u_of_edge: T, v_of_edge: T, **attr):
81 | self._handle_new_node(u_of_edge)
82 | self._handle_new_node(v_of_edge)
83 | return super().add_edge(u_of_edge, v_of_edge, **attr)
84 |
85 | def add_edges_from(
86 | self, ebunch_to_add: Iterable[Union[Tuple[T, T], Tuple[T, T, Dict]]], **attr
87 | ):
88 | edges = []
89 | for u, v, *r in ebunch_to_add:
90 | self._handle_new_node(u)
91 | self._handle_new_node(v)
92 | edges.append((u, v, *r))
93 |         super().add_edges_from(edges, **attr)  # pass the materialized list; ebunch_to_add may be a one-shot generator
94 |
95 | def remove_node(self, node_for_removing: T):
96 | self._handle_removed_node(node_for_removing)
97 | return super().remove_node(node_for_removing)
98 |
99 | def remove_nodes_from(self, nodes_for_removing: Iterable[T]):
100 | nodes = []
101 | for node in nodes_for_removing:
102 | self._handle_removed_node(node)
103 | nodes.append(node)
104 | return super().remove_nodes_from(nodes)
105 |
106 | def find_roots(self) -> "RootedDiGraph[T, T]":
107 | graph: RootedDiGraph[T, T] = RootedDiGraph()
108 | graph.root_type = self.root_type # type: ignore
109 | graph.add_nodes_from(self.nodes)
110 | graph.add_edges_from(self.edges)
111 | graph.roots = {n for n, d in self.in_degree() if d == 0} # type: ignore
112 | return graph
113 |
114 | def __iter__(self) -> Iterator[T]:
115 | yield from super().__iter__()
116 |
117 | def distance_to(self, graph: "RootedDiGraph[T, R]", normalize: bool = False) -> float:
118 | return compare_rooted_graphs(self, graph, normalize)
119 |
120 |
121 | def compare_rooted_graphs(
122 | graph1: RootedDiGraph[T, R], graph2: RootedDiGraph[T, R], normalize: bool = False
123 | ) -> float:
124 | """Calculates the edit distance between two rooted graphs.
125 |
126 | If normalize == False (the default), a value of zero means the graphs are identical, with increasing values
127 | corresponding to the difference between the graphs.
128 |
129 | If normalize == True, the returned value equals 1.0 iff the graphs are identical and values closer to zero if the
130 | graphs are less similar.
131 |
132 | """
133 | if not graph1.roots or not graph2.roots:
134 | raise ValueError("Both graphs must have at least one root")
135 | nodes1 = {node for node in graph1 if node not in graph1.roots}
136 | nodes2 = {node for node in graph2 if node not in graph2.roots}
137 | common_nodes = nodes1 & nodes2
138 | not_in_2 = nodes1 - nodes2
139 | not_in_1 = nodes2 - nodes1
140 | distance = 0.0
141 | for node in common_nodes:
142 | d1 = graph1.shortest_path_from_root(node)
143 | d2 = graph2.shortest_path_from_root(node)
144 | if d1 != d2:
145 | distance += 1.0 / min(d1, d2) - 1.0 / max(d1, d2)
146 | for node in not_in_2:
147 | distance += 1.0 / max(graph1.shortest_path_from_root(node), 1)
148 | for node in not_in_1:
149 | distance += 1.0 / max(graph2.shortest_path_from_root(node), 1)
150 | if normalize:
151 | if distance > 0.0:
152 | # the graphs are not identical
153 | max_distance = sum(
154 | max(graph1.shortest_path_from_root(node), 1) for node in graph1
155 | ) + sum(max(graph2.shortest_path_from_root(node), 1) for node in graph2)
156 | distance /= max_distance
157 | distance = 1.0 - distance
158 | return distance
159 |
--------------------------------------------------------------------------------
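
A small self-contained demonstration of the rooted-graph machinery; `Root` here is a hypothetical marker type, since any `root_type` works:

```python
from it_depends.graphs import RootedDiGraph, compare_rooted_graphs

class Root(str):
    """Nodes of this type are automatically tracked as roots."""

class DemoGraph(RootedDiGraph[str, Root]):
    root_type = Root

g = DemoGraph()
g.add_edge(Root("app"), "libfoo")
g.add_edge("libfoo", "libbar")
print(g.shortest_path_from_root("libbar"))  # 2

h = DemoGraph()
h.add_edge(Root("app"), "libfoo")
print(compare_rooted_graphs(g, h))                  # 0.5: "libbar" is missing from h
print(compare_rooted_graphs(g, g, normalize=True))  # 1.0: identical graphs
```
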
/it_depends/html.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional, Set, Union
2 |
3 | from .dependencies import DependencyGraph, Package, PackageCache
4 |
5 | TEMPLATE: str = """
6 |
7 | It-Depends | $TITLE
8 |
15 |
16 |
17 |
18 |
19 | $TITLE
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
81 |
82 |
83 | """
84 |
85 |
86 | def graph_to_html(
87 | graph: Union[DependencyGraph, PackageCache],
88 | collapse_versions: bool = True,
89 | title: Optional[str] = None,
90 | ) -> str:
91 | if not isinstance(graph, DependencyGraph):
92 | graph = graph.to_graph()
93 | if collapse_versions:
94 | graph = graph.collapse_versions()
95 |
96 | if graph.source_packages:
97 | roots: Set[Package] = graph.source_packages # type: ignore
98 | else:
99 | roots = graph.find_roots().roots
100 |
101 | if not graph.source_packages:
102 | layout = "improvedLayout: false"
103 | else:
104 | layout = "hierarchical: true"
105 |
106 | # sort the nodes and assign IDs to them (so they are in a deterministic order):
107 | node_ids: Dict[Package, int] = {}
108 | for node in sorted(graph):
109 | node_ids[node] = len(node_ids)
110 |
111 | nodes = []
112 | edges = []
113 | for package, node_id in node_ids.items():
114 | nodes.append({"id": node_id, "label": package.full_name})
115 | if package in roots:
116 | nodes[-1].update(
117 | {
118 | "shape": "square",
119 | "color": "red",
120 | "borderWidth": 4,
121 | }
122 | )
123 | if package.vulnerabilities:
124 | nodes[-1].update({"color": "red"})
125 | if graph.source_packages:
126 | nodes[-1]["level"] = max(graph.shortest_path_from_root(package), 0)
127 | for pkg1, pkg2, *_ in graph.out_edges(package): # type: ignore
128 | dep = graph.get_edge_data(pkg1, pkg2)["dependency"]
129 | if collapse_versions:
130 | # if we are collapsing versions, omit the version name
131 | dep_name = f"{dep.source}:{dep.package}"
132 | else:
133 | dep_name = str(dep)
134 | edges.append({"from": node_ids[pkg1], "to": node_ids[pkg2], "shape": "dot"})
135 | if dep_name != pkg2.full_name:
136 | edges[-1]["label"] = dep_name
137 |
138 | if title is None:
139 | source_packages = ", ".join(p.full_name for p in graph.source_packages)
140 | if not source_packages:
141 | title = "Dependency Graph"
142 | else:
143 | title = f"Dependency Graph for {source_packages}"
144 |
145 | return (
146 | TEMPLATE.replace("$NODES", repr(nodes))
147 | .replace("$EDGES", repr(edges))
148 | .replace("$TITLE", title)
149 | .replace("$LAYOUT", layout)
150 | )
151 |
--------------------------------------------------------------------------------
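
`graph_to_html` ties the pieces together: it renders a resolved `PackageCache` (or an explicit `DependencyGraph`) as a self-contained HTML page. A sketch, assuming `cache` is a populated `PackageCache` such as the `DBPackageCache` shown earlier:

```python
from pathlib import Path

from it_depends.html import graph_to_html

# `cache` is assumed to be a populated PackageCache (e.g. a DBPackageCache after resolution)
html = graph_to_html(cache, collapse_versions=True, title="Example Dependency Graph")
Path("dependencies.html").write_text(html)
```
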
/it_depends/it_depends.py:
--------------------------------------------------------------------------------
1 | from appdirs import AppDirs
2 | import sys
3 |
4 | if sys.version_info < (3, 12):
5 | import pkg_resources
6 |
7 | def version() -> str:
8 | return pkg_resources.require("it-depends")[0].version
9 |
10 | else:
11 | from importlib.metadata import version as meta_version
12 |
13 | def version() -> str:
14 | return meta_version("it-depends")
15 |
16 |
17 | APP_DIRS = AppDirs("it-depends", "Trail of Bits")
--------------------------------------------------------------------------------
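
The version shim above keeps callers agnostic of which metadata API their Python provides:

```python
from it_depends.it_depends import APP_DIRS, version

print(version())                # the installed it-depends version, on any supported Python
print(APP_DIRS.user_cache_dir)  # where the default dependencies.sqlite cache lives (see db.py)
```
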
/it_depends/native.py:
--------------------------------------------------------------------------------
1 | from logging import getLogger
2 | from pathlib import Path
3 | import re
4 | from tempfile import NamedTemporaryFile
5 | from threading import Lock
6 | from typing import Dict, FrozenSet, Iterator, Optional
7 |
8 | from tqdm import tqdm
9 |
10 | from . import version as it_depends_version
11 | from .docker import DockerContainer, InMemoryDockerfile, InMemoryFile
12 | from .dependencies import (
13 | Dependency,
14 | DependencyResolver,
15 | DockerSetup,
16 | Package,
17 | SemanticVersion,
18 | )
19 |
20 | logger = getLogger(__name__)
21 |
22 |
23 | def make_dockerfile(docker_setup: DockerSetup) -> InMemoryDockerfile:
24 | install_script = InMemoryFile("install.sh", docker_setup.install_package_script.encode("utf-8"))
25 | run_script = InMemoryFile("run.sh", docker_setup.load_package_script.encode("utf-8"))
26 | baseline_script = InMemoryFile("baseline.sh", docker_setup.baseline_script.encode("utf-8"))
27 | pkgs = " ".join(docker_setup.apt_get_packages)
28 | return InMemoryDockerfile(
29 | f"""
30 | FROM ubuntu:20.04
31 |
32 | RUN mkdir -p /workdir
33 |
34 | RUN ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime
35 |
36 | RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends strace {pkgs}
37 |
38 | {docker_setup.post_install}
39 |
40 | WORKDIR /workdir
41 |
42 | COPY install.sh .
43 | COPY run.sh .
44 | COPY baseline.sh .
45 | RUN chmod +x *.sh
46 | """,
47 | local_files=(install_script, run_script, baseline_script),
48 | )
49 |
50 |
51 | STRACE_LIBRARY_REGEX = re.compile(
52 | r"^open(at)?\(\s*[^,]*\s*,\s*\"((.+?)([^\./]+)\.so(\.(.+?))?)\".*"
53 | )
54 | CONTAINERS_BY_SOURCE: Dict[DependencyResolver, DockerContainer] = {}
55 | BASELINES_BY_SOURCE: Dict[DependencyResolver, FrozenSet[Dependency]] = {}
56 | _CONTAINER_LOCK: Lock = Lock()
57 |
58 |
59 | def get_dependencies(
60 | container: DockerContainer, command: str, pre_command: Optional[str] = None
61 | ) -> Iterator[Dependency]:
62 | """Yields all dynamic libraries loaded by `command`, in order, including duplicates"""
63 | stdout = NamedTemporaryFile(prefix="stdout", delete=False)
64 | if pre_command is not None:
65 | pre_command = f"{pre_command} > /dev/null 2>/dev/null && "
66 | else:
67 | pre_command = ""
68 | command = f"{pre_command}strace -e open,openat -f {command} 3>&1 1>&2 2>&3"
69 | try:
70 | container.run(
71 | "bash",
72 | "-c",
73 | command,
74 | rebuild=False,
75 | interactive=False,
76 | stdout=stdout,
77 | check_existence=False,
78 | )
79 | stdout.close()
80 | with open(stdout.name, "r") as f:
81 | for line in f.readlines():
82 | m = STRACE_LIBRARY_REGEX.match(line)
83 | if m:
84 | path = m.group(2)
85 | if path not in ("/etc/ld.so.cache",) and path.startswith("/"):
86 | yield Dependency(
87 | package=path,
88 | source="ubuntu", # make the package be from the UbuntuResolver
89 | semantic_version=SemanticVersion.parse("*"),
90 | )
91 | finally:
92 | Path(stdout.name).unlink()
93 |
94 |
95 | def get_package_dependencies(container: DockerContainer, package: Package) -> Iterator[Dependency]:
96 | yield from get_dependencies(
97 | container=container,
98 | pre_command=f"./install.sh {package.name} {package.version!s}",
99 | command=f"./run.sh {package.name}",
100 | )
101 |
102 |
103 | def get_baseline_dependencies(container: DockerContainer) -> Iterator[Dependency]:
104 | yield from get_dependencies(container=container, command="./baseline.sh")
105 |
106 |
107 | def container_for(source: DependencyResolver) -> DockerContainer:
108 | with _CONTAINER_LOCK:
109 | if source in CONTAINERS_BY_SOURCE:
110 | return CONTAINERS_BY_SOURCE[source]
111 | docker_setup = source.docker_setup()
112 | if docker_setup is None:
113 | raise ValueError(f"source {source.name} does not support native dependency resolution")
114 | with tqdm(
115 | desc=f"configuring Docker for {source.name}",
116 | leave=False,
117 | unit=" steps",
118 | total=2,
119 | initial=1,
120 | ) as t, make_dockerfile(docker_setup) as dockerfile:
121 | container = DockerContainer(
122 | f"trailofbits/it-depends-{source.name!s}",
123 | dockerfile,
124 | tag=it_depends_version(),
125 | )
126 | t.update(1)
127 | container.rebuild()
128 | CONTAINERS_BY_SOURCE[source] = container
129 | return container
130 |
131 |
132 | def baseline_for(source: DependencyResolver) -> FrozenSet[Dependency]:
133 | with _CONTAINER_LOCK:
134 | if source not in BASELINES_BY_SOURCE:
135 | baseline = frozenset(get_baseline_dependencies(container_for(source)))
136 | BASELINES_BY_SOURCE[source] = baseline
137 | return baseline
138 | else:
139 | return BASELINES_BY_SOURCE[source]
140 |
141 |
142 | def get_native_dependencies(package: Package, use_baseline: bool = False) -> Iterator[Dependency]:
143 | """Yields the native dependencies for an individual package"""
144 | if not package.resolver.docker_setup():
145 | return
146 | container = container_for(package.resolver)
147 | if use_baseline:
148 | baseline = baseline_for(package.resolver)
149 | else:
150 | baseline = frozenset()
151 | for dep in get_package_dependencies(container, package):
152 | if dep not in baseline:
153 | yield dep
154 |
--------------------------------------------------------------------------------
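
A sketch of how the native resolver is driven; `package` is assumed to be a resolved `Package` whose resolver defines a `docker_setup()` (e.g. pip or npm), and Docker must be available:

```python
from it_depends.native import get_native_dependencies

# Each yielded Dependency is a shared-library path observed via strace inside the
# package's container, attributed to the "ubuntu" resolver for further resolution.
for dep in get_native_dependencies(package, use_baseline=True):
    print(dep.package)  # e.g. /lib/x86_64-linux-gnu/libssl.so.1.1
```
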
/it_depends/npm.py:
--------------------------------------------------------------------------------
1 | import json
2 | from logging import getLogger
3 | from pathlib import Path
4 | import subprocess
5 | from typing import Dict, Iterator, Optional, Union
6 |
7 | from semantic_version import NpmSpec, SimpleSpec, Version
8 |
9 | from .dependencies import (
10 | AliasedDependency,
11 | Dependency,
12 | DependencyResolver,
13 | DockerSetup,
14 | Package,
15 | PackageCache,
16 | SemanticVersion,
17 | SourcePackage,
18 | SourceRepository,
19 | )
20 |
21 | log = getLogger(__name__)
22 |
23 |
24 | class NPMResolver(DependencyResolver):
25 | name = "npm"
26 | description = "classifies the dependencies of JavaScript packages using `npm`"
27 |
28 | def can_resolve_from_source(self, repo: SourceRepository) -> bool:
29 | return bool(self.is_available()) and (repo.path / "package.json").exists()
30 |
31 | def resolve_from_source(
32 | self, repo: SourceRepository, cache: Optional[PackageCache] = None
33 | ) -> Optional[SourcePackage]:
34 | if not self.can_resolve_from_source(repo):
35 | return None
36 | return NPMResolver.from_package_json(repo)
37 |
38 | @staticmethod
39 | def from_package_json(package_json_path: Union[Path, str, SourceRepository]) -> SourcePackage:
40 | if isinstance(package_json_path, SourceRepository):
41 | path = package_json_path.path
42 | source_repository = package_json_path
43 | else:
44 | path = Path(package_json_path)
45 | source_repository = SourceRepository(path.parent)
46 | if path.is_dir():
47 | path = path / "package.json"
48 | if not path.exists():
49 | raise ValueError(f"Expected a package.json file at {path!s}")
50 | with open(path, "r") as json_file:
51 | package = json.load(json_file)
52 | if "name" in package:
53 | name = package["name"]
54 | else:
55 | # use the parent directory name
56 | name = path.parent.name
57 | if "dependencies" in package:
58 | dependencies: Dict[str, str] = package["dependencies"]
59 | else:
60 | dependencies = {}
61 | if "version" in package:
62 | version = package["version"]
63 | else:
64 | version = "0"
65 | version = Version.coerce(version)
66 |
67 | return SourcePackage(
68 | name,
69 | version,
70 | source_repo=source_repository,
71 | source="npm",
72 | dependencies=[generate_dependency_from_information(dep_name, dep_version)
73 | for dep_name, dep_version in dependencies.items()],
74 | )
75 |
76 | def resolve(self, dependency: Union[Dependency, AliasedDependency]) -> Iterator[Package]:
77 | """Yields all packages that satisfy the dependency without expanding those packages' dependencies"""
78 | if dependency.source != self.name:
79 | return
80 |
81 | dependency_name = dependency.package
82 | if isinstance(dependency, AliasedDependency):
83 | dependency_name = f"@{dependency.alias_name}"
84 | # Fix an issue when setting a dependency with a scope, we need to prefix it with @
85 | elif dependency_name.count("/") == 1 and not dependency_name.startswith("@"):
86 | dependency_name = f"@{dependency_name}"
87 |
88 | try:
89 | output = subprocess.check_output(
90 | [
91 | "npm",
92 | "view",
93 | "--json",
94 | f"{dependency_name}@{dependency.semantic_version!s}",
95 | "name",
96 | "version",
97 | "dependencies",
98 | ]
99 | )
100 | except subprocess.CalledProcessError as e:
101 | log.warning(
102 | f"Error running `npm view --json {dependency_name}@{dependency.semantic_version!s} "
103 | f"dependencies`: {e!s}"
104 | )
105 | return
106 |
107 | try:
108 | result = json.loads(output)
109 | except ValueError as e:
110 | raise ValueError(
111 | f"Error parsing output of `npm view --json {dependency_name}@{dependency.semantic_version!s} "
112 | f"dependencies`: {e!s}"
113 | )
114 |
115 | # Only 1 version
116 | if isinstance(result, dict):
117 | deps = result.get("dependencies", {})
118 | yield Package(
119 | name=dependency.package,
120 | version=Version.coerce(result["version"]),
121 | source=self,
122 | dependencies=(
123 | generate_dependency_from_information(dep_name, dep_version, self) for dep_name, dep_version in deps.items()
124 | ),
125 | )
126 | elif isinstance(result, list):
127 | # This means that there are multiple dependencies that match the version
128 | for package in result:
129 | assert package["name"] == dependency.package, "Problem with NPM view output"
130 | dependencies = package.get("dependencies", {})
131 | yield Package(
132 | name=dependency.package,
133 | version=Version.coerce(package["version"]),
134 | source=self,
135 | dependencies=(generate_dependency_from_information(dep_name, dep_version, self)
136 | for dep_name, dep_version in dependencies.items())
137 | )
138 |
139 | @classmethod
140 |     def parse_spec(cls, spec: str) -> Optional[SemanticVersion]:
141 | try:
142 | return NpmSpec(spec)
143 | except ValueError:
144 | pass
145 | try:
146 | return SimpleSpec(spec)
147 | except ValueError:
148 | pass
149 | # Sometimes NPM specs have whitespace, which trips up the parser
150 | no_whitespace = "".join(c for c in spec if c != " ")
151 | if no_whitespace != spec:
152 |             return NPMResolver.parse_spec(no_whitespace)
153 |         return None  # the spec could not be parsed; callers fall back to SimpleSpec("*")
154 |
154 | def docker_setup(self) -> DockerSetup:
155 | return DockerSetup(
156 | apt_get_packages=["npm"],
157 | install_package_script="""#!/usr/bin/env bash
158 | npm install $1@$2
159 | """,
160 | load_package_script="""#!/usr/bin/env bash
161 | node -e "require(\\"$1\\")"
162 | """,
163 | baseline_script='#!/usr/bin/env node -e ""\n',
164 | )
165 |
166 |
167 | def generate_dependency_from_information(
168 | package_name: str,
169 | package_version: str,
170 | source: Union[str, NPMResolver] = "npm",
171 | ) -> Union[Dependency, AliasedDependency, None]:
172 | """Generate a dependency from a dependency declaration.
173 |
174 |     A dependency may be declared like this:
175 |     * [<@scope>/]<name>@<semantic version>
176 |     * <alias>@npm:<package>@<semantic version>
177 | """
178 | if package_version.startswith("npm:"):
179 |         # Does the package have a scope?
180 |
181 | if package_version.count("@") == 2:
182 | parts = package_version.split("@")
183 | scope, version = parts[1], parts[2]
184 |
185 | semantic_version = NPMResolver.parse_spec(version)
186 | if semantic_version is None:
187 | log.warning("Unable to compute the semantic version of %s (%s)", package_name, package_version)
188 | semantic_version = SimpleSpec("*")
189 |
190 | return AliasedDependency(
191 | package=package_name,
192 | alias_name=scope,
193 | semantic_version=semantic_version,
194 | source=source,
195 | )
196 |
197 | else:
198 |             msg = (f"This type of dependency ({package_name} {package_version}) is not yet supported."
199 |                    f" Please open an issue on GitHub.")
200 | raise ValueError(msg)
201 |
202 | else:
203 | semantic_version = NPMResolver.parse_spec(package_version)
204 | if semantic_version is None:
205 | log.warning("Unable to compute the semantic version of %s (%s)", package_name, package_version)
206 | semantic_version = SimpleSpec("*")
207 |
208 | return Dependency(
209 | package=package_name,
210 | semantic_version=semantic_version,
211 | source=source,
212 | )
213 |
--------------------------------------------------------------------------------
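A minimal usage sketch for the helpers above, assuming `it-depends` is installed so that `it_depends.npm` is importable; the package names are illustrative, not taken from the repository:

    from it_depends.npm import generate_dependency_from_information

    # Plain declaration, as found in a package.json "dependencies" map:
    dep = generate_dependency_from_information("lodash", "^4.17.21")
    print(type(dep).__name__, dep.package, dep.semantic_version)   # Dependency lodash ^4.17.21

    # Aliased declaration ("<package-name>@npm:<real-package-name>@<version>"):
    alias = generate_dependency_from_information("my-alias", "npm:@types/node@^18.0.0")
    print(type(alias).__name__, alias.package, alias.alias_name)   # AliasedDependency my-alias types/node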
/it_depends/pip.py:
--------------------------------------------------------------------------------
1 | import io
2 | from logging import getLogger
3 | from pathlib import Path
4 | from tempfile import TemporaryDirectory
5 | import subprocess
6 | import sys
7 | from typing import Iterable, Iterator, List, Optional, Union
8 |
9 | from johnnydep import JohnnyDist
10 | from johnnydep.logs import configure_logging
11 |
12 | from .dependencies import (
13 | Dependency,
14 | DependencyResolver,
15 | DockerSetup,
16 | Package,
17 | PackageCache,
18 | SemanticVersion,
19 | SimpleSpec,
20 | SourcePackage,
21 | SourceRepository,
22 | Version,
23 | )
24 |
25 |
26 | configure_logging(1)
27 | log = getLogger(__name__)
28 |
29 |
30 | class PipResolver(DependencyResolver):
31 | name = "pip"
32 | description = "classifies the dependencies of Python packages using pip"
33 |
34 | def can_resolve_from_source(self, repo: SourceRepository) -> bool:
35 |         # parenthesize the `or`: `and` binds tighter, and availability is required in either case
36 |         return self.is_available and (
37 |             (repo.path / "setup.py").exists()
38 |             or (repo.path / "requirements.txt").exists()
39 |         )
40 |
41 | def resolve_from_source(
42 | self, repo: SourceRepository, cache: Optional[PackageCache] = None
43 | ) -> Optional[SourcePackage]:
44 | if not self.can_resolve_from_source(repo):
45 | return None
46 | return PipSourcePackage.from_repo(repo)
47 |
48 | def docker_setup(self) -> Optional[DockerSetup]:
49 | return DockerSetup(
50 | apt_get_packages=["python3", "python3-pip", "python3-dev", "gcc"],
51 | install_package_script="""#!/usr/bin/env bash
52 | pip3 install $1==$2
53 | """,
54 | load_package_script="""#!/usr/bin/env bash
55 | python3 -c "import $1"
56 | """,
57 | baseline_script='#!/usr/bin/env python3 -c ""\n',
58 | )
59 |
60 | @staticmethod
61 | def _get_specifier(dist_or_str: Union[JohnnyDist, str]) -> SimpleSpec:
62 | if isinstance(dist_or_str, JohnnyDist):
63 | dist_or_str = dist_or_str.specifier
64 | try:
65 | return SimpleSpec(dist_or_str)
66 | except ValueError:
67 | return SimpleSpec("*")
68 |
69 | @staticmethod
70 | def parse_requirements_txt_line(line: str) -> Optional[Dependency]:
71 | line = line.strip()
72 | if not line:
73 | return None
74 |         # split at the earliest specifier character so that e.g. "tqdm>=4.48.0"
75 |         # yields the name "tqdm" rather than "tqdm>"
76 |         positions = [pos for pos in (line.find(c) for c in "=<>~!") if pos >= 0]
77 |         delimiter_pos = min(positions, default=-1)
78 | if delimiter_pos < 0:
79 | # the requirement does not have a version specifier
80 | name = line
81 | version = SimpleSpec("*")
82 | else:
83 | name = line[:delimiter_pos]
84 | version = PipResolver._get_specifier(line[delimiter_pos:])
85 | return Dependency(package=name, semantic_version=version, source=PipResolver())
86 |
87 | @staticmethod
88 | def get_dependencies(
89 | dist_or_requirements_txt_path: Union[JohnnyDist, Path, str]
90 | ) -> Iterable[Dependency]:
91 | if isinstance(dist_or_requirements_txt_path, JohnnyDist):
92 | return (
93 | Dependency(
94 | package=child.name,
95 | semantic_version=PipResolver._get_specifier(child),
96 | source=PipResolver(),
97 | )
98 | for child in dist_or_requirements_txt_path.children
99 | )
100 | elif isinstance(dist_or_requirements_txt_path, str):
101 | dist_or_requirements_txt_path = Path(dist_or_requirements_txt_path)
102 | with open(dist_or_requirements_txt_path / "requirements.txt", "r") as f:
103 | return filter(
104 | lambda d: d is not None,
105 | (
106 | PipResolver.parse_requirements_txt_line(line) # type: ignore
107 | for line in f.readlines()
108 | ),
109 | )
110 |
111 | @staticmethod
112 | def get_version(version_str: str, none_default: Optional[Version] = None) -> Optional[Version]:
113 | if version_str == "none":
114 | # this will happen if the dist is for a local wheel:
115 | return none_default
116 | else:
117 | try:
118 | return Version.coerce(version_str)
119 | except ValueError:
120 | components = version_str.split(".")
121 | if len(components) == 4:
122 | try:
123 | # assume the version component after the last period is the release
124 | return Version(
125 | major=int(components[0]),
126 | minor=int(components[1]),
127 | patch=int(components[2]),
128 | prerelease=components[3],
129 | )
130 | except ValueError:
131 | pass
132 | # TODO: Figure out a better way to handle invalid version strings
133 | return None
134 |
135 | def resolve_dist(
136 | self,
137 | dist: JohnnyDist,
138 | recurse: bool = True,
139 | version: SemanticVersion = SimpleSpec("*"),
140 | ) -> Iterable[Package]:
141 | queue = [(dist, version)]
142 | packages: List[Package] = []
143 | while queue:
144 | dist, sem_version = queue.pop()
145 | if dist.version_installed is not None:
146 | none_default = Version.coerce(dist.version_installed)
147 | else:
148 | none_default = None
149 | for version in sem_version.filter(
150 | filter(
151 | lambda v: v is not None,
152 | (
153 | PipResolver.get_version(v_str, none_default=none_default)
154 | for v_str in dist.versions_available
155 | ),
156 | )
157 | ):
158 | package = Package(
159 | name=dist.name,
160 | version=version,
161 | dependencies=self.get_dependencies(dist),
162 | source=self,
163 | )
164 | packages.append(package)
165 | if not recurse:
166 | break
167 | queue.extend((child, self._get_specifier(child)) for child in dist.children)
168 | return packages
169 |
170 | def resolve(self, dependency: Dependency) -> Iterator[Package]:
171 |         log.debug(f"Resolving {dependency}")
172 | try:
173 | return iter(
174 | self.resolve_dist(
175 | JohnnyDist(f"{dependency.package}"),
176 | version=dependency.semantic_version,
177 | recurse=False,
178 | )
179 | )
180 | except ValueError as e:
181 | log.warning(str(e))
182 | return iter(())
183 |
184 |
185 | class PipSourcePackage(SourcePackage):
186 | @staticmethod
187 | def from_dist(dist: JohnnyDist, source_path: Path) -> "PipSourcePackage":
188 | version_str = dist.specifier
189 | if version_str.startswith("=="):
190 | version_str = version_str[2:]
191 | return PipSourcePackage(
192 | name=dist.name,
193 | version=PipResolver.get_version(version_str),
194 | dependencies=PipResolver.get_dependencies(dist),
195 | source_repo=SourceRepository(source_path),
196 | source="pip",
197 | )
198 |
199 | @staticmethod
200 | def from_repo(repo: SourceRepository) -> "PipSourcePackage":
201 | if (repo.path / "setup.py").exists():
202 | with TemporaryDirectory() as tmp_dir:
203 | try:
204 | _ = sys.stderr.fileno()
205 | stderr = sys.stderr
206 | except io.UnsupportedOperation:
207 | stderr = None
208 | subprocess.check_call(
209 | [
210 | sys.executable,
211 | "-m",
212 | "pip",
213 | "wheel",
214 | "--no-deps",
215 | "-w",
216 | tmp_dir,
217 | str(repo.path.absolute()),
218 | ],
219 | stdout=stderr,
220 | )
221 | wheel = None
222 | for whl in Path(tmp_dir).glob("*.whl"):
223 | if wheel is not None:
224 | raise ValueError(
225 | f"`pip wheel --no-deps {repo.path!s}` produced multiple wheel files!"
226 | )
227 | wheel = whl
228 | if wheel is None:
229 | raise ValueError(
230 | f"`pip wheel --no-deps {repo.path!s}` did not produce a wheel file!"
231 | )
232 | dist = JohnnyDist(str(wheel))
233 | # force JohnnyDist to read the dependencies before deleting the wheel:
234 | _ = dist.children
235 | return PipSourcePackage.from_dist(dist, repo.path)
236 | elif (repo.path / "requirements.txt").exists():
237 | # We just have a requirements.txt and no setup.py
238 | # Use the directory name as the package name
239 | name = repo.path.absolute().name
240 | if (repo.path / "VERSION").exists():
241 | with open(repo.path / "VERSION", "r") as f:
242 | version = PipResolver.get_version(f.read().strip())
243 | else:
244 | version = PipResolver.get_version("0.0.0")
245 | log.info(f"Could not detect {repo.path} version. Using: {version}")
246 | return PipSourcePackage(
247 | name=name,
248 | version=version,
249 | dependencies=PipResolver.get_dependencies(repo.path),
250 | source_repo=repo,
251 | source="pip",
252 | )
253 | else:
254 | raise ValueError(f"{repo.path} neither has a setup.py nor a requirements.txt")
255 |
--------------------------------------------------------------------------------
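A minimal usage sketch for `PipResolver.parse_requirements_txt_line` above, which splits a requirement at the earliest specifier character and falls back to `*` when there is none; assumes `it-depends` is installed, and the requirement lines are illustrative:

    from it_depends.pip import PipResolver

    for line in ("tqdm>=4.48.0", "six", ""):
        dep = PipResolver.parse_requirements_txt_line(line)
        if dep is not None:  # blank lines parse to None
            print(dep.package, dep.semantic_version)
    # tqdm >=4.48.0
    # six *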
/it_depends/resolver.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from collections import defaultdict
3 | from logging import getLogger
4 | from typing import Dict, FrozenSet, Iterable, Iterator, List, Optional, Set, Tuple
5 |
6 | from semantic_version.base import AllOf, BaseSpec
7 |
8 | from .dependencies import Dependency, Package, PackageCache
9 | from .sbom import SBOM
10 |
11 | logger = getLogger(__name__)
12 |
13 |
14 | class CompoundSpec(BaseSpec):
15 | def __init__(self, *to_combine: BaseSpec):
16 | super(CompoundSpec, self).__init__(",".join(s.expression for s in to_combine))
17 | self.clause = AllOf(*(s.clause for s in to_combine))
18 |
19 | @classmethod
20 | def _parse_to_clause(cls, expression):
21 | """Converts an expression to a clause."""
22 | # Placeholder, we actually set self.clause in self.__init__
23 | return None
24 |
25 |
26 | class PackageSet:
27 | def __init__(self):
28 | self._packages: Dict[Tuple[str, str], Package] = {}
29 | self._unsatisfied: Dict[Tuple[str, str], Dict[Dependency, Set[Package]]] = \
30 | defaultdict(lambda: defaultdict(set))
31 | self.is_valid: bool = True
32 | self.is_complete: bool = True
33 |
34 | def __eq__(self, other):
35 |         return isinstance(other, PackageSet) and self._packages == other._packages
36 |
37 | def __hash__(self):
38 | return hash(frozenset(self._packages.values()))
39 |
40 | def __len__(self):
41 | return len(self._packages)
42 |
43 | def __iter__(self) -> Iterator[Package]:
44 | yield from self._packages.values()
45 |
46 | def __contains__(self, package: Package) -> bool:
47 | pkg_spec = (package.name, package.source)
48 | return pkg_spec in self._packages and self._packages[pkg_spec] == package
49 |
50 | def unsatisfied_dependencies(self) -> Iterator[Tuple[Dependency, FrozenSet[Package]]]:
51 | for (pkg_name, pkg_source), deps in sorted(
52 |             # try the dependencies with the fewest distinct requirements first
53 | self._unsatisfied.items(),
54 | key=lambda x: (len(x[1]), x[0])
55 | ):
56 | if len(deps) == 0:
57 | continue
58 | elif len(deps) == 1:
59 | dep, packages = next(iter(deps.items()))
60 | else:
61 | # there are multiple requirements for the same dependency
62 | spec = CompoundSpec(*(d.semantic_version for d in deps.keys()))
63 | dep = Dependency(pkg_name, pkg_source, spec)
64 | packages = {
65 | p
66 | for packages in deps.values()
67 | for p in packages
68 | }
69 |
70 | yield dep, frozenset(packages)
71 |
72 | def copy(self) -> "PackageSet":
73 | ret = PackageSet()
74 | ret._packages = self._packages.copy()
75 | ret._unsatisfied = defaultdict(lambda: defaultdict(set))
76 | for dep_spec, deps in self._unsatisfied.items():
77 | ret._unsatisfied[dep_spec] = defaultdict(set)
78 | for dep, packages in deps.items():
79 | ret._unsatisfied[dep_spec][dep] = set(packages)
80 | assert all(p in ret for p in packages)
81 | ret.is_valid = self.is_valid
82 | ret.is_complete = self.is_complete
83 | return ret
84 |
85 | def add(self, package: Package):
86 | pkg_spec = (package.name, package.source)
87 | if pkg_spec in self._packages and self._packages[pkg_spec].version != package.version:
88 | self.is_valid = False
89 | if not self.is_valid:
90 | return
91 | self._packages[pkg_spec] = package
92 | if pkg_spec in self._unsatisfied:
93 | # there are some existing packages that have unsatisfied dependencies that could be
94 | # satisfied by this new package
95 | for dep in list(self._unsatisfied[pkg_spec].keys()):
96 | if dep.match(package):
97 | del self._unsatisfied[pkg_spec][dep]
98 | if len(self._unsatisfied[pkg_spec]) == 0:
99 | del self._unsatisfied[pkg_spec]
100 | # add any new unsatisfied dependencies for this package
101 | for dep in package.dependencies:
102 | dep_spec = (dep.package, dep.source)
103 | if dep_spec not in self._packages:
104 | self._unsatisfied[dep_spec][dep].add(package)
105 | elif not dep.match(self._packages[dep_spec]):
106 | self.is_valid = False
107 | break
108 |
109 | self.is_complete = self.is_valid and len(self._unsatisfied) == 0
110 |
111 |
112 | class PartialResolution:
113 | def __init__(self, packages: Iterable[Package] = (), dependencies: Iterable[Package] = (),
114 | parent: Optional["PartialResolution"] = None):
115 | self._packages: FrozenSet[Package] = frozenset(packages)
116 | self._dependencies: FrozenSet[Package] = frozenset(dependencies)
117 | self.parent: Optional[PartialResolution] = parent
118 | if self.parent is not None:
119 | self.packages: PackageSet = self.parent.packages.copy()
120 | else:
121 | self.packages = PackageSet()
122 | for package in self._packages:
123 | self.packages.add(package)
124 | if not self.is_valid:
125 | break
126 | if self.is_valid:
127 | for dep in self._dependencies:
128 | self.packages.add(dep)
129 | if not self.is_valid:
130 | break
131 |
132 | @property
133 | def is_valid(self) -> bool:
134 | return self.packages.is_valid
135 |
136 | @property
137 | def is_complete(self) -> bool:
138 | return self.packages.is_complete
139 |
140 | def __contains__(self, package: Package) -> bool:
141 | return package in self.packages
142 |
143 | def add(self, packages: Iterable[Package], depends_on: Package) -> "PartialResolution":
144 | return PartialResolution(packages, (depends_on,), parent=self)
145 |
146 |     def packages(self) -> Iterator[Package]:  # shadowed by the `packages` attribute; reachable only as __iter__
147 |         yield from self.packages
148 |
149 | __iter__ = packages
150 |
151 | def dependencies(self) -> Iterator[Tuple[Package, Package]]:
152 | pr: Optional[PartialResolution] = self
153 | while pr is not None:
154 | for depends_on in sorted(pr._dependencies):
155 | for package in pr._packages:
156 | yield package, depends_on
157 | pr = pr.parent
158 |
159 | def __len__(self) -> int:
160 | return len(self.packages)
161 |
162 | def __eq__(self, other):
163 | return isinstance(other, PartialResolution) and self.packages == other.packages
164 |
165 | def __hash__(self):
166 | return hash(self.packages)
167 |
168 |
169 | def resolve_sbom(root_package: Package, packages: PackageCache, order_ascending: bool = True) -> Iterator[SBOM]:
170 | if not root_package.dependencies:
171 | yield SBOM((), (root_package,))
172 | return
173 |
174 | logger.info(f"Resolving the {['newest', 'oldest'][order_ascending]} possible SBOM for {root_package.name}")
175 |
176 | stack: List[PartialResolution] = [
177 | PartialResolution(packages=(root_package,))
178 | ]
179 |
180 | history: Set[PartialResolution] = {
181 | pr for pr in stack
182 | if pr.is_valid
183 | }
184 |
185 | while stack:
186 | pr = stack.pop()
187 | if pr.is_complete:
188 | yield SBOM(pr.dependencies(), root_packages=(root_package,))
189 | continue
190 | elif not pr.is_valid:
191 | continue
192 |
193 | for dep, required_by in pr.packages.unsatisfied_dependencies():
194 | if not PartialResolution(packages=required_by, parent=pr).is_valid:
195 | continue
196 | for match in sorted(
197 | packages.match(dep),
198 | key=lambda p: p.version,
199 | reverse=order_ascending
200 | ):
201 | next_pr = pr.add(required_by, match)
202 | if next_pr.is_valid and next_pr not in history:
203 | history.add(next_pr)
204 | stack.append(next_pr)
205 |
--------------------------------------------------------------------------------
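A minimal sketch of `CompoundSpec`, which `unsatisfied_dependencies` above uses to AND together several requirements on the same package; it relies only on the `semantic_version` library that `it-depends` already depends on:

    from semantic_version import SimpleSpec, Version

    from it_depends.resolver import CompoundSpec

    spec = CompoundSpec(SimpleSpec(">=1.2"), SimpleSpec("<2.0"))
    print(Version("1.5.0") in spec)  # True: satisfies both clauses
    print(Version("2.1.0") in spec)  # False: violates the second clause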
/it_depends/sbom.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, FrozenSet, Iterable, List, Optional, Tuple, Type, TypeVar
2 |
3 | from cyclonedx.builder.this import this_component as cdx_lib_component
4 | from cyclonedx.model import XsUri
5 | from cyclonedx.model.bom import Bom
6 | from cyclonedx.model.component import Component, ComponentType
7 | from cyclonedx.model.contact import OrganizationalEntity
8 | from cyclonedx.output.json import JsonV1Dot5
9 |
10 | from . import version
11 | from .dependencies import Package
12 |
13 | __all__ = "cyclonedx_to_json", "SBOM"
14 |
15 |
16 | S = TypeVar("S", bound="SBOM")
17 |
18 |
19 | class SBOM:
20 | def __init__(self, dependencies: Iterable[Tuple[Package, Package]] = (), root_packages: Iterable[Package] = ()):
21 | self.dependencies: FrozenSet[Tuple[Package, Package]] = frozenset(dependencies)
22 | self.root_packages: FrozenSet[Package] = frozenset(root_packages)
23 |
24 | @property
25 | def packages(self) -> FrozenSet[Package]:
26 | return self.root_packages | {
27 | p
28 | for deps in self.dependencies
29 | for p in deps
30 | }
31 |
32 | def __str__(self):
33 | return ", ".join(p.full_name for p in sorted(self.packages))
34 |
35 | def to_cyclonedx(self) -> Bom:
36 | bom = Bom()
37 |
38 | expanded: Dict[Package, Component] = {}
39 |
40 | root_component: Optional[Component] = None
41 |
42 | for root_package in sorted(
43 | self.root_packages,
44 | key=lambda package: package.full_name,
45 | reverse=True
46 | ):
47 | root_component = Component(
48 | name=root_package.name,
49 | type=ComponentType.APPLICATION,
50 | version=str(root_package.version),
51 | bom_ref=root_package.full_name,
52 | )
53 | bom.components.add(root_component)
54 | expanded[root_package] = root_component
55 |
56 | bom.metadata.tools.components.add(cdx_lib_component())
57 | bom.metadata.tools.components.add(Component(
58 | name="it-depends",
59 | supplier=OrganizationalEntity(
60 | name="Trail of Bits",
61 | urls=[XsUri("https://www.trailofbits.com/")]
62 | ),
63 | type=ComponentType.APPLICATION,
64 | version=version(),
65 | ))
66 |
67 | if root_component is not None:
68 | bom.metadata.component = root_component
69 |
70 | for pkg, depends_on in self.dependencies:
71 | if pkg not in expanded:
72 | component = Component(
73 | name=pkg.name,
74 | type=ComponentType.LIBRARY,
75 | version=str(pkg.version),
76 | bom_ref=f"{pkg.full_name}@{pkg.version!s}"
77 | )
78 | bom.components.add(component)
79 | else:
80 | component = expanded[pkg]
81 | if depends_on not in expanded:
82 | d_component = Component(
83 | name=depends_on.name,
84 | type=ComponentType.LIBRARY,
85 | version=str(depends_on.version),
86 | bom_ref=f"{depends_on.full_name}@{depends_on.version!s}"
87 | )
88 | bom.components.add(d_component)
89 | else:
90 | d_component = expanded[depends_on]
91 | bom.register_dependency(component, [d_component])
92 |
93 | return bom
94 |
95 | def __or__(self, other: "SBOM") -> "SBOM":
96 | return SBOM(self.dependencies | other.dependencies, self.root_packages | other.root_packages)
97 |
98 | def __hash__(self):
99 | return hash((self.root_packages, self.dependencies))
100 |
101 | def __eq__(self, other):
102 | return isinstance(other, SBOM) and self.root_packages == other.root_packages \
103 | and self.dependencies == other.dependencies
104 |
105 |
106 | def cyclonedx_to_json(bom: Bom, indent: int = 2) -> str:
107 | return JsonV1Dot5(bom).output_as_string(indent=indent)
108 |
--------------------------------------------------------------------------------
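A minimal sketch of serializing an `SBOM` to CycloneDX 1.5 JSON, assuming `Package` accepts positional name/version/source arguments as it does in test/test_audit.py; the package names are illustrative:

    from it_depends.dependencies import Package
    from it_depends.sbom import SBOM, cyclonedx_to_json

    app = Package("demo-app", "1.0.0", "pip")      # hypothetical root package
    lib = Package("requests", "2.31.0", "pip")     # hypothetical dependency
    sbom = SBOM(dependencies=[(app, lib)], root_packages=[app])
    print(cyclonedx_to_json(sbom.to_cyclonedx()))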
/it_depends/ubuntu/__init__.py:
--------------------------------------------------------------------------------
1 | # Load the resolver so it auto-registers itself
2 | from . import resolver
3 |
--------------------------------------------------------------------------------
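A minimal sketch of the registration side effect this module exists for: importing it makes the "ubuntu" resolver discoverable by name (`resolver_by_name` comes from `it_depends.dependencies`, as used by test/test_db.py):

    import it_depends.ubuntu  # noqa: F401  (imported for its registration side effect)
    from it_depends.dependencies import resolver_by_name

    print(resolver_by_name("ubuntu").name)  # ubuntu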
/it_depends/ubuntu/apt.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import gzip
3 | from pathlib import Path
4 | import re
5 | import logging
6 | from threading import Lock
7 | from typing import Dict, List, Optional, Set, Tuple
8 | from urllib import request
9 |
10 | from ..it_depends import APP_DIRS
11 | from .docker import run_command
12 |
13 | logger = logging.getLogger(__name__)
14 | all_packages: Optional[Tuple[str, ...]] = None
15 | _APT_LOCK: Lock = Lock()
16 |
17 |
18 | def get_apt_packages() -> Tuple[str, ...]:
19 | with _APT_LOCK:
20 | global all_packages
21 | if all_packages is None:
22 | logger.info("Rebuilding global apt package list.")
23 | raw_packages = run_command("apt", "list").decode("utf-8")
24 | all_packages = tuple(x.split("/")[0] for x in raw_packages.splitlines() if x)
25 |
26 | logger.info(f"Global apt package count {len(all_packages)}")
27 | return all_packages
28 |
29 |
30 | def search_package(package: str) -> str:
31 | found_packages: List[str] = []
32 | for apt_package in get_apt_packages():
33 | if package.lower() not in apt_package:
34 | continue
35 | if re.match(
36 | rf"^(lib)*{re.escape(package.lower())}(\-*([0-9]*)(\.*))*(\-dev)*$",
37 | apt_package,
38 | ):
39 | found_packages.append(apt_package)
40 | found_packages.sort(key=len, reverse=True)
41 | if not found_packages:
42 | raise ValueError(f"Package {package} not found in apt package list.")
43 |     logger.info(f"Found {len(found_packages)} matching packages; choosing {found_packages[0]}")
44 | return found_packages[0]
45 |
46 |
47 | contents_db: Dict[str, List[str]] = {}
48 | _loaded_dbs: Set[Path] = set()
49 |
50 |
51 | @functools.lru_cache(maxsize=5242880)
52 | def _file_to_package_contents(filename: str, arch: str = "amd64"):
53 | """
54 | Downloads and uses apt-file database directly
55 | # http://security.ubuntu.com/ubuntu/dists/focal-security/Contents-amd64.gz
56 | # http://security.ubuntu.com/ubuntu/dists/focal-security/Contents-i386.gz
57 | """
58 | if arch not in ("amd64", "i386"):
59 | raise ValueError("Only amd64 and i386 supported")
60 | selected = None
61 |
62 | dbfile = Path(APP_DIRS.user_cache_dir) / f"Contents-{arch}.gz"
63 | if not dbfile.exists():
64 | request.urlretrieve(
65 | f"http://security.ubuntu.com/ubuntu/dists/focal-security/Contents-{arch}.gz",
66 | dbfile,
67 | )
68 |     if dbfile not in _loaded_dbs:
69 | logger.info("Rebuilding contents db")
70 | with gzip.open(str(dbfile), "rt") as contents:
71 | for line in contents.readlines():
72 | filename_i, *packages_i = re.split(r"\s+", line[:-1])
73 | assert len(packages_i) > 0
74 | contents_db.setdefault(filename_i, []).extend(packages_i)
75 | _loaded_dbs.add(dbfile)
76 |
77 | regex = re.compile("(.*/)+" + filename + "$")
78 | matches = 0
79 | for (filename_i, packages_i) in contents_db.items():
80 | if regex.match(filename_i):
81 | matches += 1
82 | for package_i in packages_i:
83 | if selected is None or len(selected[0]) > len(filename_i):
84 | selected = filename_i, package_i
85 | if selected:
86 | logger.info(f"Found {matches} matching packages for {filename}. Choosing {selected[1]}")
87 | else:
88 | raise ValueError(f"{filename} not found in Contents database")
89 | return selected[1]
90 |
91 |
92 | @functools.lru_cache(maxsize=5242880)
93 | def file_to_packages(filename: str, arch: str = "amd64") -> List[str]:
94 | if arch not in ("amd64", "i386"):
95 | raise ValueError("Only amd64 and i386 supported")
96 | logger.debug(f'Running [{" ".join(["apt-file", "-x", "search", filename])}]')
97 | contents = run_command("apt-file", "-x", "search", filename).decode("utf-8")
98 | selected: List[str] = []
99 | for line in contents.split("\n"):
100 | if not line:
101 | continue
102 |         package_i, _ = line.split(": ", maxsplit=1)
103 | selected.append(package_i)
104 | return sorted(selected)
105 |
106 |
107 | def file_to_package(filename: str, arch: str = "amd64") -> str:
108 | packages = file_to_packages(filename, arch)
109 | if packages:
110 | _, result = min((len(pkg), pkg) for pkg in packages)
111 | logger.info(f"Found {len(packages)} matching packages for {filename}. Choosing {result}")
112 | return result
113 | else:
114 | raise ValueError(f"{filename} not found in apt-file")
115 |
116 |
117 | def cached_file_to_package(
118 | pattern: str, file_to_package_cache: Optional[List[Tuple[str, str]]] = None
119 | ) -> str:
120 |     # file_to_package_cache contains all the files that are provided by previously
121 |     # chosen dependencies. If a file pattern is already satisfied by those files,
122 |     # reuse the package that is already included as a dependency
123 | if file_to_package_cache is not None:
124 | regex = re.compile("(.*/)+" + pattern + "$")
125 | for package_i, filename_i in file_to_package_cache:
126 | if regex.match(filename_i):
127 | return package_i
128 |
129 | package = file_to_package(pattern)
130 |
131 |     # a new package was chosen; add all the files it provides to our cache
132 |     # (uses the `apt-file` command line tool)
133 | if file_to_package_cache is not None:
134 | contents = run_command("apt-file", "list", package).decode("utf-8")
135 | for line in contents.split("\n"):
136 | if ":" not in line:
137 | break
138 |         package_i, filename_i = line.split(": ", maxsplit=1)
139 | file_to_package_cache.append((package_i, filename_i))
140 |
141 | return package
142 |
--------------------------------------------------------------------------------
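A minimal sketch of the lookups above; both helpers shell out to `apt-file` via `run_command`, so Docker must be available and the first call may be slow while the container image is built:

    from it_depends.ubuntu.apt import file_to_package, file_to_packages

    print(file_to_packages("libc.so.6")[:3])  # every package providing the file, sorted
    print(file_to_package("libc.so.6"))       # the provider with the shortest name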
/it_depends/ubuntu/docker.py:
--------------------------------------------------------------------------------
1 | from functools import lru_cache
2 | from pathlib import Path
3 | import shutil
4 | import subprocess
5 | import logging
6 | import re
7 | from threading import Lock
8 | from typing import Optional, Pattern
9 |
10 | from ..docker import DockerContainer, InMemoryDockerfile
11 |
12 | _container: Optional[DockerContainer] = None
13 | _UBUNTU_LOCK: Lock = Lock()
14 |
15 | _UBUNTU_NAME_MATCH: Pattern[str] = re.compile(r"^\s*name\s*=\s*\"ubuntu\"\s*$", flags=re.IGNORECASE)
16 | _VERSION_ID_MATCH: Pattern[str] = re.compile(
17 | r"^\s*version_id\s*=\s*\"([^\"]+)\"\s*$", flags=re.IGNORECASE
18 | )
19 |
20 | logger = logging.getLogger(__name__)
21 |
22 |
23 | @lru_cache(maxsize=4)
24 | def is_running_ubuntu(check_version: Optional[str] = None) -> bool:
25 | """
26 | Tests whether the current system is running Ubuntu
27 |
28 | If `check_version` is not None, the specific version of Ubuntu is also tested.
29 | """
30 | os_release_path = Path("/etc/os-release")
31 | if not os_release_path.exists():
32 | return False
33 | is_ubuntu = False
34 | version: Optional[str] = None
35 | with open(os_release_path, "r") as f:
36 | for line in f.readlines():
37 | line = line.strip()
38 | is_ubuntu = is_ubuntu or bool(_UBUNTU_NAME_MATCH.match(line))
39 | if check_version is None:
40 | if is_ubuntu:
41 | return True
42 | elif version is None:
43 | m = _VERSION_ID_MATCH.match(line)
44 | if m:
45 | version = m.group(1)
46 | else:
47 | break
48 | return is_ubuntu and (check_version is None or version == check_version)
49 |
50 |
51 | def run_command(*args: str) -> bytes:
52 | """
53 | Runs the given command in Ubuntu 20.04
54 |
55 |     The command always runs inside a Docker container (built on first use), so Docker must be available.
56 |
57 | """
58 | with _UBUNTU_LOCK:
59 | global _container
60 | if _container is None:
61 | with InMemoryDockerfile(
62 | """FROM ubuntu:20.04
63 |
64 | RUN apt-get update && apt-get install -y apt-file && apt-file update
65 | """
66 | ) as dockerfile:
67 | _container = DockerContainer("trailofbits/it-depends-apt", dockerfile=dockerfile)
68 | _container.rebuild()
69 | logger.debug(f"running {' '.join(args)} in Docker")
70 | p = _container.run(
71 | *args,
72 | interactive=False,
73 | stdout=subprocess.PIPE,
74 | stderr=subprocess.DEVNULL,
75 | rebuild=False,
76 | )
77 | if p.returncode != 0:
78 | raise subprocess.CalledProcessError(p.returncode, cmd=f"{' '.join(args)}")
79 | return p.stdout
80 |
--------------------------------------------------------------------------------
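A minimal sketch of `run_command`; the first call builds the trailofbits/it-depends-apt image, so Docker must be installed and usable by the current user:

    from it_depends.ubuntu.docker import is_running_ubuntu, run_command

    print(is_running_ubuntu())  # True only when /etc/os-release names Ubuntu
    listing = run_command("apt", "list").decode("utf-8")
    print(len(listing.splitlines()), "packages visible in the container")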
/it_depends/ubuntu/resolver.py:
--------------------------------------------------------------------------------
1 | from functools import lru_cache
2 | import shutil
3 | import subprocess
4 | import logging
5 | import re
6 | from typing import Iterable, Iterator, Optional
7 |
8 | from .apt import file_to_packages
9 | from .docker import is_running_ubuntu, run_command
10 | from ..dependencies import (
11 | Dependency,
12 | DependencyResolver,
13 | Dict,
14 | List,
15 | Package,
16 | PackageCache,
17 | ResolverAvailability,
18 | SimpleSpec,
19 | SourcePackage,
20 | SourceRepository,
21 | Tuple,
22 | Version,
23 | )
24 | from ..native import get_native_dependencies
25 |
26 | logger = logging.getLogger(__name__)
27 |
28 |
29 | class UbuntuResolver(DependencyResolver):
30 | name = "ubuntu"
31 | description = "expands dependencies based upon Ubuntu package dependencies"
32 |
33 |     _pattern = re.compile(r" *(?P<package>[^ ]*)( *\((?P<version>.*)\))? *")
34 |     _ubuntu_version = re.compile("([0-9]+:)*(?P<version>[^-]*)(-.*)*")
35 |
36 | @staticmethod
37 | @lru_cache(maxsize=2048)
38 | def ubuntu_packages(package_name: str) -> Iterable[Package]:
39 | """Iterates over all of the package versions available for a package name"""
40 | # Parses the dependencies of dependency.package out of the `apt show` command
41 | logger.debug(f"Running `apt show -a {package_name}`")
42 | try:
43 | contents = run_command("apt", "show", "-a", package_name).decode("utf8")
44 | except subprocess.CalledProcessError as e:
45 | if e.returncode == 100:
46 | contents = ""
47 | else:
48 | raise
49 |
50 |         # Possibly means that the package does not appear in Ubuntu under that exact name
51 |         if not contents:
52 |             logger.warning(f"Package {package_name} not found in the installed Ubuntu apt sources")
53 | return ()
54 |
55 | # Example depends line:
56 | # Depends: libc6 (>= 2.29), libgcc-s1 (>= 3.4), libstdc++6 (>= 9)
57 | version: Optional[Version] = None
58 | packages: Dict[Tuple[str, Version], List[List[Dependency]]] = {}
59 | for line in contents.split("\n"):
60 | if line.startswith("Version: "):
61 | matched = UbuntuResolver._ubuntu_version.match(line[len("Version: ") :])
62 | if matched:
63 | # FIXME: Ubuntu versions can include "~", which the semantic_version library does not like
64 | # So hack a fix by simply dropping everything after the tilde:
65 | raw_version = matched.group("version").split("~", maxsplit=1)[0]
66 | version = Version.coerce(raw_version)
67 | if (package_name, version) not in packages:
68 | packages[(package_name, version)] = []
69 | else:
70 | logger.warning(f"Failed to parse package {package_name} {line}")
71 | elif version is not None and line.startswith("Depends: "):
72 | deps = []
73 | for dep in line[9:].split(","):
74 | for or_segment in dep.split("|"):
75 | # Fixme: For now, treat each ORed dependency as a separate ANDed dependency
76 | matched = UbuntuResolver._pattern.match(or_segment)
77 | if not matched:
78 | raise ValueError(
79 | f"Invalid dependency line in apt output for {package_name}: {line!r}"
80 | )
81 | dep_package = matched.group("package")
82 | dep_version = matched.group("version")
83 | try:
84 | # remove trailing ubuntu versions like "-10ubuntu4":
85 | dep_version = dep_version.split("-", maxsplit=1)[0]
86 |                             dep_version = dep_version.replace(" ", "")
87 |                             SimpleSpec(dep_version)  # validate; raises if the spec is unparseable
88 |                         except Exception:
89 |                             dep_version = "*"  # FIXME: fall back for specs SimpleSpec rejects, e.g. "= 1:7.0.1-12"
90 |
91 | deps.append((dep_package, dep_version))
92 |
93 | packages[(package_name, version)].append(
94 | [
95 | Dependency(
96 | package=pkg,
97 | semantic_version=SimpleSpec(ver),
98 | source=UbuntuResolver(),
99 | )
100 | for pkg, ver in deps
101 | ]
102 | )
103 | version = None
104 |
105 | # Sometimes `apt show` will return multiple packages with the same version but different dependencies.
106 | # For example: `apt show -a dkms`
107 | # Currently, we do a union over their dependencies
108 | # TODO: Figure out a better way to handle this
109 | return [
110 | Package(
111 | name=pkg_name,
112 | version=version,
113 | source=UbuntuResolver(),
114 | dependencies=set().union(*duplicates), # type: ignore
115 | )
116 | for (pkg_name, version), duplicates in packages.items()
117 | ]
118 |
119 | def resolve(self, dependency: Dependency) -> Iterator[Package]:
120 | if dependency.source != "ubuntu":
121 | raise ValueError(
122 | f"{self} can not resolve dependencies from other sources ({dependency})"
123 | )
124 |
125 | if dependency.package.startswith("/"):
126 | # this is a file path, likely produced from native.py
127 | try:
128 | deps = []
129 | for pkg_name in file_to_packages(dependency.package):
130 | deps.append(Dependency(package=pkg_name, source=UbuntuResolver.name))
131 | if deps:
132 | yield Package(
133 | name=dependency.package,
134 | source=dependency.source,
135 | version=Version.coerce("0"),
136 | dependencies=deps,
137 | )
138 | except (ValueError, subprocess.CalledProcessError):
139 | pass
140 | else:
141 | for package in UbuntuResolver.ubuntu_packages(dependency.package):
142 | if package.version in dependency.semantic_version:
143 | yield package
144 |
145 | def __lt__(self, other):
146 | """Make sure that the Ubuntu Classifier runs last"""
147 | return False
148 |
149 | def is_available(self) -> ResolverAvailability:
150 | if shutil.which("docker") is None:
151 | return ResolverAvailability(
152 | False,
153 | "`Ubuntu` classifier needs to have Docker installed. Try apt install docker.io.",
154 | )
155 | return ResolverAvailability(True)
156 |
157 | def can_resolve_from_source(self, repo: SourceRepository) -> bool:
158 | return False
159 |
160 | def resolve_from_source(
161 | self, repo: SourceRepository, cache: Optional[PackageCache] = None
162 | ) -> Optional[SourcePackage]:
163 | return None
164 |
165 | def can_update_dependencies(self, package: Package) -> bool:
166 | return package.source != UbuntuResolver.name
167 |
168 | def update_dependencies(self, package: Package) -> Package:
169 | native_deps = get_native_dependencies(package)
170 | package.dependencies = package.dependencies.union(frozenset(native_deps))
171 | return package
172 |
--------------------------------------------------------------------------------
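A minimal sketch of `UbuntuResolver.resolve` for a named apt package (libc6 as an illustrative example); like the other Ubuntu helpers it requires Docker, per `is_available` above:

    from it_depends.dependencies import Dependency, SimpleSpec
    from it_depends.ubuntu.resolver import UbuntuResolver

    dep = Dependency(package="libc6", semantic_version=SimpleSpec("*"), source="ubuntu")
    for pkg in UbuntuResolver().resolve(dep):
        print(pkg.name, pkg.version, sorted(d.package for d in pkg.dependencies))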
/it_depends/vcs.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions to automatically download source repositories from various VCS systems and providers.
3 | Logic largely taken from the implementation of `go get`:
4 |
5 | https://golang.org/src/cmd/go/internal/vcs/vcs.go
6 |
7 | """
8 | import sys
9 | from dataclasses import dataclass
10 | import os
11 | import re
12 | from re import Pattern
13 | import subprocess
14 | from typing import Callable, cast, Dict, Iterable, List, Optional, Type, TypeVar
15 |
16 |
17 | class VCSResolutionError(ValueError):
18 | pass
19 |
20 |
21 | class GoVCSConfigError(VCSResolutionError):
22 | pass
23 |
24 |
25 | T = TypeVar("T")
26 |
27 |
28 | class VCS:
29 | _DEFAULT_INSTANCE: "VCS"
30 |
31 | def __init__(self, name: str, cmd: str, scheme: Iterable[str], ping_cmd: Iterable[str]):
32 | self.name: str = name
33 | self.cmd: str = cmd
34 | self.scheme: List[str] = list(scheme)
35 | self.ping_cmd: List[str] = list(ping_cmd)
36 |
37 | def __init_subclass__(cls, **kwargs):
38 | setattr(cls, "_DEFAULT_INSTANCE", cls())
39 |
40 | @classmethod
41 | def default_instance(cls: Type[T]) -> T:
42 | return cast(T, getattr(cls, "_DEFAULT_INSTANCE"))
43 |
44 | def ping(self, repo: str) -> Optional[str]:
45 | env = {"GIT_TERMINAL_PROMPT": "0"}
46 | if os.environ.get("GIT_SSH", "") == "" and os.environ.get("GIT_SSH_COMMAND", "") == "":
47 | # disable any ssh connection pooling by git
48 | env["GIT_SSH_COMMAND"] = "ssh -o ControlMaster=no"
49 | for scheme in self.scheme:
50 | cmd = [self.cmd] + [
51 | c.replace("{scheme}", scheme).replace("{repo}", repo) for c in self.ping_cmd
52 | ]
53 | if (
54 | subprocess.call(cmd, stdout=subprocess.DEVNULL, stdin=subprocess.DEVNULL, env=env)
55 | == 0
56 | ):
57 | return scheme
58 | return None
59 |
60 | def __hash__(self):
61 | return hash(self.name)
62 |
63 | def __eq__(self, other):
64 | return isinstance(other, VCS) and self.name == other.name
65 |
66 |
67 | class Git(VCS):
68 | def __init__(self):
69 | super().__init__(
70 | name="Git",
71 | cmd="git",
72 | scheme=("git", "https", "http", "git+ssh", "ssh"),
73 | ping_cmd=("ls-remote", "{scheme}://{repo}"),
74 | )
75 |
76 |
77 | VCSes: List[VCS] = [vcs.default_instance() for vcs in (Git,)]
78 |
79 | # VCS_MOD is a stub for the "mod" scheme. It's returned by
80 | # repoRootForImportPathDynamic, but is otherwise not treated as a VCS command.
81 | VCS_MOD = VCS(name="mod", cmd="", scheme=(), ping_cmd=())
82 |
83 |
84 | @dataclass
85 | class Match:
86 | prefix: str
87 | import_path: str
88 | repo: str = ""
89 | vcs: str = ""
90 | root: Optional[str] = None
91 |
92 | def expand(self, s: str) -> str:
93 | for key, value in self.__dict__.items():
94 | if not key.startswith("_"):
95 | s = s.replace(f"{{{key}}}", value)
96 | return s
97 |
98 |
99 | if sys.version_info >= (3, 9):
100 | REGEXP_TYPE = Pattern[str]
101 | else:
102 | REGEXP_TYPE = Pattern
103 |
104 |
105 | @dataclass
106 | class VCSPath:
107 | regexp: REGEXP_TYPE
108 | repo: str = ""
109 | path_prefix: str = ""
110 | check: Optional[Callable[[Match], None]] = None
111 | vcs: Optional[str] = None
112 | schemeless_repo: bool = False
113 |
114 |
115 | class VCSMatchError(VCSResolutionError):
116 | pass
117 |
118 |
119 | def no_vcs_suffix(match: Match):
120 | """
121 | checks that the repository name does not end in .foo for any version control system foo.
122 | The usual culprit is ".git".
123 |
124 | """
125 | repo = match.repo
126 | for vcs in VCSes:
127 | if repo.endswith(f".{vcs.cmd}"):
128 | raise VCSMatchError(f"Invalid version control suffix in {match.prefix!r} path")
129 |
130 |
131 | VCS_PATHS: List[VCSPath] = []
132 |
133 |
134 | def _register(path: VCSPath) -> VCSPath:
135 | VCS_PATHS.append(path)
136 | return path
137 |
138 |
139 | GITHUB = _register(
140 | VCSPath(
141 | path_prefix="github.com",
142 | regexp=re.compile(
143 |             r"^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$"
144 | ),
145 | vcs="git",
146 | repo="https://{root}",
147 | check=no_vcs_suffix,
148 | )
149 | )
150 |
151 |
152 | GENERAL_REPO = _register(
153 | VCSPath(
154 | regexp=re.compile(
155 |             r"(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\."
156 |             r"(?P<vcs>bzr|fossil|git|hg|svn))(/~?[A-Za-z0-9_.\-]+)*$"
157 | ),
158 | schemeless_repo=True,
159 | )
160 | )
161 |
162 |
163 | @dataclass
164 | class Repository:
165 | repo: str
166 | root: str
167 | vcs: VCS
168 | is_custom: bool = False
169 |
170 |
171 | def vcs_by_cmd(cmd: str) -> Optional[VCS]:
172 | """vcsByCmd returns the version control system for the given command name (hg, git, svn, bzr)."""
173 | for vcs in VCSes:
174 | if cmd == vcs.cmd:
175 | return vcs
176 | return None
177 |
178 |
179 | @dataclass
180 | class GoVCSRule:
181 | pattern: str
182 | allowed: List[str]
183 |
184 |
185 | DEFAULT_GO_VCS: List[GoVCSRule] = [
186 | GoVCSRule("private", ["all"]),
187 | GoVCSRule("public", ["git", "hg"]),
188 | ]
189 |
190 |
191 | GO_VCS_RULES: Optional[List[GoVCSRule]] = None
192 |
193 |
194 | def parse_go_vcs(s: str) -> Optional[List[GoVCSRule]]:
195 | s = s.strip()
196 | if not s:
197 | return None
198 | rules: List[GoVCSRule] = []
199 | have: Dict[str, str] = {}
200 | for item in s.split(","):
201 | item = item.strip()
202 | if not item:
203 |             raise GoVCSConfigError("Empty entry in GOVCS")
204 | i = item.find(":")
205 | if i < 0:
206 | raise GoVCSConfigError(f"Malformed entry in GOVCS (missing colon): {item!r}")
207 | pattern, vcs_list = item[:i].strip(), item[i + 1 :].strip()
208 | if not pattern:
209 | raise GoVCSConfigError(f"Empty pattern in GOVCS: {item!r}")
210 | if not vcs_list:
211 | raise GoVCSConfigError(f"Empty VCS list in GOVCS: {item!r}")
212 | if not os.path.isabs(pattern):
213 | raise GoVCSConfigError(f"Relative pattern not allowed in GOVCS: {pattern!r}")
214 |         if have.get(pattern, ""):  # dict.get has no `default` keyword
215 | raise GoVCSConfigError(
216 | f"Unreachable pattern in GOVCS: {item!r} after {have[pattern]!r}"
217 | )
218 | have[pattern] = item
219 | allowed = [a.strip() for a in vcs_list.split("|")]
220 | if any(not a for a in allowed):
221 | raise GoVCSConfigError(f"Empty VCS name in GOVCS: {item!r}")
222 | rules.append(GoVCSRule(pattern=pattern, allowed=allowed))
223 | return rules
224 |
225 |
226 | def check_go_vcs(vcs: VCS, root: str):
227 | if vcs == VCS_MOD:
228 | return
229 | global GO_VCS_RULES
230 | if GO_VCS_RULES is None:
231 | GO_VCS_RULES = parse_go_vcs(os.getenv("GOVCS", ""))
232 | if GO_VCS_RULES is None:
233 | GO_VCS_RULES = []
234 | GO_VCS_RULES.extend(DEFAULT_GO_VCS)
235 | # TODO: Eventually consider implementing this GOVCS check:
236 | # private := module.MatchPrefixPatterns(cfg.GOPRIVATE, root)
237 | # if !govcs.allow(root, private, vcs.Cmd) {
238 | # what := "public"
239 | # if private {
240 | # what = "private"
241 | # }
242 | # return fmt.Errorf("GOVCS disallows using %s for %s %s; see 'go help vcs'", vcs.Cmd, what, root)
243 | # }
244 |
245 |
246 | def resolve(path: str) -> Repository:
247 | for service in VCS_PATHS:
248 | if not path.startswith(service.path_prefix):
249 | continue
250 | m = service.regexp.match(path)
251 | if m is None:
252 | if service.path_prefix:
253 | raise VCSMatchError(f"Invalid {service.path_prefix} import path {path!r}")
254 | match = Match(prefix=f"{service.path_prefix}/", import_path=path)
255 | if m:
256 | for name, value in m.groupdict().items():
257 | if name and value:
258 | setattr(match, name, value)
259 | if service.vcs is not None:
260 | match.vcs = match.expand(service.vcs)
261 | if service.repo:
262 | match.repo = match.expand(service.repo)
263 | if service.check is not None:
264 | service.check(match)
265 | vcs = vcs_by_cmd(match.vcs)
266 | if vcs is None:
267 | raise VCSResolutionError(f"unknown version control system {match.vcs!r}")
268 | elif match.root is None:
269 | raise VCSResolutionError(f"{match!r} was expected to have a non-None root!")
270 | check_go_vcs(vcs, match.root)
271 | if not service.schemeless_repo:
272 | repo_url: str = match.repo
273 | else:
274 | scheme = vcs.ping(match.repo)
275 | if scheme is None:
276 | scheme = vcs.scheme[0]
277 | repo_url = f"{scheme}://{match.repo}"
278 | return Repository(repo=repo_url, root=match.root, vcs=vcs)
279 | raise VCSResolutionError(f"Unable to resolve repository for {path!r}")
280 |
--------------------------------------------------------------------------------
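A minimal sketch of `resolve()` on a GitHub import path; the GITHUB rule matches without network access (only schemeless matches are pinged over the wire):

    from it_depends.vcs import resolve

    repo = resolve("github.com/trailofbits/it-depends")
    print(repo.vcs.name)  # Git
    print(repo.repo)      # https://github.com/trailofbits/it-depends
    print(repo.root)      # github.com/trailofbits/it-depends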
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.isort]
2 | line_length = 100
3 | multi_line_output = 3
4 | known_first_party = "it_depends"
5 | include_trailing_comma = true
6 |
7 | [tool.black]
8 | line-length = 100
9 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | from setuptools import setup, find_packages
3 |
4 | SETUP_DIR = os.path.dirname(os.path.realpath(__file__))
5 | README_PATH = os.path.join(SETUP_DIR, "README.md")
6 |
7 | with open(README_PATH, "r") as readme:
8 | README = readme.read()
9 |
10 | setup(
11 | name="it-depends",
12 | description="A software dependency analyzer",
13 | long_description=README,
14 | long_description_content_type="text/markdown",
15 | license="LGPL-3.0-or-later",
16 | url="https://github.com/trailofbits/it-depends",
17 | author="Trail of Bits",
18 | version="0.1.3",
19 | packages=find_packages(exclude=["test"]),
20 | python_requires=">=3.7",
21 | install_requires=[
22 | "appdirs>=1.4.4",
23 | "cyclonedx-python-lib >= 5,< 9",
24 | "docker>=4.4.0",
25 | "graphviz>=0.14.1",
26 | "johnnydep>=1.8",
27 | "networkx>=2.4",
28 | "parse_cmake>=0.4.1",
29 | "semantic_version~=2.8.5",
30 | "sqlalchemy>=1.3",
31 | "tqdm>=4.48.0",
32 | # Indirect dependencies for which we pin a minimum version to mitigate vulnerabilities:
33 | "requests>=2.20.0", # CVE-2018-18074
34 | "urllib3>=1.26.5", # CVE-2021-33503
35 | ],
36 | extras_require={
37 | "dev": ["flake8", "pytest", "twine", "mypy>=0.812", "types-setuptools", "types-requests"]
38 | },
39 | entry_points={
40 | "console_scripts": [
41 | "it-depends = it_depends.__main__:main"
42 | ]
43 | },
44 | classifiers=[
45 | "Development Status :: 4 - Beta",
46 | "Environment :: Console",
47 | "Intended Audience :: Science/Research",
48 | "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)",
49 | "Programming Language :: Python :: 3 :: Only",
50 | "Topic :: Utilities"
51 | ]
52 | )
53 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trailofbits/it-depends/cdb9a1a04bfb3ee3d61c10e30157e8ce0cb38223/test/__init__.py
--------------------------------------------------------------------------------
/test/rebuild_expected_output.py:
--------------------------------------------------------------------------------
1 | """
2 | Rebuilds repos/*.expected.json by running the tests in a Docker container to match how they would be run in CI
3 | """
4 |
5 | from pathlib import Path
6 | from typing import Optional
7 |
8 | from it_depends.docker import DockerContainer, Dockerfile
9 |
10 | from test_smoke import IT_DEPENDS_DIR, SmokeTest, SMOKE_TESTS
11 |
12 |
13 | CI_TEST_PATH: Path = Path(__file__).parent.parent / ".github" / "workflows" / "tests.yml"
14 | _CONTAINER: Optional[DockerContainer] = None
15 |
16 |
17 | def container_type() -> str:
18 | """Returns the Docker container name used in GitHub CI"""
19 | if not CI_TEST_PATH.exists():
20 | raise ValueError(f"GitHub action file {CI_TEST_PATH!s} does not exist!")
21 | with open(CI_TEST_PATH, "r") as f:
22 | for line in f.readlines():
23 | line = line.strip()
24 | if line.startswith("runs-on:"):
25 | github_name = line[len("runs-on:"):].lstrip()
26 | hyphen_index = github_name.find("-")
27 | if hyphen_index < 0:
28 | raise ValueError(f"Unknown runs-on: container type {github_name!r} in {CI_TEST_PATH}")
29 | return f"{github_name[:hyphen_index]}:{github_name[hyphen_index+1:]}"
30 | raise ValueError(f"Did not find `runs-on: ...` line in {CI_TEST_PATH}")
31 |
32 |
33 | def get_container() -> DockerContainer:
34 | global _CONTAINER
35 | if _CONTAINER is None:
36 | dockerfile = Dockerfile(IT_DEPENDS_DIR / "Dockerfile")
37 | dockerfile_existed = dockerfile.exists()
38 | try:
39 | if not dockerfile_existed:
40 | with open(dockerfile.path, "w") as f:
41 | f.write(f"""FROM {container_type()}
42 |
43 | RUN DEBIAN_FRONTEND=noninteractive apt-get update && \\
44 | DEBIAN_FRONTEND=noninteractive apt-get install -y python3 python3-dev python3-pip docker.io \\
45 | cmake autoconf golang cargo npm clang \\
46 | && mkdir -p /it-depends
47 | # this is required for cargo:
48 | ENV USER=root
49 | COPY . /it-depends
50 | WORKDIR /it-depends
51 | RUN pip3 install .
52 | """)
53 | _CONTAINER = DockerContainer("trailofbits/it-depends", dockerfile=dockerfile, tag="latest")
54 | _CONTAINER.rebuild()
55 | finally:
56 | if not dockerfile_existed and dockerfile.exists():
57 | dockerfile.path.unlink()
58 | return _CONTAINER
59 |
60 |
61 | def rebuild(test: SmokeTest):
62 | print(f"Rebuilding {test.expected_json!s}")
63 | container = get_container()
64 | if container.run(
65 | "it-depends", str(test.snapshot_folder.relative_to(IT_DEPENDS_DIR)), "-f", "json",
66 | "-o", str(test.expected_json.relative_to(IT_DEPENDS_DIR)), "--force",
67 | cwd=IT_DEPENDS_DIR,
68 | check_existence=False, rebuild=False, mounts=(
69 | (test.expected_json.parent, "/it-depends/test/repos"),
70 | ("/var/run/docker.sock", "/var/run/docker.sock"),
71 | ),
72 | privileged=True
73 | ) != 0:
74 | raise ValueError(f"it-depends exited with non-zero status for {test.snapshot_folder}!")
75 | print(f"Updated {test.expected_json!s}")
76 |
77 |
78 | if __name__ == "__main__":
79 | for t in sorted(SMOKE_TESTS, key=lambda st: st.repo_name):
80 | rebuild(t)
81 |
--------------------------------------------------------------------------------
/test/repos/.gitignore:
--------------------------------------------------------------------------------
1 | *.zip
2 | *-*
3 | *.actual.json
--------------------------------------------------------------------------------
/test/repos/cvedb.expected.json:
--------------------------------------------------------------------------------
1 | {
2 | "ubuntu:/lib/x86_64-linux-gnu/libnss_files.so.2": {
3 | "0.0.0": {
4 | "dependencies": {
5 | "ubuntu:libc6": "*"
6 | },
7 | "source": "ubuntu"
8 | }
9 | },
10 | "ubuntu:libc6": {
11 | "2.31.0": {
12 | "dependencies": {
13 | "ubuntu:libgcc-s1": "*",
14 | "ubuntu:libcrypt1": "*"
15 | },
16 | "source": "ubuntu"
17 | }
18 | },
19 | "pip:cvss": {
20 | "2.2.0": {
21 | "dependencies": {
22 | "ubuntu:/lib/x86_64-linux-gnu/libdl.so.2": "*",
23 | "ubuntu:/lib/x86_64-linux-gnu/libnss_files.so.2": "*",
24 | "ubuntu:/lib/x86_64-linux-gnu/libc.so.6": "*",
25 | "ubuntu:/lib/x86_64-linux-gnu/libtinfo.so.6": "*"
26 | },
27 | "source": "pip"
28 | }
29 | },
30 | "ubuntu:/lib/x86_64-linux-gnu/libdl.so.2": {
31 | "0.0.0": {
32 | "dependencies": {
33 | "ubuntu:libc6": "*"
34 | },
35 | "source": "ubuntu"
36 | }
37 | },
38 | "ubuntu:libtinfo6": {
39 | "6.2.0": {
40 | "dependencies": {
41 | "ubuntu:libc6": ">=2.16"
42 | },
43 | "source": "ubuntu"
44 | }
45 | },
46 | "pip:six": {
47 | "1.5.0": {
48 | "dependencies": {
49 | "ubuntu:/lib/x86_64-linux-gnu/libdl.so.2": "*",
50 | "ubuntu:/lib/x86_64-linux-gnu/libnss_files.so.2": "*",
51 | "ubuntu:/lib/x86_64-linux-gnu/libc.so.6": "*",
52 | "ubuntu:/lib/x86_64-linux-gnu/libtinfo.so.6": "*"
53 | },
54 | "source": "pip"
55 | }
56 | },
57 | "ubuntu:libcrypt1": {
58 | "4.4.10": {
59 | "dependencies": {
60 | "ubuntu:libc6": ">=2.25"
61 | },
62 | "source": "ubuntu"
63 | }
64 | },
65 | "ubuntu:/lib/x86_64-linux-gnu/libc.so.6": {
66 | "0.0.0": {
67 | "dependencies": {
68 | "ubuntu:libc6": "*"
69 | },
70 | "source": "ubuntu"
71 | }
72 | },
73 | "ubuntu:libgcc-s1": {
74 | "10.3.0": {
75 | "dependencies": {
76 | "ubuntu:libc6": ">=2.14",
77 | "ubuntu:gcc-10-base": "*"
78 | },
79 | "source": "ubuntu"
80 | },
81 | "10.0.0": {
82 | "dependencies": {
83 | "ubuntu:libc6": ">=2.14",
84 | "ubuntu:gcc-10-base": "*"
85 | },
86 | "source": "ubuntu"
87 | }
88 | },
89 | "ubuntu:/lib/x86_64-linux-gnu/libtinfo.so.6": {
90 | "0.0.0": {
91 | "dependencies": {
92 | "ubuntu:libtinfo6": "*"
93 | },
94 | "source": "ubuntu"
95 | }
96 | },
97 | "pip:python-dateutil": {
98 | "2.8.1": {
99 | "dependencies": {
100 | "ubuntu:/lib/x86_64-linux-gnu/libdl.so.2": "*",
101 | "pip:six": ">=1.5",
102 | "ubuntu:/lib/x86_64-linux-gnu/libtinfo.so.6": "*",
103 | "ubuntu:/lib/x86_64-linux-gnu/libnss_files.so.2": "*",
104 | "ubuntu:/lib/x86_64-linux-gnu/libc.so.6": "*"
105 | },
106 | "source": "pip"
107 | }
108 | },
109 | "pip:cvedb": {
110 | "0.0.4": {
111 | "dependencies": {
112 | "ubuntu:/lib/x86_64-linux-gnu/libdl.so.2": "*",
113 | "ubuntu:/lib/x86_64-linux-gnu/libc.so.6": "*",
114 | "pip:cvss": "~=2.2",
115 | "ubuntu:/lib/x86_64-linux-gnu/libtinfo.so.6": "*",
116 | "pip:tqdm": "~=4.48.0",
117 | "ubuntu:/lib/x86_64-linux-gnu/libnss_files.so.2": "*",
118 | "pip:python-dateutil": "~=2.8.1"
119 | },
120 | "source": "pip",
121 | "is_source_package": true
122 | }
123 | },
124 | "pip:tqdm": {
125 | "4.48.0": {
126 | "dependencies": {
127 | "ubuntu:/lib/x86_64-linux-gnu/libdl.so.2": "*",
128 | "ubuntu:/lib/x86_64-linux-gnu/libnss_files.so.2": "*",
129 | "ubuntu:/lib/x86_64-linux-gnu/libc.so.6": "*",
130 | "ubuntu:/lib/x86_64-linux-gnu/libtinfo.so.6": "*"
131 | },
132 | "source": "pip"
133 | }
134 | }
135 | }
--------------------------------------------------------------------------------
/test/repos/pe-parse.expected.json:
--------------------------------------------------------------------------------
1 | {
2 | "ubuntu:libboost-filesystem1.71-dev": {
3 | "1.71.0": {
4 | "dependencies": {
5 | "ubuntu:libboost-filesystem1.71.0": "=1.71.0-6ubuntu6",
6 | "ubuntu:libboost1.71-dev": "=1.71.0-6ubuntu6",
7 | "ubuntu:libboost-system1.71-dev": "=1.71.0-6ubuntu6"
8 | },
9 | "source": "ubuntu"
10 | }
11 | },
12 | "pip:pepy": {
13 | "1.3.0": {
14 | "dependencies": {},
15 | "source": "pip",
16 | "is_source_package": true
17 | }
18 | },
19 | "cmake:dump-pe": {
20 | "0.0.0": {
21 | "dependencies": {
22 | "ubuntu:libboost-filesystem1.71-dev": "*"
23 | },
24 | "source": "cmake",
25 | "is_source_package": true
26 | }
27 | }
28 | }
--------------------------------------------------------------------------------
/test/test_apt.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from it_depends.ubuntu.apt import file_to_packages
4 |
5 |
6 | class TestAPT(TestCase):
7 | def test_file_to_package(self):
8 | self.assertEqual(file_to_packages("/usr/bin/python3"), [
9 | 'python3-activipy',
10 | 'python3-breathe',
11 | 'python3-coverage',
12 | 'python3-coverage',
13 | 'python3-cymruwhois',
14 | 'python3-dbg',
15 | 'python3-dbg',
16 | 'python3-dbg',
17 | 'python3-dbg',
18 | 'python3-dev',
19 | 'python3-future',
20 | 'python3-future',
21 | 'python3-memory-profiler',
22 | 'python3-minimal',
23 | 'python3-pbr',
24 | 'python3-petname',
25 | 'python3-pyroute2',
26 | 'python3-pyroute2',
27 | 'python3-pysaml2',
28 | 'python3-pysaml2',
29 | 'python3-pysaml2',
30 | 'python3-pysaml2',
31 | 'python3-qrcode',
32 | 'python3-stem',
33 | 'python3-unidiff',
34 | 'python3-unittest2',
35 | 'python3-websocket',
36 | 'python3.8-dbg',
37 | 'python3.8-dbg',
38 | 'python3.8-dbg',
39 | 'python3.8-dbg',
40 | 'python3.8-dbg',
41 | 'python3.8-dbg',
42 | 'python3.8-dbg',
43 | 'python3.8-dbg',
44 | 'python3.8-dev',
45 | 'python3.8-minimal',
46 | 'python3.9-dbg',
47 | 'python3.9-dbg',
48 | 'python3.9-dbg',
49 | 'python3.9-dbg',
50 | 'python3.9-dbg',
51 | 'python3.9-dbg',
52 | 'python3.9-dbg',
53 | 'python3.9-dbg',
54 | 'python3.9-dev',
55 | 'python3.9-minimal'
56 | ])
57 |
--------------------------------------------------------------------------------
/test/test_audit.py:
--------------------------------------------------------------------------------
1 | import threading
2 | from it_depends.dependencies import InMemoryPackageCache, Package, Vulnerability
3 | from it_depends import audit
4 |
5 | import logging
6 | import random
7 | import string
8 | import time
9 | from unittest import TestCase
10 | from unittest.mock import Mock, patch
11 |
12 |
13 | logger = logging.getLogger(__name__)
14 |
15 |
16 | def _rand_str(n):
17 | """Returns a random string of length n (upper, lower and digits)"""
18 | return ''.join(random.choice(string.ascii_lowercase +
19 | string.ascii_uppercase + string.digits)
20 | for i in range(n))
21 |
22 |
23 | def _version_str():
24 | """Returns a typical version string (x.y.z)"""
25 | return f"{random.randint(0, 30)}.{random.randint(0,5)}." \
26 | f"{random.randint(0, 9)}"
27 |
28 |
29 | def _random_package():
30 | """Returns a package of random name, version and source"""
31 | return Package(_rand_str(10), _version_str(), _rand_str(5))
32 |
33 |
34 | def _random_packages(num_packages):
35 |     """Returns a PackageCache populated with num_packages random Packages"""
36 | packages = InMemoryPackageCache()
37 | list(map(packages.add, [_random_package() for i in range(num_packages)]))
38 | return packages
39 |
40 |
41 | def _random_vulnerability():
42 | """Create a random vulnerability"""
43 | return Vulnerability(_rand_str(10),
44 | [_rand_str(3) for i in range(random.randint(0, 7)) if
45 | random.randint(0, 100) < 90],
46 | _rand_str(random.randint(0, 10)))
47 |
48 |
49 | def _random_vulnerabilities(max_count):
50 | """Return up to max_count vulnerabilities"""
51 | return [_random_vulnerability() for x in range(random.randint(0, max_count))]
52 |
53 |
54 | class TestAudit(TestCase):
55 | def setUp(self):
56 | # To be able to repeat a failing test the seed for random is logged
57 | seed = int(time.time())
58 | random.seed(seed)
59 | logger.warning(f"Using seed: {seed}")
60 |
61 | @patch('it_depends.audit.post')
62 | def test_nopackages_no_requests(self, mock_post):
63 | packages = _random_packages(0)
64 | ret = audit.vulnerabilities(packages)
65 | self.assertEqual(ret, packages)
66 | mock_post.assert_not_called()
67 |
68 | @patch('it_depends.audit.post')
69 | def test_valid_limited_info_response(self, mock_post):
70 | """Ensures that a single vuln with the minimum amount of info we require works"""
71 | packages = _random_packages(1)
72 | mock_post().json.return_value = {"vulns": [{"id": "123"}]}
73 | ret = audit.vulnerabilities(packages)
74 |
75 | pkg = next(p for p in ret)
76 | vuln = next(v for v in pkg.vulnerabilities) # Assume one vulnerability
77 | self.assertEqual(vuln.id, "123")
78 | self.assertEqual(len(vuln.aliases), 0)
79 | self.assertEqual(vuln.summary, "N/A")
80 |
81 | @patch('it_depends.audit.post')
82 | def test_no_vulns_can_be_handled(self, mock_post):
83 | """No vulnerability info can still be handled"""
84 | packages = _random_packages(1)
85 | mock_post().json.return_value = {}
86 | ret = audit.vulnerabilities(packages)
87 | self.assertTrue(all(map(lambda p: len(p.vulnerabilities) == 0, ret)))
88 |
89 | @patch('it_depends.audit.post')
90 | def test_handles_ten_thousand_requests(self, mock_post):
91 | """Constructs ten thousand random packages and maps random vulnerabilities to the packages.
92 | Ensures that the vulnerability information received from OSV is reflected in the Packages"""
93 |
94 | # Create 10k random packages (name, version, source)
95 | packages = _random_packages(10000)
96 |
97 | # For each of the packages map 0 or more vulnerabilities
98 | package_vuln = {(pkg.name, str(pkg.version)): _random_vulnerabilities(10) for pkg in packages}
99 |
100 | # Mocks the json-request to OSV, returns whatever info is in the package_vuln-map
101 | def _osv_response(_, json):
102 | m = Mock()
103 | key = (json["package"]["name"], json["version"])
104 | if key in package_vuln:
105 | m.json.return_value = {"vulns": list(map(lambda x: x.to_obj(), package_vuln[key]))}
106 | else:
107 | m.json.return_value = {}
108 | return m
109 |
110 | mock_post.side_effect = _osv_response
111 |
112 |         # Query all packages for vulnerabilities, ensuring that each package received the
113 |         # vulnerability info recorded in the package_vuln map created earlier.
114 | for pkg in audit.vulnerabilities(packages):
115 | pkgvuln = sorted(pkg.vulnerabilities)
116 | expectedvuln = sorted(package_vuln[(pkg.name, str(pkg.version))])
117 |
118 | self.assertListEqual(pkgvuln, expectedvuln)
119 |
120 | @patch('it_depends.audit.post')
121 | def test_exceptions_are_logged_and_isolated(self, mock_post):
122 | """Ensure that if exceptions happen during vulnerability querying they do not kill execution.
123 | They shall still be logged."""
124 | packages = _random_packages(100)
125 | lock = threading.Lock()
126 | counter = 0
127 |
128 | def _osv_response(_, json):
129 | nonlocal counter
130 | m = Mock()
131 | m.json.return_value = {}
132 | with lock:
133 | counter += 1
134 | if counter % 2 == 0:
135 | raise Exception("Ouch.")
136 | return m
137 | mock_post.side_effect = _osv_response
138 |
139 | self.assertEqual(len(audit.vulnerabilities(packages)), 100)
140 |
--------------------------------------------------------------------------------
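The tests above drive `it_depends.audit.post` with an OSV-style request body (`{"package": {"name": ...}, "version": ...}`) and an OSV-style response (`{"vulns": [...]}`). For reference, a minimal unmocked sketch of such a query, assuming the `requests` package and OSV's public `https://api.osv.dev/v1/query` endpoint (the ecosystem value is an assumption, not something the tests pin down):

```python
import requests

def query_osv(name: str, version: str, ecosystem: str = "PyPI") -> list:
    """Sketch of the query the mocked `post` stands in for."""
    response = requests.post(
        "https://api.osv.dev/v1/query",
        json={"package": {"name": name, "ecosystem": ecosystem}, "version": version},
        timeout=30,
    )
    response.raise_for_status()
    # OSV omits the "vulns" key when there are no findings, mirroring the
    # empty-dict case exercised by test_no_vulns_can_be_handled.
    return response.json().get("vulns", [])
```

Each returned entry carries at least an "id"; "aliases" and "summary" are optional, which is why test_valid_limited_info_response accepts an id-only record and falls back to an empty alias list and an "N/A" summary.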
/test/test_db.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from it_depends.db import DBPackageCache
4 | from it_depends.dependencies import Dependency, DependencyResolver, Package, ResolverAvailability, SimpleSpec, Version, resolvers, resolver_by_name
5 |
6 |
7 | class TestDB(TestCase):
8 | def setUp(self) -> None:
9 | class UnusedResolver(DependencyResolver):
10 | name: str = "unknown"
11 | description: str = "Used for testing"
12 |
13 | def is_available(self) -> ResolverAvailability:
14 | return ResolverAvailability(False, "Unused resolver")
15 |
16 | def can_resolve_from_source(self, repo) -> bool:
17 | return False
18 |
19 | def resolve_from_source(self, repo, cache=None):
20 | raise NotImplementedError()
21 |
22 | self.unknown = UnusedResolver
23 | del UnusedResolver
24 |
25 | def tearDown(self) -> None:
26 | del self.unknown
27 | resolvers.cache_clear()
28 | resolver_by_name.cache_clear()
29 |         # Force collection so UnusedResolver is removed from the global resolver set
30 |         import gc
31 |         gc.collect()
32 |         gc.collect()
33 |
34 | def test_db(self):
35 | with DBPackageCache() as cache:
36 | UnusedResolver = self.unknown
37 | pkg = Package(name="package", version=Version.coerce("1.0.0"), source=UnusedResolver(),
38 | dependencies=(Dependency(package="dep", semantic_version=SimpleSpec(">3.0"),
39 | source=UnusedResolver()),))
40 | cache.add(pkg)
41 | self.assertIn(pkg, cache)
42 | self.assertEqual(len(cache), 1)
43 | # re-adding the package should be a NO-OP
44 | cache.add(pkg)
45 | self.assertEqual(len(cache), 1)
46 | # try adding the package again, but with fewer dependencies:
47 | smaller_pkg = Package(name="package", version=Version.coerce("1.0.0"), source=UnusedResolver())
48 | self.assertRaises(ValueError, cache.add, smaller_pkg)
49 |
50 |
--------------------------------------------------------------------------------
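As exercised above, `DBPackageCache` behaves like a persistent set of packages: it is a context manager, re-adding an identical package is a no-op, and re-adding the same name/version with fewer dependencies raises `ValueError`. A minimal round-trip sketch using only constructs visible in the tests (the no-argument constructor default is assumed from the test's own call; `"pip"` as a source string follows test_native.py):

```python
from it_depends.db import DBPackageCache
from it_depends.dependencies import Package, Version

with DBPackageCache() as cache:
    pkg = Package(name="example", version=Version.coerce("2.1.0"), source="pip")
    cache.add(pkg)        # stores the package
    cache.add(pkg)        # identical re-add is a NO-OP
    assert pkg in cache and len(cache) == 1
```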
/test/test_go.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from it_depends.go import GoModule, GoSpec, GoVersion
4 |
5 |
6 | EXAMPLE_MOD = """
7 | module github.com/btcsuite/btcd
8 |
9 | require (
10 | \tgithub.com/aead/siphash v1.0.1 // indirect
11 | \tgithub.com/btcsuite/btclog v0.0.0-20170628155309-84c8d2346e9f
12 | \tgithub.com/btcsuite/btcutil v0.0.0-20190425235716-9e5f4b9a998d
13 | \tgithub.com/btcsuite/go-socks v0.0.0-20170105172521-4720035b7bfd
14 | \tgithub.com/btcsuite/goleveldb v0.0.0-20160330041536-7834afc9e8cd
15 | \tgithub.com/btcsuite/snappy-go v0.0.0-20151229074030-0bdef8d06723 // indirect
16 | \tgithub.com/btcsuite/websocket v0.0.0-20150119174127-31079b680792
17 | \tgithub.com/btcsuite/winsvc v1.0.0
18 | \tgithub.com/davecgh/go-spew v0.0.0-20171005155431-ecdeabc65495
19 | \tgithub.com/jessevdk/go-flags v0.0.0-20141203071132-1679536dcc89
20 | \tgithub.com/jrick/logrotate v1.0.0
21 | \tgithub.com/kkdai/bstream v0.0.0-20161212061736-f391b8402d23 // indirect
22 | \tgithub.com/onsi/ginkgo v1.7.0 // indirect
23 | \tgithub.com/onsi/gomega v1.4.3 // indirect
24 | \tgolang.org/x/crypto v0.0.0-20170930174604-9419663f5a44
25 | )
26 |
27 | go 1.12
28 | """
29 |
30 |
31 | class TestGo(TestCase):
32 | def test_load_from_github(self):
33 | GoModule.from_git("github.com/golang/protobuf", "https://github.com/golang/protobuf", tag="v1.4.3")
34 |
35 | def test_parsing(self):
36 | module = GoModule.parse_mod(EXAMPLE_MOD)
37 | self.assertEqual(module.name, "github.com/btcsuite/btcd")
38 | self.assertEqual(len(module.dependencies), 15)
39 | self.assertIn(("github.com/btcsuite/websocket", "v0.0.0-20150119174127-31079b680792"), module.dependencies)
40 |
41 | def test_version_parsing(self):
42 | for _, version in GoModule.parse_mod(EXAMPLE_MOD).dependencies:
43 | self.assertEqual(str(GoVersion(version)), version)
44 | self.assertEqual(str(GoSpec(version)), version)
45 |
--------------------------------------------------------------------------------
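EXAMPLE_MOD is an ordinary go.mod: a `module` directive, a `require (...)` block of `path version` pairs (some tagged `// indirect`), and a `go` directive. A standalone sketch of the parse the assertions imply, with indirect entries kept so the count matches the 15 asserted in test_parsing (illustrative, not GoModule.parse_mod's actual code):

```python
from typing import List, Tuple

def parse_require_block(mod: str) -> List[Tuple[str, str]]:
    """Extract (module path, version) pairs from a go.mod require block."""
    deps = []
    in_require = False
    for line in mod.splitlines():
        line = line.strip()
        if line.startswith("require ("):
            in_require = True
        elif in_require and line == ")":
            in_require = False
        elif in_require and line:
            # A trailing "// indirect" comment is dropped by keeping only
            # the first two whitespace-separated fields.
            path, version = line.split()[:2]
            deps.append((path, version))
    return deps

# parse_require_block(EXAMPLE_MOD) yields 15 pairs, including
# ("github.com/btcsuite/websocket", "v0.0.0-20150119174127-31079b680792").
```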
/test/test_graphs.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from it_depends.graphs import RootedDiGraph
4 |
5 | class Node(int):
6 | pass
7 |
8 |
9 | class Root(Node):
10 | pass
11 |
12 |
13 | class TestGraphs(unittest.TestCase):
14 | def test_single_root(self):
15 | graph: RootedDiGraph[Node, Root] = RootedDiGraph()
16 | graph.root_type = Root
17 | nodes = [Root(0)] + [Node(i) for i in range(1, 5)]
18 | graph.add_node(nodes[0])
19 | graph.add_edge(nodes[0], nodes[1])
20 | graph.add_edge(nodes[0], nodes[2])
21 | graph.add_edge(nodes[1], nodes[3])
22 | graph.add_edge(nodes[2], nodes[4])
23 | self.assertEqual(0, graph.shortest_path_from_root(nodes[0]))
24 | self.assertEqual(1, graph.shortest_path_from_root(nodes[1]))
25 | self.assertEqual(1, graph.shortest_path_from_root(nodes[2]))
26 | self.assertEqual(2, graph.shortest_path_from_root(nodes[3]))
27 | self.assertEqual(2, graph.shortest_path_from_root(nodes[4]))
28 |
29 | def test_two_roots(self):
30 | graph: RootedDiGraph[Node, Root] = RootedDiGraph()
31 | graph.root_type = Root
32 | nodes = [Root(0), Root(1)] + [Node(i) for i in range(2, 5)]
33 | graph.add_node(nodes[0])
34 | graph.add_node(nodes[1])
35 | graph.add_edge(nodes[0], nodes[2])
36 | graph.add_edge(nodes[0], nodes[3])
37 | graph.add_edge(nodes[1], nodes[3])
38 | graph.add_edge(nodes[3], nodes[4])
39 | self.assertEqual(0, graph.shortest_path_from_root(nodes[0]))
40 | self.assertEqual(0, graph.shortest_path_from_root(nodes[1]))
41 | self.assertEqual(1, graph.shortest_path_from_root(nodes[2]))
42 | self.assertEqual(1, graph.shortest_path_from_root(nodes[3]))
43 | self.assertEqual(2, graph.shortest_path_from_root(nodes[4]))
44 |
--------------------------------------------------------------------------------
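Both tests pin down `shortest_path_from_root` as breadth-first distance from the nearest node of `root_type`: roots sit at distance 0, each edge adds 1, and when paths from two roots converge (nodes[3] in test_two_roots) the minimum wins. A self-contained sketch of that computation over a plain adjacency map (illustrative only; RootedDiGraph's internals are not shown here):

```python
from collections import deque
from typing import Dict, Hashable, Iterable, Set

def distance_from_roots(successors: Dict[Hashable, Iterable[Hashable]],
                        roots: Set[Hashable], target: Hashable) -> int:
    """Breadth-first distance from the nearest root to `target`."""
    queue = deque((root, 0) for root in roots)
    seen = set(roots)
    while queue:
        node, dist = queue.popleft()
        if node == target:
            return dist
        for succ in successors.get(node, ()):
            if succ not in seen:
                seen.add(succ)
                queue.append((succ, dist + 1))
    raise ValueError(f"{target!r} is unreachable from any root")

# Mirrors test_two_roots: paths from roots 0 and 1 converge on node 3.
assert distance_from_roots({0: [2, 3], 1: [3], 3: [4]}, {0, 1}, 3) == 1
assert distance_from_roots({0: [2, 3], 1: [3], 3: [4]}, {0, 1}, 4) == 2
```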
/test/test_native.py:
--------------------------------------------------------------------------------
1 | from platform import machine
2 | from unittest import TestCase
3 |
4 | from it_depends.dependencies import Package, Version
5 | from it_depends.native import get_native_dependencies
6 |
7 |
8 | def arch_string() -> str:
9 | """Returns an architecture dependendent string for filenames
10 | Current support is only arm64/x86_64."""
11 | # TODO (hbrodin): Make more general.
12 | return "aarch64" if machine() == "arm64" else "x86_64"
13 |
14 |
15 | class TestNative(TestCase):
16 | def test_native(self):
17 | deps = {dep.package for dep in get_native_dependencies(Package(
18 | name="numpy",
19 | version=Version.coerce("1.19.4"),
20 | source="pip"
21 | ))}
22 | arch = arch_string()
23 | self.assertEqual({
24 | f'/lib/{arch}-linux-gnu/libtinfo.so.6', f'/lib/{arch}-linux-gnu/libnss_files.so.2',
25 | f'/lib/{arch}-linux-gnu/libc.so.6', f'/lib/{arch}-linux-gnu/libdl.so.2'
26 | }, deps)
27 |
--------------------------------------------------------------------------------
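The expected set is numpy's loader-level footprint: libc, libdl, libtinfo, and the NSS files backend under the multiarch directory for the current architecture. How `get_native_dependencies` gathers these is not shown in this test; as a rough standalone approximation, one can parse `ldd` output for a compiled extension (the path below is hypothetical, and `ldd` inspects a single file rather than tracing a real import):

```python
import subprocess
from typing import Set

def shared_library_deps(binary_path: str) -> Set[str]:
    """Approximate native dependencies by parsing `ldd` output."""
    output = subprocess.run(
        ["ldd", binary_path], capture_output=True, text=True, check=True
    ).stdout
    deps = set()
    for line in output.splitlines():
        # Typical line: "libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x...)"
        if "=>" in line:
            resolved = line.split("=>", 1)[1].strip().split(" ")[0]
            if resolved.startswith("/"):
                deps.add(resolved)
    return deps

# Hypothetical usage; the extension path depends on the installation:
# shared_library_deps(".../numpy/core/_multiarray_umath.cpython-38-x86_64-linux-gnu.so")
```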
/test/test_resolver.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from it_depends.resolver import resolve_sbom
4 | from it_depends.sbom import cyclonedx_to_json
5 |
6 | from .test_smoke import SmokeTest
7 |
8 |
9 | class TestResolver(TestCase):
10 | def test_resolve(self):
11 | test = SmokeTest("trailofbits", "it-depends", "3db3d191ce04fb8a19bcc5c000ce84dbb3243f31")
12 | packages = test.run()
13 | for package in packages.source_packages:
14 | for sbom in resolve_sbom(package, packages, order_ascending=True):
15 | # print(str(sbom))
16 | print(cyclonedx_to_json(sbom.to_cyclonedx()))
17 | break
18 |
--------------------------------------------------------------------------------
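`resolve_sbom` yields a sequence of candidate SBOMs per source package (ascending order here), and the test serializes only the first before breaking out of the loop. That take-one pattern in isolation, using only names the test itself imports (the helper is illustrative and assumes at least one candidate exists):

```python
from it_depends.resolver import resolve_sbom
from it_depends.sbom import cyclonedx_to_json

def first_sbom_json(package, packages) -> str:
    """Serialize the first SBOM candidate for `package` as CycloneDX JSON."""
    # next(iter(...)) raises StopIteration if resolution yields no SBOM.
    sbom = next(iter(resolve_sbom(package, packages, order_ascending=True)))
    return cyclonedx_to_json(sbom.to_cyclonedx())
```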
/test/test_smoke.py:
--------------------------------------------------------------------------------
1 | from functools import wraps
2 | from unittest import TestCase
3 | from pathlib import Path
4 | import os
5 | import json
6 | from typing import Set
7 | import urllib.request
8 | import zipfile
9 |
10 | from it_depends.dependencies import (
11 | Dependency, InMemoryPackageCache, List, Optional, Package, PackageRepository, resolve, resolver_by_name, resolvers,
12 | SimpleSpec, SourceRepository, Tuple, Union
13 | )
14 |
15 | IT_DEPENDS_DIR: Path = Path(__file__).absolute().parent.parent
16 | TESTS_DIR: Path = Path(__file__).absolute().parent
17 | REPOS_FOLDER = TESTS_DIR / "repos"
18 |
19 |
20 | class TestResolvers(TestCase):
21 | maxDiff = None
22 |
23 | def test_resolvers(self):
24 | """We see all known resolvers
25 | caveat: Iff an unknown resolver was defined by another test it will appear here
26 | """
27 | resolver_names = {resolver.name for resolver in resolvers()}
28 | self.assertSetEqual(resolver_names, {'cargo', 'ubuntu', 'autotools', 'go', 'cmake', 'npm', 'pip'})
29 | self.assertSetEqual(resolvers(), {resolver_by_name(name) for name in resolver_names})
30 |
31 | def test_objects(self):
32 | # To/From string for nicer output and ergonomics
33 | self.assertEqual(str(Dependency.from_string("pip:cvedb@*")), "pip:cvedb@*")
34 | self.assertEqual(str(Package.from_string("pip:cvedb@0.0.1")), "pip:cvedb@0.0.1")
35 |
36 | # Basic Dependency object handling
37 | dep = Dependency.from_string("pip:cvedb@*")
38 | self.assertEqual(dep.source, "pip")
39 | self.assertEqual(dep.package, "cvedb")
40 | self.assertTrue(dep.semantic_version == SimpleSpec("*"))
41 | self.assertTrue(Dependency(source="pip", package="cvedb", semantic_version=SimpleSpec("*")) ==
42 | dep)
43 |
44 | # Dependency match
45 | solution = Package(source="pip", name="cvedb", version="0.0.1")
46 | self.assertTrue(dep.match(solution))
47 | dep2 = Dependency.from_string("pip:cvedb@<0.2.1")
48 | self.assertTrue(dep2.match(Package.from_string("pip:cvedb@0.2.0")))
49 | self.assertFalse(dep2.match(Package.from_string("pip:cvedb@0.2.1")))
50 |
51 | def _test_resolver(self, resolver, dep):
52 | dep = Dependency.from_string(dep)
53 | resolver = resolver_by_name(resolver)
54 | self.assertIs(dep.resolver, resolver)
55 |
56 | solutions = tuple(resolver.resolve(dep))
57 | self.assertGreater(len(solutions), 0)
58 | for package in solutions:
59 | self.assertEqual(package.source, dep.source)
60 | self.assertEqual(package.name, dep.package)
61 | self.assertTrue(dep.semantic_version.match(package.version))
62 | self.assertTrue(dep.match(package))
63 | return solutions
64 |
65 | def test_determinism(self):
66 | """Test if a resolver gives the same solution multiple times in a row.
67 |
68 | Half of the attempts will be without a cache, and the second half will use the same cache.
69 |
70 | """
71 | cache = InMemoryPackageCache()
72 | to_test: List[Tuple[Union[Dependency, SourceRepository], int]] = [
73 | (Dependency.from_string(dep_name), 5) for dep_name in (
74 | "pip:cvedb@*", "ubuntu:libc6@*", "cargo:rand_core@0.6.2", "npm:crypto-js@4.0.0"
75 | )
76 | ]
77 | to_test.extend([
78 | (smoke_test.source_repo, 3) for smoke_test in SMOKE_TESTS if smoke_test.repo_name in (
79 | "bitcoin",
80 | "pe-parse"
81 | )
82 | ])
83 | for dep, num_attempts in to_test:
84 | with self.subTest(msg=f"Testing the determinism of dep", dep=dep):
85 | first_result: Set[Package] = set()
86 | for i in range(num_attempts):
87 | if i < num_attempts // 2:
88 | attempt_cache: Optional[InMemoryPackageCache] = None
89 | else:
90 | attempt_cache = cache
91 | result = set(resolve(dep, cache=attempt_cache))
92 | if i == 0:
93 | first_result = result
94 | else:
95 | self.assertEqual(first_result, result,
96 | msg=f"Results differed on attempt {i + 1} at resolving {dep}")
97 |
98 | def test_pip(self):
99 | self._test_resolver("pip", "pip:cvedb@*")
100 |
101 | def test_ubuntu(self):
102 | self._test_resolver("ubuntu", "ubuntu:libc6@*")
103 |
104 | def test_cargo(self):
105 | self._test_resolver("cargo", "cargo:rand_core@0.6.2")
106 |
107 | def test_npm(self):
108 | self._test_resolver("npm", "npm:crypto-js@4.0.0")
109 |
110 |
111 | class SmokeTest:
112 | def __init__(self, user_name: str, repo_name: str, commit: str):
113 | self.user_name: str = user_name
114 | self.repo_name: str = repo_name
115 | self.commit: str = commit
116 |
117 | self.url: str = f"https://github.com/{user_name}/{repo_name}/archive/{commit}.zip"
118 | self._snapshot_folder: Path = REPOS_FOLDER / (repo_name + "-" + commit)
119 | self._snapshot_zip: Path = self._snapshot_folder.with_suffix(".zip")
120 |
121 | self.expected_json: Path = REPOS_FOLDER / f"{repo_name}.expected.json"
122 | self.actual_json: Path = REPOS_FOLDER / f"{repo_name}.actual.json"
123 |
124 | @property
125 | def snapshot_folder(self) -> Path:
126 | if not self._snapshot_folder.exists():
127 | urllib.request.urlretrieve(self.url, self._snapshot_zip)
128 | with zipfile.ZipFile(self._snapshot_zip, "r") as zip_ref:
129 | zip_ref.extractall(REPOS_FOLDER)
130 | return self._snapshot_folder
131 |
132 | @property
133 | def source_repo(self) -> SourceRepository:
134 | return SourceRepository(self.snapshot_folder)
135 |
136 | def run(self) -> PackageRepository:
137 | return resolve(self.source_repo)
138 |
139 | def __hash__(self):
140 | return hash((self.user_name, self.repo_name, self.commit))
141 |
142 | def __eq__(self, other):
143 | return (
144 | isinstance(other, SmokeTest) and self.user_name == other.user_name and self.repo_name == other.repo_name
145 | and self.commit == other.commit
146 | )
147 |
148 |
149 | SMOKE_TESTS: Set[SmokeTest] = set()
150 |
151 |
152 | def gh_smoke_test(user_name: str, repo_name: str, commit: str):
153 | smoke_test = SmokeTest(user_name, repo_name, commit)
154 | SMOKE_TESTS.add(smoke_test)
155 |
156 | def do_smoke_test(func):
157 | @wraps(func)
158 | def wrapper(self: TestCase):
159 | package_list = smoke_test.run()
160 | result_it_depends = package_list.to_obj()
161 | with open(smoke_test.actual_json, "w") as f:
162 | f.write(json.dumps(result_it_depends, indent=4, sort_keys=True))
163 |
164 | if not smoke_test.expected_json.exists():
165 | raise ValueError(f"File {smoke_test.expected_json.absolute()} needs to be created! See "
166 | f"{smoke_test.actual_json.absolute()} for the output of the most recent run.")
167 | with open(smoke_test.expected_json, "r") as f:
168 | expected = json.load(f)
169 | if result_it_depends != expected:
170 | print(f"See {smoke_test.actual_json.absolute()} for the result of this run.")
171 | self.assertEqual(result_it_depends, expected)
172 |
173 | return func(self, package_list)
174 |
175 | return wrapper
176 |
177 | return do_smoke_test
178 |
179 |
180 | class TestSmoke(TestCase):
181 | maxDiff = None
182 |
183 | def setUp(self) -> None:
184 | if not os.path.exists(REPOS_FOLDER):
185 | os.makedirs(REPOS_FOLDER)
186 |
187 | @gh_smoke_test("trailofbits", "cvedb", "7441dc0e238e31829891f85fd840d9e65cb629d8")
188 | def __test_pip(self, package_list):
189 | pass
190 |
191 | @gh_smoke_test("trailofbits", "siderophile", "7bca0f5a73da98550c29032f6a2a170f472ea241")
192 | def __test_cargo(self, package_list):
193 | pass
194 |
195 | @gh_smoke_test("bitcoin", "bitcoin", "4a267057617a8aa6dc9793c4d711725df5338025")
196 | def __test_autotools(self, package_list):
197 | pass
198 |
199 | @gh_smoke_test("brix", "crypto-js", "971c31f0c931f913d22a76ed488d9216ac04e306")
200 | def __test_npm(self, package_list):
201 | pass
202 |
203 | # @gh_smoke_test("lifting-bits", "rellic", "9cf73b288a3d0c51d5de7e1060cba8656538596f")
204 | @gh_smoke_test("trailofbits", "pe-parse", "94bd12ac539382c303896f175a1ab16352e65a8f")
205 | def __test_cmake(self, package_list):
206 | pass
207 |
--------------------------------------------------------------------------------
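Each smoke test pins a repository to an exact commit so that the `*.expected.json` files stay comparable across runs; `SmokeTest.snapshot_folder` lazily downloads GitHub's zip archive for that commit and unpacks it. That fetch step in isolation (a sketch mirroring the class; `dest` is an illustrative parameter standing in for REPOS_FOLDER):

```python
import urllib.request
import zipfile
from pathlib import Path

def fetch_snapshot(user: str, repo: str, commit: str, dest: Path) -> Path:
    """Download and unpack GitHub's archive of a pinned commit."""
    folder = dest / f"{repo}-{commit}"
    if not folder.exists():
        dest.mkdir(parents=True, exist_ok=True)
        archive = folder.with_suffix(".zip")
        urllib.request.urlretrieve(
            f"https://github.com/{user}/{repo}/archive/{commit}.zip", archive
        )
        with zipfile.ZipFile(archive, "r") as zip_ref:
            zip_ref.extractall(dest)
    return folder
```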
/test/test_ubuntu.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | from unittest.mock import patch
3 | from it_depends.dependencies import Dependency
4 | from it_depends.ubuntu.resolver import UbuntuResolver
5 |
6 |
7 | class TestUbuntu(TestCase):
8 | def test_ubuntu(self):
9 | contents = """Package: dkms
10 | Version: 2.8.1-5ubuntu2
11 | Priority: optional
12 | Section: admin
13 | Origin: Ubuntu
14 | Maintainer: Ubuntu Developers
15 | Original-Maintainer: Dynamic Kernel Modules Support Team
16 | Bugs: https://bugs.launchpad.net/ubuntu/+filebug
17 | Installed-Size: 296 kB
18 | Pre-Depends: lsb-release
19 | Depends: kmod | kldutils, gcc | c-compiler, dpkg-dev, make | build-essential, coreutils (>= 7.3), patch, dctrl-tools
20 | Recommends: fakeroot, sudo, linux-headers-686-pae | linux-headers-amd64 | linux-headers-generic | linux-headers
21 | Suggests: menu, e2fsprogs
22 | Breaks: shim-signed (<< 1.34~)
23 | Homepage: https://github.com/dell-oss/dkms
24 | Download-Size: 66,8 kB
25 | APT-Manual-Installed: no
26 | APT-Sources: http://ar.archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages
27 | Description: Dynamic Kernel Module Support Framework
28 | DKMS is a framework designed to allow individual kernel modules to be upgraded
29 | without changing the whole kernel. It is also very easy to rebuild modules as
30 | you upgrade kernels.
31 |
32 | Package: dkms
33 | Version: 2.8.1-5ubuntu1
34 | Priority: optional
35 | Section: admin
36 | Origin: Ubuntu
37 | Maintainer: Ubuntu Developers
38 | Original-Maintainer: Dynamic Kernel Modules Support Team
39 | Bugs: https://bugs.launchpad.net/ubuntu/+filebug
40 | Installed-Size: 296 kB
41 | Pre-Depends: lsb-release
42 | Depends: kmod | kldutils, gcc | c-compiler, dpkg-dev, make | build-essential, coreutils (>= 7.5), patch
43 | Recommends: fakeroot, sudo, linux-headers-686-pae | linux-headers-amd64 | linux-headers-generic | linux-headers
44 | Suggests: menu, e2fsprogs
45 | Breaks: shim-signed (<< 1.34~)
46 | Homepage: https://github.com/dell-oss/dkms
47 | Download-Size: 66,6 kB
48 | APT-Sources: http://ar.archive.ubuntu.com/ubuntu focal/main amd64 Packages
49 | Description: Dynamic Kernel Module Support Framework
50 | DKMS is a framework designed to allow individual kernel modules to be upgraded
51 | without changing the whole kernel. It is also very easy to rebuild modules as
52 | you upgrade kernels.
53 |
54 | """
55 | with patch('it_depends.ubuntu.docker.run_command') as mock:
56 | mock.return_value = contents.encode()
57 | deps = tuple(UbuntuResolver().resolve(dependency=Dependency(package="dkms", source="ubuntu")))
58 | self.assertEqual(len(deps), 1)
59 | self.assertEqual(str(deps[0]), 'ubuntu:dkms@2.8.1[ubuntu:build-essential@*,ubuntu:c-compiler@*,'
60 | 'ubuntu:coreutils@>=7.4,ubuntu:dctrl-tools@*,ubuntu:dpkg-dev@*,'
61 | 'ubuntu:gcc@*,ubuntu:kldutils@*,ubuntu:kmod@*,ubuntu:make@*,ubuntu:patch@*]')
62 |
--------------------------------------------------------------------------------
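The mocked `apt`-style output feeds UbuntuResolver two records for `dkms`; the assertion shows that only one survives, that `|` alternatives such as `kmod | kldutils` each become their own dependency, and that version constraints like `coreutils (>= 7.3)` are carried through. A sketch of splitting a `Depends:` field that way (illustrative, not UbuntuResolver's parser):

```python
import re
from typing import List, Tuple

def parse_depends(field: str) -> List[Tuple[str, str]]:
    """Split an apt Depends: field into (name, constraint) pairs."""
    deps = []
    for entry in field.split(","):
        # Alternatives ("a | b") each become their own candidate, which is
        # why both kmod and kldutils appear in the asserted output above.
        for alternative in entry.split("|"):
            match = re.match(r"\s*([^\s(]+)\s*(?:\(([^)]+)\))?", alternative)
            if match:
                name, constraint = match.groups()
                deps.append((name, constraint or "*"))
    return deps

# parse_depends("kmod | kldutils, coreutils (>= 7.3), patch")
# -> [("kmod", "*"), ("kldutils", "*"), ("coreutils", ">= 7.3"), ("patch", "*")]
```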
/test/test_vcs.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from it_depends.vcs import resolve
4 |
5 |
6 | class TestVCS(TestCase):
7 | def test_resolve(self):
8 | repo = resolve("github.com/trailofbits/graphtage")
9 | self.assertEqual(repo.vcs.name, "Git")
10 |
--------------------------------------------------------------------------------
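`it_depends.vcs.resolve` maps a repository spec such as `github.com/trailofbits/graphtage` to a handle whose `vcs.name` is `Git`. One plausible shape for such detection is host-prefix matching; the sketch below is purely illustrative and says nothing about resolve's real strategy:

```python
# Illustrative host-prefix detection; not it_depends' actual logic.
KNOWN_GIT_HOSTS = ("github.com/", "gitlab.com/", "bitbucket.org/")

def guess_vcs(spec: str) -> str:
    if spec.startswith(KNOWN_GIT_HOSTS):
        return "Git"
    raise ValueError(f"cannot determine the VCS for {spec!r}")

assert guess_vcs("github.com/trailofbits/graphtage") == "Git"
```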