├── .bouncer.yaml ├── .github └── workflows │ └── codeql.yml ├── .gitignore ├── CONTRIBUTING.md ├── CONTRIBUTORS.md ├── DEVELOPING.md ├── Dockerfile ├── Dockerfile.debug ├── LICENSE ├── Makefile ├── README.md ├── RELEASE.md ├── __pycache__ └── scan.cpython-310.pyc ├── app_patrol.d ├── app_patrol.db ├── artifacthub-repo.yml ├── assets └── chatcve_ui.png ├── chat_cve.py ├── fetch_daily_nvd_cves.py ├── frontend ├── app.py ├── static │ └── style.css └── templates │ └── index.html ├── images.txt ├── node_modules └── .package-lock.json ├── package-lock.json ├── package.json ├── requirements.txt ├── scan.py └── tests ├── __pycache__ └── test_scan.cpython-310.pyc └── test_scan.py /.bouncer.yaml: -------------------------------------------------------------------------------- 1 | permit: 2 | - BSD.* 3 | - CC0.* 4 | - MIT.* 5 | - Apache.* 6 | - MPL.* 7 | - ISC 8 | - WTFPL 9 | 10 | ignore-packages: 11 | # packageurl-go is released under the MIT license located in the root of the repo at /mit.LICENSE 12 | - github.com/anchore/packageurl-go 13 | 14 | # both of these dependencies are specified as Apache-2.0 in their respective GitHub READMEs 15 | - github.com/alibabacloud-go/cr-20160607/client 16 | - github.com/alibabacloud-go/tea-xml/service 17 | 18 | # crypto/internal/boring is released under the openSSL license as a part of the Golang Standard Libary 19 | - crypto/internal/boring 20 | 21 | # from: https://github.com/spdx/tools-golang/blob/main/LICENSE.code 22 | # The tools-golang source code is provided and may be used, at your option, 23 | # under either: 24 | # * Apache License, version 2.0 (Apache-2.0), OR 25 | # * GNU General Public License, version 2.0 or later (GPL-2.0-or-later). 26 | # (we choose Apache-2.0) 27 | - github.com/spdx/tools-golang 28 | 29 | # from: https://github.com/xi2/xz/blob/master/LICENSE 30 | # All these files have been put into the public domain. 31 | # You can do whatever you want with these files. 
32 | - github.com/xi2/xz 33 | 34 | # from: https://gitlab.com/cznic/sqlite/-/blob/v1.15.4/LICENSE 35 | # This is a BSD-3-Clause license 36 | - modernc.org/libc 37 | - modernc.org/libc/errno 38 | - modernc.org/libc/fcntl 39 | - modernc.org/libc/fts 40 | - modernc.org/libc/grp 41 | - modernc.org/libc/langinfo 42 | - modernc.org/libc/limits 43 | - modernc.org/libc/netdb 44 | - modernc.org/libc/netinet/in 45 | - modernc.org/libc/poll 46 | - modernc.org/libc/pthread 47 | - modernc.org/libc/pwd 48 | - modernc.org/libc/signal 49 | - modernc.org/libc/stdio 50 | - modernc.org/libc/stdlib 51 | - modernc.org/libc/sys/socket 52 | - modernc.org/libc/sys/stat 53 | - modernc.org/libc/sys/types 54 | - modernc.org/libc/termios 55 | - modernc.org/libc/time 56 | - modernc.org/libc/unistd 57 | - modernc.org/libc/utime 58 | - modernc.org/libc/uuid/uuid 59 | - modernc.org/libc/wctype 60 | - modernc.org/mathutil 61 | - modernc.org/memory 62 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "master" ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ "master" ] 20 | schedule: 21 | - cron: '42 4 * * 3' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | # Runner size impacts CodeQL analysis time. 
To learn more, please see: 27 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 28 | # - https://gh.io/supported-runners-and-hardware-resources 29 | # - https://gh.io/using-larger-runners 30 | # Consider using larger runners for possible analysis time improvements. 31 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 32 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 33 | permissions: 34 | actions: read 35 | contents: read 36 | security-events: write 37 | 38 | strategy: 39 | fail-fast: false 40 | matrix: 41 | language: [ 'python' ] 42 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ] 43 | # Use only 'java' to analyze code written in Java, Kotlin or both 44 | # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both 45 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 46 | 47 | steps: 48 | - name: Checkout repository 49 | uses: actions/checkout@v3 50 | 51 | # Initializes the CodeQL tools for scanning. 52 | - name: Initialize CodeQL 53 | uses: github/codeql-action/init@v2 54 | with: 55 | languages: ${{ matrix.language }} 56 | # If you wish to specify custom queries, you can do so here or in a config file. 57 | # By default, queries listed here will override any specified in a config file. 58 | # Prefix the list here with "+" to use these queries and those in the config file. 59 | 60 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 61 | # queries: security-extended,security-and-quality 62 | 63 | 64 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). 
65 | # If this step fails, then you should remove it and run the build manually (see below) 66 | - name: Autobuild 67 | uses: github/codeql-action/autobuild@v2 68 | 69 | # ℹ️ Command-line programs to run using the OS shell. 70 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 71 | 72 | # If the Autobuild fails above, remove it and uncomment the following three lines. 73 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 74 | 75 | # - run: | 76 | # echo "Run, Build Application using script" 77 | # ./location_of_script_within_repo/buildscript.sh 78 | 79 | - name: Perform CodeQL Analysis 80 | uses: github/codeql-action/analyze@v2 81 | with: 82 | category: "/language:${{matrix.language}}" 83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | CHANGELOG.md 2 | VERSION 3 | /test/results 4 | /dist 5 | /snapshot 6 | .server/ 7 | .vscode/ 8 | .history/ 9 | *.fingerprint 10 | *.tar 11 | *.jar 12 | *.war 13 | *.ear 14 | *.jpi 15 | *.hpi 16 | *.zip 17 | .idea/ 18 | *.log 19 | .images 20 | .tmp/ 21 | coverage.txt 22 | bin/ 23 | .env 24 | 25 | # Binaries for programs and plugins 26 | *.exe 27 | *.exe~ 28 | *.dll 29 | *.so 30 | *.dylib 31 | 32 | # Test binary, build with `go test -c` 33 | *.test 34 | 35 | # Output of the go coverage tool, specifically when used with LiteIDE 36 | *.out 37 | 38 | # macOS Finder metadata 39 | .DS_STORE 40 | 41 | *.profile 42 | 43 | # attestation 44 | cosign.key 45 | cosign.pub 46 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Syft 2 | 3 | If you are looking to contribute to this project and want to open a GitHub pull request ("PR"), there
are a few guidelines of what we are looking for in patches. Make sure you go through this document and ensure that your code proposal is aligned. 4 | 5 | ## Setting up your environment 6 | 7 | Before you can contribute to Syft, you need to configure your development environment. 8 | 9 | ### Debian setup 10 | 11 | You will need to install Go. The version on https://go.dev works best, using the system golang doesn't always work the way you might expect. 12 | 13 | Refer to the go.mod file in the root of this repo for the recommended version of Go to install. 14 | 15 | You will also need Docker. There's no reason the system packages shouldn't work, but we used the official Docker package. You can find instructions for installing Docker in Debian [here](https://docs.docker.com/engine/install/debian/). 16 | 17 | You also need to install some Debian packages 18 | 19 | ```sh 20 | sudo apt-get install build-essential zip bc libxml2-utils git 21 | ``` 22 | 23 | ## Configuring Git 24 | 25 | You will need to configure your git client with your name and email address. This is easily done from the command line. 26 | 27 | ```text 28 | $ git config --global user.name "John Doe" 29 | $ git config --global user.email "john.doe@example.com" 30 | ``` 31 | 32 | This username and email address will matter later in this guide. 33 | 34 | ## Fork the repo 35 | 36 | You should fork the Syft repo using the "Fork" button at the top right of the Syft GitHub [site](https://github.com/anchore/syft/). You will be doing your development in your fork, then submit a pull request to Syft. There are many resources how to use GitHub effectively, we will not cover those here. 37 | 38 | ## Adding a feature or fix 39 | 40 | If you look at the Syft [Issue](https://github.com/anchore/syft/issues) there are plenty of bugs and feature requests. 
Maybe look at the [good first issue](https://github.com/anchore/syft/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22) list if you're not sure where to start. 41 | 42 | ## Commit guidelines 43 | 44 | In the Syft project we like commits and pull requests (PR) to be easy to understand and review. Open source thrives best when everything happening is over documented and small enough to be understood. 45 | 46 | ### Granular commits 47 | 48 | Please try to make every commit as simple as possible, but no simpler. The idea is that each commit should be a logical unit of code. Try not to commit too many tiny changes, for example every line changed in a file as a separate commit. And also try not to make a commit enormous, for example committing all your work at the end of the day. 49 | 50 | Rather than try to follow a strict guide on what is or is not best, we try to be flexible and simple in this space. Do what makes the most sense for the changes you are trying to include. 51 | 52 | ### Commit title and description 53 | 54 | Remember that the message you leave for a commit is for the reviewer in the present, and for someone (maybe you) changing something in the future. Please make sure the title and description used is easy to understand and explains what was done. Jokes and clever comments generally don't age well in commit messages. Just the facts please. 55 | 56 | ## Sign off your work 57 | 58 | The `sign-off` is an added line at the end of the explanation for the commit, certifying that you wrote it or otherwise have the right to submit it as an open-source patch. By submitting a contribution, you agree to be bound by the terms of the DCO Version 1.1 and Apache License Version 2.0. 
59 | 60 | Signing off a commit certifies the below Developer's Certificate of Origin (DCO): 61 | 62 | ```text 63 | Developer's Certificate of Origin 1.1 64 | 65 | By making a contribution to this project, I certify that: 66 | 67 | (a) The contribution was created in whole or in part by me and I 68 | have the right to submit it under the open source license 69 | indicated in the file; or 70 | 71 | (b) The contribution is based upon previous work that, to the best 72 | of my knowledge, is covered under an appropriate open source 73 | license and I have the right under that license to submit that 74 | work with modifications, whether created in whole or in part 75 | by me, under the same open source license (unless I am 76 | permitted to submit under a different license), as indicated 77 | in the file; or 78 | 79 | (c) The contribution was provided directly to me by some other 80 | person who certified (a), (b) or (c) and I have not modified 81 | it. 82 | 83 | (d) I understand and agree that this project and the contribution 84 | are public and that a record of the contribution (including all 85 | personal information I submit with it, including my sign-off) is 86 | maintained indefinitely and may be redistributed consistent with 87 | this project or the open source license(s) involved. 88 | ``` 89 | 90 | All contributions to this project are licensed under the [Apache License Version 2.0, January 2004](http://www.apache.org/licenses/). 
91 | 92 | When committing your change, you can add the required line manually so that it looks like this: 93 | 94 | ```text 95 | Signed-off-by: John Doe <john.doe@example.com> 96 | ``` 97 | 98 | Creating a signed-off commit is then possible with `-s` or `--signoff`: 99 | 100 | ```text 101 | $ git commit -s -m "this is a commit message" 102 | ``` 103 | 104 | To double-check that the commit was signed-off, look at the log output: 105 | 106 | ```text 107 | $ git log -1 108 | commit 37ceh170e4hb283bb73d958f2036ee5k07e7fde7 (HEAD -> issue-35, origin/main, main) 109 | Author: John Doe <john.doe@example.com> 110 | Date: Mon Aug 1 11:27:13 2020 -0400 111 | 112 | this is a commit message 113 | 114 | Signed-off-by: John Doe <john.doe@example.com> 115 | ``` 116 | 117 | ## Test your changes 118 | 119 | This project has a `Makefile` which includes many helpers running both unit and integration tests. You can run `make help` to see all the options. Although PRs will have automatic checks for these, it is useful to run them locally, ensuring they pass before submitting changes. Ensure you've bootstrapped once before running tests: 120 | 121 | ```text 122 | $ make bootstrap 123 | ``` 124 | 125 | You only need to bootstrap once. After the bootstrap process, you can run the tests as many times as needed: 126 | 127 | ```text 128 | $ make unit 129 | $ make integration 130 | ``` 131 | 132 | You can also run `make all` to run a more extensive test suite, but there is additional configuration that will be needed for those tests to run correctly. We will not cover the extra steps here. 133 | 134 | ## Pull Request 135 | 136 | If you made it this far and all the tests are passing, it's time to submit a Pull Request (PR) for Syft. Submitting a PR is always a scary moment as what happens next can be an unknown. The Syft project strives to be easy to work with, we appreciate all contributions. Nobody is going to yell at you or try to make you feel bad. We love contributions and know how scary that first PR can be. 
137 | 138 | ### PR Title and Description 139 | 140 | Just like the commit title and description mentioned above, the PR title and description is very important for letting others know what's happening. Please include any details you think a reviewer will need to more properly review your PR. 141 | 142 | A PR that is very large or poorly described has a higher likelihood of being pushed to the end of the list. Reviewers like PRs they can understand and quickly review. 143 | 144 | ### What to expect next 145 | 146 | Please be patient with the project. We try to review PRs in a timely manner, but this is highly dependent on all the other tasks we have going on. It's OK to ask for a status update every week or two, it's not OK to ask for a status update every day. 147 | 148 | It's very likely the reviewer will have questions and suggestions for changes to your PR. If your changes don't match the current style and flow of the other code, expect a request to change what you've done. 149 | 150 | ## Document your changes 151 | 152 | And lastly, when proposed changes are modifying user-facing functionality or output, it is expected the PR will include updates to the documentation as well. Syft is not a project that is heavy on documentation. This will mostly be updating the README and help for the tool. 153 | 154 | If nobody knows new features exist, they can't use them! 155 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | # Syft Contributors 2 | 3 | The following Syft components were contributed by external authors/organizations. 4 | 5 | ## GraalVM Native Image 6 | 7 | A cataloger contributed by Oracle Corporation that extracts packages given within GraalVM Native Image SBOMs. 
8 | -------------------------------------------------------------------------------- /DEVELOPING.md: -------------------------------------------------------------------------------- 1 | # Developing 2 | 3 | ## Getting started 4 | 5 | In order to test and develop in this repo you will need the following dependencies installed: 6 | - Golang 7 | - docker 8 | - make 9 | 10 | After cloning, the following steps can help you get set up: 11 | 1. run `make bootstrap` to download go mod dependencies, create the `/.tmp` dir, and download helper utilities. 12 | 2. run `make` to view the selection of developer commands in the Makefile 13 | 3. run `make build` to build the release snapshot binaries and packages 14 | 4. for an even quicker start you can run `go run cmd/syft/main.go` to print the syft help. 15 | - this command `go run cmd/syft/main.go alpine:latest` will compile and run syft against `alpine:latest` 16 | 5. view the README or syft help output for more output options 17 | 18 | The main make tasks for common static analysis and testing are `lint`, `format`, `lint-fix`, `unit`, `integration`, and `cli`. 19 | 20 | See `make help` for all the current make tasks. 21 | 22 | ## Architecture 23 | 24 | Syft is used to generate a Software Bill of Materials (SBOM) from different kinds of input. 25 | 26 | ### Code organization for the cmd package 27 | 28 | Syft's entrypoint can be found in the `cmd` package at `cmd/syft/main.go`. `main.go` builds a new syft `cli` via `cli.New()` 29 | and then executes the `cli` via `cli.Execute()`. The `cli` package is responsible for parsing command line arguments, 30 | setting up the application context and configuration, and executing the application. Each of syft's commands 31 | (e.g. `packages`, `attest`, `version`) is implemented as a `cobra.Command` in its respective `.go` file. 32 | They are registered in `syft/cli/commands.go`. 33 | ``` 34 | . 
35 | └── syft/ 36 | ├── cli/ 37 | │ ├── attest/ 38 | │ ├── attest.go 39 | │ ├── commands.go 40 | │ ├── completion.go 41 | │ ├── convert/ 42 | │ ├── convert.go 43 | │ ├── eventloop/ 44 | │ ├── options/ 45 | │ ├── packages/ 46 | │ ├── packages.go 47 | │ ├── poweruser/ 48 | │ ├── poweruser.go 49 | │ └── version.go 50 | └── main.go 51 | ``` 52 | 53 | #### Execution flow 54 | 55 | ```mermaid 56 | sequenceDiagram 57 | participant main as cmd/syft/main 58 | participant cli as cli.New() 59 | participant root as root.Execute() 60 | participant cmd as <command>.Execute() 61 | 62 | main->>+cli: 63 | 64 | Note right of cli: wire ALL CLI commands 65 | Note right of cli: add flags for ALL commands 66 | 67 | cli-->>-main: root command 68 | 69 | main->>+root: 70 | root->>+cmd: 71 | cmd-->>-root: (error) 72 | 73 | root-->>-main: (error) 74 | 75 | Note right of cmd: Execute SINGLE command from USER 76 | ``` 77 | 78 | ### Code organization for syft library 79 | 80 | Syft's core library (see, exported) functionality is implemented in the `syft` package. The `syft` package is responsible for organizing the core 81 | SBOM data model, its translated output formats, and the core SBOM generation logic. 82 | 83 | - analysis creates a static SBOM which can be encoded and decoded 84 | - format objects should strive to not add or enrich data in encoding that could otherwise be done during analysis 85 | - package catalogers and their organization can be viewed/added to the `syft/pkg/cataloger` package 86 | - file catalogers and their organization can be viewed/added to the `syft/file` package 87 | - The source package provides an abstraction to allow a user to loosely define a data source that can be cataloged 88 | 89 | #### Code example of syft as a library 90 | 91 | Here is a gist of using syft as a library to generate a SBOM for a docker image: [link](https://gist.github.com/wagoodman/57ed59a6d57600c23913071b8470175b). 92 | The execution flow for the example is detailed below. 
93 | 94 | #### Execution flow examples for the syft library 95 | 96 | ```mermaid 97 | sequenceDiagram 98 | participant source as source.New(ubuntu:latest) 99 | participant sbom as sbom.SBOM 100 | participant catalog as syft.CatalogPackages(src) 101 | participant encoder as syft.Encode(sbom, format) 102 | 103 | Note right of source: use "ubuntu:latest" as SBOM input 104 | 105 | source-->>+sbom: add source to SBOM struct 106 | source-->>+catalog: pass src to generate catalog 107 | catalog-->-sbom: add cataloging results onto SBOM 108 | sbom-->>encoder: pass SBOM and format desired to syft encoder 109 | encoder-->>source: return bytes that are the SBOM of the original input 110 | 111 | Note right of catalog: cataloger configuration is done based on src 112 | ``` 113 | 114 | 115 | ### Syft Catalogers 116 | 117 | ##### Summary 118 | 119 | Catalogers are the way in which syft is able to identify and construct packages given some amount of source metadata. 120 | For example, Syft can locate and process `package-lock.json` files when performing filesystem scans. 121 | See: [how to specify file globs](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/javascript/cataloger.go#L16-L21) 122 | and an implementation of the [package-lock.json parser](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/javascript/cataloger.go#L16-L21) for a quick review. 123 | 124 | From a high level catalogers have the following properties: 125 | 126 | - They are independent from one another. The java cataloger has no idea of the processes, assumptions, or results of the python cataloger, for example. 127 | 128 | - They do not know what source is being analyzed. Are we analyzing a local directory? An image? If so, the squashed representation or all layers? The catalogers do not know the answers to these questions, only that there is an interface to query for file paths and contents from an underlying "source" being scanned. 
129 | 130 | - Packages created by the cataloger should not be mutated after they are created. There is one exception made for adding CPEs to a package after the cataloging phase, but that will most likely be moved back into the cataloger in the future. 131 | 132 | #### Building a new Cataloger 133 | 134 | Catalogers must fulfill the interface [found here](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger.go). 135 | This means that when building a new cataloger, the new struct must implement both method signatures of `Catalog` and `Name`. 136 | 137 | A top level view of the functions that construct all the catalogers can be found [here](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/cataloger.go). 138 | When an author has finished writing a new cataloger this is the spot to plug in the new catalog constructor. 139 | 140 | For a top level view of how the catalogers are used see [this function](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/catalog.go#L41-L100) as a reference. It ranges over all catalogers passed as an argument and invokes the `Catalog` method: 141 | 142 | Each cataloger has its own `Catalog` method, but this does not mean that they are all vastly different. 143 | Take a look at the `apkdb` cataloger for alpine to see how it [constructs a generic.NewCataloger](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/apkdb/cataloger.go). 144 | 145 | `generic.NewCataloger` is an abstraction syft uses to make writing common components easier. First, it takes the `catalogerName` to identify the cataloger. 146 | On the other side of the call it uses two key pieces which inform the cataloger how to identify and return packages, the `globPatterns` and the `parseFunction`: 147 | - The first piece is a `parseByGlob` matching pattern used to identify the files that contain the package metadata. 
148 | See [here for the APK example](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/apk_metadata.go#L16-L41). 149 | - The other is a `parseFunction` which informs the cataloger what to do when it has found one of the above matched files. 150 | See this [link for an example](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L22-L102). 151 | 152 | If you're unsure about using the `Generic Cataloger` and think the use case being filled requires something more custom, 153 | just file an issue or ask in our slack, and we'd be more than happy to help on the design. 154 | 155 | Identified packages share a common struct so be sure that when the new cataloger is constructing a new package it is using the [`Package` struct](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/package.go#L16-L31). 156 | 157 | Metadata Note: Identified packages are also assigned specific metadata that can be unique to their environment. 158 | See [this folder](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg) for examples of the different metadata types. 159 | These are plugged into the `MetadataType` and `Metadata` fields in the above struct. `MetadataType` informs which type is being used. `Metadata` is an interface converted to that type. 160 | 161 | Finally, here is an example of where the package construction is done in the apk cataloger. The first link is where `newPackage` is called in the `parseFunction`. The second link shows the package construction: 162 | - [Call for new package](https://github.com/anchore/syft/blob/v0.70.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L106) 163 | - [APK Package Constructor](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/apkdb/package.go#L12-L27) 164 | 165 | If you have more questions about implementing a cataloger or questions about one you might be currently working on, 166 | always feel free to file an issue or reach out to us [on slack](https://anchore.com/slack). 
167 | 168 | #### Searching for files 169 | 170 | All catalogers are provided an instance of the [`source.FileResolver`](https://github.com/anchore/syft/blob/v0.70.0/syft/source/file_resolver.go#L8) to interface with the image and search for files. The implementations for these 171 | abstractions leverage [`stereoscope`](https://github.com/anchore/stereoscope) in order to perform searching. Here is a 172 | rough outline how that works: 173 | 174 | 1. a stereoscope `file.Index` is searched based on the input given (a path, glob, or MIME type). The index is relatively fast to search, but requires results to be filtered down to the files that exist in the specific layer(s) of interest. This is done automatically by the `filetree.Searcher` abstraction. This abstraction will fallback to searching directly against the raw `filetree.FileTree` if the index does not contain the file(s) of interest. Note: the `filetree.Searcher` is used by the `source.FileResolver` abstraction. 175 | 2. Once the set of files are returned from the `filetree.Searcher` the results are filtered down further to return the most unique file results. For example, you may have requested for files by a glob that returns multiple results. These results are filtered down to deduplicate by real files, so if a result contains two references to the same file, say one accessed via symlink and one accessed via the real path, then the real path reference is returned and the symlink reference is filtered out. If both were accessed by symlink then the first (by lexical order) is returned. This is done automatically by the `source.FileResolver` abstraction. 176 | 3. By the time results reach the `pkg.Cataloger` you are guaranteed to have a set of unique files that exist in the layer(s) of interest (relative to what the resolver supports). 177 | 178 | ## Testing 179 | 180 | ### Levels of testing 181 | 182 | - `unit`: The default level of test which is distributed throughout the repo are unit tests. 
Any `_test.go` file that 183 | does not reside somewhere within the `/test` directory is a unit test. Other forms of testing should be organized in 184 | the `/test` directory. These tests should focus on correctness of functionality in depth. % test coverage metrics 185 | only consider unit tests and no other forms of testing. 186 | 187 | - `integration`: located within `test/integration`, these tests focus on the behavior surfaced by the common library 188 | entrypoints from the `syft` package and make light assertions about the results surfaced. Additionally, these tests 189 | tend to make diversity assertions for enum-like objects, ensuring that as enum values are added to a definition 190 | integration tests will automatically fail if no test attempts to use that enum value. For more details see 191 | the "Data diversity and freshness assertions" section below. 192 | 193 | - `cli`: located within `test/cli`, these are tests that test the correctness of application behavior from a 194 | snapshot build. This should be used in cases where a unit or integration test will not do or if you are looking 195 | for in-depth testing of code in the `cmd/` package (such as testing the proper behavior of application configuration, 196 | CLI switches, and glue code before syft library calls). 197 | 198 | - `acceptance`: located within `test/compare` and `test/install`, these are smoke-like tests that ensure that application 199 | packaging and installation works as expected. For example, during release we provide RPM packages as a download 200 | artifact. We also have an accompanying RPM acceptance test that installs the RPM from a snapshot build and ensures the 201 | output of a syft invocation matches canned expected output. New acceptance tests should be added for each release artifact 202 | and architecture supported (when possible). 
203 | 204 | ### Data diversity and freshness assertions 205 | 206 | It is important that tests against the codebase are flexible enough to begin failing when they do not cover "enough" 207 | of the objects under test. "Cover" in this case does not mean that some percentage of the code has been executed 208 | during testing, but instead that there is enough diversity of data input reflected in testing relative to the 209 | definitions available. 210 | 211 | For instance, consider an enum-like value like so: 212 | ```go 213 | type Language string 214 | 215 | const ( 216 | Java Language = "java" 217 | JavaScript Language = "javascript" 218 | Python Language = "python" 219 | Ruby Language = "ruby" 220 | Go Language = "go" 221 | ) 222 | ``` 223 | 224 | Say we have a test that exercises all the languages defined today: 225 | 226 | ```go 227 | func TestCatalogPackages(t *testing.T) { 228 | testTable := []struct { 229 | // ... the set of test cases that test all languages 230 | } 231 | for _, test := range cases { 232 | t.Run(test.name, func (t *testing.T) { 233 | // use inputFixturePath and assert that syft.CatalogPackages() returns the set of expected Package objects 234 | // ... 235 | }) 236 | } 237 | } 238 | ``` 239 | 240 | Where each test case has a `inputFixturePath` that would result with packages from each language. This test is 241 | brittle since it does not assert that all languages were exercised directly and future modifications (such as 242 | adding a new language) won't be covered by any test cases. 243 | 244 | To address this the enum-like object should have a definition of all objects that can be used in testing: 245 | 246 | ```go 247 | type Language string 248 | 249 | // const( Java Language = ..., ... 
) 250 | 251 | var AllLanguages = []Language{ 252 | Java, 253 | JavaScript, 254 | Python, 255 | Ruby, 256 | Go, 257 | Rust, 258 | } 259 | ``` 260 | 261 | Allowing testing to automatically fail when adding a new language: 262 | 263 | ```go 264 | func TestCatalogPackages(t *testing.T) { 265 | testTable := []struct { 266 | // ... the set of test cases that (hopefully) covers all languages 267 | } 268 | 269 | // new stuff... 270 | observedLanguages := strset.New() 271 | 272 | for _, test := range cases { 273 | t.Run(test.name, func (t *testing.T) { 274 | // use inputFixturePath and assert that syft.CatalogPackages() returns the set of expected Package objects 275 | // ... 276 | 277 | // new stuff... 278 | for _, actualPkg := range actual { 279 | observedLanguages.Add(string(actualPkg.Language)) 280 | } 281 | 282 | }) 283 | } 284 | 285 | // new stuff... 286 | for _, expectedLanguage := range pkg.AllLanguages { 287 | if !observedLanguages.Contains(expectedLanguage) { 288 | t.Errorf("failed to test language=%q", expectedLanguage) 289 | } 290 | } 291 | } 292 | ``` 293 | 294 | This is a better test since it will fail when someone adds a new language but fails to write a test case that should 295 | exercise that new language. This method is ideal for integration-level testing, where testing correctness in depth 296 | is not needed (that is what unit tests are for) but instead testing in breadth to ensure that units are well integrated. 297 | 298 | A similar case can be made for data freshness; if the quality of the results will be diminished if the input data 299 | is not kept up to date then a test should be written (when possible) to assert any input data is not stale. 300 | 301 | An example of this is the static list of licenses that is stored in `internal/spdxlicense` for use by the SPDX 302 | presenters. 
This list is updated and published periodically by an external group and syft can grab and update this 303 | list by running `go generate ./...` from the root of the repo. 304 | 305 | An integration test has been written that grabs the latest license list version externally and compares that version 306 | with the version generated in the codebase. If they differ, the test fails, indicating to someone that there is an 307 | action needed to update it. 308 | 309 | **_The key takeaway is to try and write tests that fail when data assumptions change and not just when code changes._** 310 | 311 | ### Snapshot tests 312 | 313 | The format objects make a lot of use of "snapshot" testing, where you save the expected output bytes from a call into the 314 | git repository and during testing make a comparison of the actual bytes from the subject under test with the golden 315 | copy saved in the repo. The "golden" files are stored in the `test-fixtures/snapshot` directory relative to the go 316 | package under test and should always be updated by invoking `go test` on the specific test file with a specific CLI 317 | update flag provided. 318 | 319 | Many of the `Format` tests make use of this approach, where the raw SBOM report is saved in the repo and the test 320 | compares that SBOM with what is generated from the latest presenter code. For instance, at the time of this writing 321 | the CycloneDX presenter snapshots can be updated by running: 322 | 323 | ```bash 324 | go test ./internal/formats -update-cyclonedx 325 | ``` 326 | 327 | These flags are defined at the top of the test files that have tests that use the snapshot files. 328 | 329 | Snapshot testing is only as good as the manual verification of the golden snapshot file saved to the repo! Be careful 330 | and diligent when updating these files. 
331 | 332 | 333 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/distroless/static-debian11:debug AS build 2 | 3 | # Add a new stage for the final image 4 | FROM scratch 5 | 6 | # Copy the ca-certificates.crt file from the build stage 7 | COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt 8 | 9 | # Set the working directory within the container 10 | WORKDIR /tmp 11 | 12 | # Copy the "syft" binary from your project directory to the container 13 | COPY syft / 14 | 15 | # Define the ARGs for labeling 16 | ARG BUILD_DATE 17 | ARG BUILD_VERSION 18 | ARG VCS_REF 19 | ARG VCS_URL 20 | 21 | # Define the image labels 22 | LABEL org.opencontainers.image.created=$BUILD_DATE 23 | LABEL org.opencontainers.image.title="syft" 24 | LABEL org.opencontainers.image.description="CLI tool and library for generating a Software Bill of Materials from container images and filesystems" 25 | LABEL org.opencontainers.image.source=$VCS_URL 26 | LABEL org.opencontainers.image.revision=$VCS_REF 27 | LABEL org.opencontainers.image.vendor="Anchore, Inc." 
28 | LABEL org.opencontainers.image.version=$BUILD_VERSION 29 | LABEL org.opencontainers.image.licenses="Apache-2.0" 30 | LABEL io.artifacthub.package.readme-url="https://raw.githubusercontent.com/anchore/syft/main/README.md" 31 | LABEL io.artifacthub.package.logo-url="https://user-images.githubusercontent.com/5199289/136844524-1527b09f-c5cb-4aa9-be54-5aa92a6086c1.png" 32 | LABEL io.artifacthub.package.license="Apache-2.0" 33 | 34 | # Set the entry point for the container 35 | ENTRYPOINT ["/syft"] 36 | -------------------------------------------------------------------------------- /Dockerfile.debug: -------------------------------------------------------------------------------- 1 | FROM gcr.io/distroless/static-debian11:debug 2 | 3 | # create the /tmp dir, which is needed for image content cache 4 | WORKDIR /tmp 5 | 6 | COPY syft / 7 | 8 | ARG BUILD_DATE 9 | ARG BUILD_VERSION 10 | ARG VCS_REF 11 | ARG VCS_URL 12 | 13 | LABEL org.opencontainers.image.created=$BUILD_DATE 14 | LABEL org.opencontainers.image.title="syft" 15 | LABEL org.opencontainers.image.description="CLI tool and library for generating a Software Bill of Materials from container images and filesystems" 16 | LABEL org.opencontainers.image.source=$VCS_URL 17 | LABEL org.opencontainers.image.revision=$VCS_REF 18 | LABEL org.opencontainers.image.vendor="Anchore, Inc." 
19 | LABEL org.opencontainers.image.version=$BUILD_VERSION 20 | LABEL org.opencontainers.image.licenses="Apache-2.0" 21 | LABEL io.artifacthub.package.readme-url="https://raw.githubusercontent.com/anchore/syft/main/README.md" 22 | LABEL io.artifacthub.package.logo-url="https://user-images.githubusercontent.com/5199289/136844524-1527b09f-c5cb-4aa9-be54-5aa92a6086c1.png" 23 | LABEL io.artifacthub.package.license="Apache-2.0" 24 | 25 | ENTRYPOINT ["/syft"] 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | BIN := syft 2 | TEMP_DIR := ./.tmp 3 | 4 | # Command templates ################################# 5 | LINT_CMD := $(TEMP_DIR)/golangci-lint run --tests=false 6 | GOIMPORTS_CMD := $(TEMP_DIR)/gosimports -local github.com/anchore 7 | RELEASE_CMD := $(TEMP_DIR)/goreleaser release --clean 8 | SNAPSHOT_CMD := $(RELEASE_CMD) --skip-publish --skip-sign --snapshot 9 | CHRONICLE_CMD = $(TEMP_DIR)/chronicle 10 | GLOW_CMD = $(TEMP_DIR)/glow 11 | 12 | # Tool versions ################################# 13 | GOLANGCILINT_VERSION := v1.52.2 14 | GOSIMPORTS_VERSION := v0.3.8 15 | BOUNCER_VERSION := v0.4.0 16 | CHRONICLE_VERSION := v0.6.0 17 | GORELEASER_VERSION := v1.18.2 18 | YAJSV_VERSION := v1.4.1 19 | COSIGN_VERSION := v2.0.2 20 | QUILL_VERSION := v0.2.0 21 | GLOW_VERSION := v1.5.1 22 | 23 | # Formatting variables ################################# 24 | BOLD := $(shell tput -T linux bold) 25 | PURPLE := $(shell tput -T linux setaf 5) 26 | GREEN := $(shell tput -T 
linux setaf 2) 27 | CYAN := $(shell tput -T linux setaf 6) 28 | RED := $(shell tput -T linux setaf 1) 29 | RESET := $(shell tput -T linux sgr0) 30 | TITLE := $(BOLD)$(PURPLE) 31 | SUCCESS := $(BOLD)$(GREEN) 32 | 33 | # Test variables ################################# 34 | COMPARE_DIR := ./test/compare 35 | COMPARE_TEST_IMAGE := centos:8.2.2004 36 | COVERAGE_THRESHOLD := 62 # the quality gate lower threshold for unit test total % coverage (by function statements) 37 | 38 | ## Build variables ################################# 39 | VERSION := $(shell git describe --dirty --always --tags) 40 | DIST_DIR := ./dist 41 | SNAPSHOT_DIR := ./snapshot 42 | CHANGELOG := CHANGELOG.md 43 | OS := $(shell uname | tr '[:upper:]' '[:lower:]') 44 | SNAPSHOT_BIN := $(realpath $(shell pwd)/$(SNAPSHOT_DIR)/$(OS)-build_$(OS)_amd64_v1/$(BIN)) 45 | 46 | ifndef VERSION 47 | $(error VERSION is not set) 48 | endif 49 | 50 | define title 51 | @printf '$(TITLE)$(1)$(RESET)\n' 52 | endef 53 | 54 | define safe_rm_rf 55 | bash -c 'test -z "$(1)" && false || rm -rf $(1)' 56 | endef 57 | 58 | define safe_rm_rf_children 59 | bash -c 'test -z "$(1)" && false || rm -rf $(1)/*' 60 | endef 61 | 62 | .DEFAULT_GOAL:=help 63 | 64 | 65 | .PHONY: all 66 | all: static-analysis test ## Run all linux-based checks (linting, license check, unit, integration, and linux compare tests) 67 | @printf '$(SUCCESS)All checks pass!$(RESET)\n' 68 | 69 | .PHONY: static-analysis 70 | static-analysis: check-go-mod-tidy lint check-licenses check-json-schema-drift ## Run all static analysis checks 71 | 72 | .PHONY: test 73 | test: unit integration validate-cyclonedx-schema benchmark cli ## Run all tests (currently unit, integration, linux compare, and cli tests) 74 | 75 | 76 | ## Bootstrapping targets ################################# 77 | 78 | .PHONY: bootstrap 79 | bootstrap: $(TEMP_DIR) bootstrap-go bootstrap-tools ## Download and install all tooling dependencies (+ prep tooling in the ./tmp dir) 80 | $(call 
title,Bootstrapping dependencies) 81 | 82 | .PHONY: bootstrap-tools 83 | bootstrap-tools: $(TEMP_DIR) 84 | curl -sSfL https://raw.githubusercontent.com/anchore/quill/main/install.sh | sh -s -- -b $(TEMP_DIR)/ $(QUILL_VERSION) 85 | GO111MODULE=off GOBIN=$(realpath $(TEMP_DIR)) go get -u golang.org/x/perf/cmd/benchstat 86 | curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(TEMP_DIR)/ $(GOLANGCILINT_VERSION) 87 | curl -sSfL https://raw.githubusercontent.com/wagoodman/go-bouncer/master/bouncer.sh | sh -s -- -b $(TEMP_DIR)/ $(BOUNCER_VERSION) 88 | curl -sSfL https://raw.githubusercontent.com/anchore/chronicle/main/install.sh | sh -s -- -b $(TEMP_DIR)/ $(CHRONICLE_VERSION) 89 | .github/scripts/goreleaser-install.sh -d -b $(TEMP_DIR)/ $(GORELEASER_VERSION) 90 | # the only difference between goimports and gosimports is that gosimports removes extra whitespace between import blocks (see https://github.com/golang/go/issues/20818) 91 | GOBIN="$(realpath $(TEMP_DIR))" go install github.com/rinchsan/gosimports/cmd/gosimports@$(GOSIMPORTS_VERSION) 92 | GOBIN="$(realpath $(TEMP_DIR))" go install github.com/neilpa/yajsv@$(YAJSV_VERSION) 93 | GOBIN="$(realpath $(TEMP_DIR))" go install github.com/sigstore/cosign/v2/cmd/cosign@$(COSIGN_VERSION) 94 | GOBIN="$(realpath $(TEMP_DIR))" go install github.com/charmbracelet/glow@$(GLOW_VERSION) 95 | 96 | .PHONY: bootstrap-go 97 | bootstrap-go: 98 | go mod download 99 | 100 | $(TEMP_DIR): 101 | mkdir -p $(TEMP_DIR) 102 | 103 | 104 | ## Static analysis targets ################################# 105 | 106 | .PHONY: lint 107 | lint: ## Run gofmt + golangci lint checks 108 | $(call title,Running linters) 109 | # ensure there are no go fmt differences 110 | @printf "files with gofmt issues: [$(shell gofmt -l -s .)]\n" 111 | @test -z "$(shell gofmt -l -s .)" 112 | 113 | # run all golangci-lint rules 114 | $(LINT_CMD) 115 | @[ -z "$(shell $(GOIMPORTS_CMD) -d .)" ] || (echo "goimports needs to be 
fixed" && false) 116 | 117 | # go tooling does not play well with certain filename characters, ensure the common cases don't result in future "go get" failures 118 | $(eval MALFORMED_FILENAMES := $(shell find . | grep -e ':')) 119 | @bash -c "[[ '$(MALFORMED_FILENAMES)' == '' ]] || (printf '\nfound unsupported filename characters:\n$(MALFORMED_FILENAMES)\n\n' && false)" 120 | 121 | .PHONY: format 122 | format: ## Auto-format all source code 123 | $(call title,Running formatters) 124 | gofmt -w -s . 125 | $(GOIMPORTS_CMD) -w . 126 | go mod tidy 127 | 128 | .PHONY: lint-fix 129 | lint-fix: format ## Auto-format all source code + run golangci lint fixers 130 | $(call title,Running lint fixers) 131 | $(LINT_CMD) --fix 132 | 133 | .PHONY: check-licenses 134 | check-licenses: ## Ensure transitive dependencies are compliant with the current license policy 135 | $(call title,Checking for license compliance) 136 | $(TEMP_DIR)/bouncer check ./... 137 | 138 | check-go-mod-tidy: 139 | @ .github/scripts/go-mod-tidy-check.sh && echo "go.mod and go.sum are tidy!" 140 | 141 | check-json-schema-drift: 142 | $(call title,Ensure there is no drift between the JSON schema and the code) 143 | @.github/scripts/json-schema-drift-check.sh 144 | 145 | ## Testing targets ################################# 146 | 147 | .PHONY: unit 148 | unit: $(TEMP_DIR) fixtures ## Run unit tests (with coverage) 149 | $(call title,Running unit tests) 150 | go test -coverprofile $(TEMP_DIR)/unit-coverage-details.txt $(shell go list ./... 
| grep -v anchore/syft/test) 151 | @.github/scripts/coverage.py $(COVERAGE_THRESHOLD) $(TEMP_DIR)/unit-coverage-details.txt 152 | 153 | .PHONY: integration 154 | integration: ## Run integration tests 155 | $(call title,Running integration tests) 156 | go test -v ./test/integration 157 | 158 | .PHONY: validate-cyclonedx-schema 159 | validate-cyclonedx-schema: 160 | cd schema/cyclonedx && make 161 | 162 | .PHONY: cli 163 | cli: $(SNAPSHOT_DIR) ## Run CLI tests 164 | chmod 755 "$(SNAPSHOT_BIN)" 165 | $(SNAPSHOT_BIN) version 166 | SYFT_BINARY_LOCATION='$(SNAPSHOT_BIN)' \ 167 | go test -count=1 -timeout=15m -v ./test/cli 168 | 169 | 170 | ## Benchmark test targets ################################# 171 | 172 | .PHONY: benchmark 173 | benchmark: $(TEMP_DIR) ## Run benchmark tests and compare against the baseline (if available) 174 | $(call title,Running benchmark tests) 175 | go test -p 1 -run=^Benchmark -bench=. -count=7 -benchmem ./... | tee $(TEMP_DIR)/benchmark-$(VERSION).txt 176 | (test -s $(TEMP_DIR)/benchmark-main.txt && \ 177 | $(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-main.txt $(TEMP_DIR)/benchmark-$(VERSION).txt || \ 178 | $(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-$(VERSION).txt) \ 179 | | tee $(TEMP_DIR)/benchstat.txt 180 | 181 | .PHONY: show-benchstat 182 | show-benchstat: 183 | @cat $(TEMP_DIR)/benchstat.txt 184 | 185 | 186 | ## Test-fixture-related targets ################################# 187 | 188 | # note: this is used by CI to determine if various test fixture cache should be restored or recreated 189 | fingerprints: 190 | $(call title,Creating all test cache input fingerprints) 191 | 192 | # for IMAGE integration test fixtures 193 | cd test/integration/test-fixtures && \ 194 | make cache.fingerprint 195 | 196 | # for BINARY test fixtures 197 | cd syft/pkg/cataloger/binary/test-fixtures && \ 198 | make cache.fingerprint 199 | 200 | # for JAVA BUILD test fixtures 201 | cd syft/pkg/cataloger/java/test-fixtures/java-builds && \ 202 | make 
cache.fingerprint 203 | 204 | # for GO BINARY test fixtures 205 | cd syft/pkg/cataloger/golang/test-fixtures/archs && \ 206 | make binaries.fingerprint 207 | 208 | # for RPM test fixtures 209 | cd syft/pkg/cataloger/rpm/test-fixtures && \ 210 | make rpms.fingerprint 211 | 212 | # for Kernel test fixtures 213 | cd syft/pkg/cataloger/kernel/test-fixtures && \ 214 | make cache.fingerprint 215 | 216 | # for INSTALL integration test fixtures 217 | cd test/install && \ 218 | make cache.fingerprint 219 | 220 | # for CLI test fixtures 221 | cd test/cli/test-fixtures && \ 222 | make cache.fingerprint 223 | 224 | .PHONY: fixtures 225 | fixtures: 226 | $(call title,Generating test fixtures) 227 | cd syft/pkg/cataloger/java/test-fixtures/java-builds && make 228 | cd syft/pkg/cataloger/rpm/test-fixtures && make 229 | cd syft/pkg/cataloger/binary/test-fixtures && make 230 | 231 | .PHONY: show-test-image-cache 232 | show-test-image-cache: ## Show all docker and image tar cache 233 | $(call title,Docker daemon cache) 234 | @docker images --format '{{.ID}} {{.Repository}}:{{.Tag}}' | grep stereoscope-fixture- | sort 235 | 236 | $(call title,Tar cache) 237 | @find . -type f -wholename "**/test-fixtures/cache/stereoscope-fixture-*.tar" | sort 238 | 239 | .PHONY: show-test-snapshots 240 | show-test-snapshots: ## Show all test snapshots 241 | $(call title,Test snapshots) 242 | @find . 
-type f -wholename "**/test-fixtures/snapshot/*" | sort 243 | 244 | 245 | ## install.sh testing targets ################################# 246 | 247 | install-test: $(SNAPSHOT_DIR) 248 | cd test/install && \ 249 | make 250 | 251 | install-test-cache-save: $(SNAPSHOT_DIR) 252 | cd test/install && \ 253 | make save 254 | 255 | install-test-cache-load: $(SNAPSHOT_DIR) 256 | cd test/install && \ 257 | make load 258 | 259 | install-test-ci-mac: $(SNAPSHOT_DIR) 260 | cd test/install && \ 261 | make ci-test-mac 262 | 263 | .PHONY: generate-compare-file 264 | generate-compare-file: 265 | $(call title,Generating compare test file) 266 | go run ./cmd/syft $(COMPARE_TEST_IMAGE) -o json > $(COMPARE_DIR)/test-fixtures/acceptance-centos-8.2.2004.json 267 | 268 | # note: we cannot clean the snapshot directory since the pipeline builds the snapshot separately 269 | .PHONY: compare-mac 270 | compare-mac: $(TEMP_DIR) $(SNAPSHOT_DIR) ## Run compare tests on build snapshot binaries and packages (Mac) 271 | $(call title,Running compare test: Run on Mac) 272 | $(COMPARE_DIR)/mac.sh \ 273 | $(SNAPSHOT_DIR) \ 274 | $(COMPARE_DIR) \ 275 | $(COMPARE_TEST_IMAGE) \ 276 | $(TEMP_DIR) 277 | 278 | # note: we cannot clean the snapshot directory since the pipeline builds the snapshot separately 279 | .PHONY: compare-linux 280 | compare-linux: compare-test-deb-package-install compare-test-rpm-package-install ## Run compare tests on build snapshot binaries and packages (Linux) 281 | 282 | .PHONY: compare-test-deb-package-install 283 | compare-test-deb-package-install: $(TEMP_DIR) $(SNAPSHOT_DIR) 284 | $(call title,Running compare test: DEB install) 285 | $(COMPARE_DIR)/deb.sh \ 286 | $(SNAPSHOT_DIR) \ 287 | $(COMPARE_DIR) \ 288 | $(COMPARE_TEST_IMAGE) \ 289 | $(TEMP_DIR) 290 | 291 | .PHONY: compare-test-rpm-package-install 292 | compare-test-rpm-package-install: $(TEMP_DIR) $(SNAPSHOT_DIR) 293 | $(call title,Running compare test: RPM install) 294 | $(COMPARE_DIR)/rpm.sh \ 295 | $(SNAPSHOT_DIR) \ 296 
| $(COMPARE_DIR) \ 297 | $(COMPARE_TEST_IMAGE) \ 298 | $(TEMP_DIR) 299 | 300 | 301 | ## Code generation targets ################################# 302 | 303 | .PHONY: generate-json-schema 304 | generate-json-schema: ## Generate a new json schema 305 | cd schema/json && go run generate.go 306 | 307 | .PHONY: generate-license-list 308 | generate-license-list: ## Generate an updated spdx license list 309 | go generate ./internal/spdxlicense/... 310 | gofmt -s -w ./internal/spdxlicense 311 | 312 | 313 | ## Build-related targets ################################# 314 | 315 | .PHONY: build 316 | build: $(SNAPSHOT_DIR) ## Build release snapshot binaries and packages 317 | 318 | $(SNAPSHOT_DIR): ## Build snapshot release binaries and packages 319 | $(call title,Building snapshot artifacts) 320 | 321 | # create a config with the dist dir overridden 322 | echo "dist: $(SNAPSHOT_DIR)" > $(TEMP_DIR)/goreleaser.yaml 323 | cat .goreleaser.yaml >> $(TEMP_DIR)/goreleaser.yaml 324 | 325 | # build release snapshots 326 | $(SNAPSHOT_CMD) --config $(TEMP_DIR)/goreleaser.yaml 327 | 328 | .PHONY: changelog 329 | changelog: clean-changelog ## Generate and show the changelog for the current unreleased version 330 | $(CHRONICLE_CMD) -vvv -n --version-file VERSION > $(CHANGELOG) 331 | @$(GLOW_CMD) $(CHANGELOG) 332 | 333 | $(CHANGELOG): 334 | $(CHRONICLE_CMD) -vvv > $(CHANGELOG) 335 | 336 | .PHONY: release 337 | release: 338 | @.github/scripts/trigger-release.sh 339 | 340 | .PHONY: ci-release 341 | ci-release: ci-check clean-dist $(CHANGELOG) 342 | $(call title,Publishing release artifacts) 343 | 344 | # create a config with the dist dir overridden 345 | echo "dist: $(DIST_DIR)" > $(TEMP_DIR)/goreleaser.yaml 346 | cat .goreleaser.yaml >> $(TEMP_DIR)/goreleaser.yaml 347 | 348 | bash -c "\ 349 | $(RELEASE_CMD) \ 350 | --config $(TEMP_DIR)/goreleaser.yaml \ 351 | --release-notes <(cat $(CHANGELOG)) \ 352 | || (cat /tmp/quill-*.log && false)" 353 | 354 | # upload the version file that supports the 
application version update check (excluding pre-releases) 355 | .github/scripts/update-version-file.sh "$(DIST_DIR)" "$(VERSION)" 356 | 357 | .PHONY: ci-check 358 | ci-check: 359 | @.github/scripts/ci-check.sh 360 | 361 | ## Cleanup targets ################################# 362 | 363 | .PHONY: clean 364 | clean: clean-dist clean-snapshot clean-test-image-cache ## Remove previous builds, result reports, and test cache 365 | $(call safe_rm_rf_children,$(TEMP_DIR)) 366 | 367 | .PHONY: clean-snapshot 368 | clean-snapshot: 369 | $(call safe_rm_rf,$(SNAPSHOT_DIR)) 370 | rm -f $(TEMP_DIR)/goreleaser.yaml 371 | 372 | .PHONY: clean-dist 373 | clean-dist: clean-changelog 374 | $(call safe_rm_rf,$(DIST_DIR)) 375 | rm -f $(TEMP_DIR)/goreleaser.yaml 376 | 377 | .PHONY: clean-changelog 378 | clean-changelog: 379 | rm -f $(CHANGELOG) VERSION 380 | 381 | clean-test-image-cache: clean-test-image-tar-cache clean-test-image-docker-cache ## Clean test image cache 382 | 383 | .PHONY: clear-test-image-tar-cache 384 | clean-test-image-tar-cache: ## Delete all test cache (built docker image tars) 385 | find . -type f -wholename "**/test-fixtures/cache/stereoscope-fixture-*.tar" -delete 386 | 387 | .PHONY: clear-test-image-docker-cache 388 | clean-test-image-docker-cache: ## Purge all test docker images 389 | docker images --format '{{.ID}} {{.Repository}}' | grep stereoscope-fixture- | awk '{print $$1}' | uniq | xargs -r docker rmi --force 390 | 391 | ## Halp! 
################################# 392 | 393 | .PHONY: help 394 | help: ## Display this help 395 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "$(BOLD)$(CYAN)%-25s$(RESET)%s\n", $$1, $$2}' 396 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🌐 ChatCVE Langchain App 2 | 3 | ## 🎯 Description 4 | The ChatCVE Lang Chain App is an AI powered DevSecOps application 🔍, to help organizations triage and aggregate CVE (Common Vulnerabilities & Exposures) information. By leveraging state-of-the-art Natural Language Processing, ChatCVE makes detailed Software Bill of Materials (SBOM) data accessible to everyone, because Security is everyone's job. From Security analysts to Audit and Compliance teams, ChatCVE allows a more intuitive and engaging way to extract key findings. 🤖💬 5 | 6 | ## 🚀 Features 7 | - **🧠 Natural Language Queries**: Ask questions using plain English (or your preferred language)! No need to grapple with complex query languages. 8 | - **🔮 AI-Powered Analysis**: Our app is backed by the Langchain AI framework. It can easily surface important vulnerability information using Human Language. The requests are automatically translated to [SQL](https://python.langchain.com/docs/integrations/toolkits/sql_database) for querying specific artifact findings. 9 | - **⏭️ Proactive Assistance**: Anyone can identify potential concerns proactively to improve the overall Cyber Security Posture. 10 | - **🔁 Triage & Remediation**: Assist in Vulnerability remediation using National Vulnerability Database (NVD), Syft, and Grype wrappers. Can be extended to triage using other CVE advisory databases. 11 | - **🖥️ UI/UX**: Simple Natural Language Processing command input and on-screen history log. 12 | 13 | ## 📲 Installation 14 | 15 | 1. 
Clone this repository: 16 | ```bash 17 | git clone https://github.com/jasona7/ChatCVE.git 18 | ``` 19 | 2. Enter the project directory: 20 | ```bash 21 | cd ChatCVE 22 | ``` 23 | 3. Set up a Python environment: 24 | ```bash 25 | python3 -m venv .env 26 | source .env/bin/activate 27 | ``` 28 | 4. Install Grype and Syft 29 | ```bash 30 | pip install syft 31 | curl -sSfL https://raw.githubusercontent.com/anchore/grype/main/install.sh | sh -s -- -b /usr/local/bin 32 | ``` 33 | 5. Install requirements 34 | ```bash 35 | pip install -r requirements.txt 36 | ``` 37 | 6. Create the app_patrol and nvd_cves tables in the app_patrol.db SQLite database 38 | ```bash 39 | sqlite3> CREATE TABLE app_patrol ( 40 | NAME TEXT, 41 | INSTALLED TEXT, 42 | FIXED_IN TEXT, 43 | TYPE TEXT, 44 | VULNERABILITY TEXT, 45 | SEVERITY TEXT, 46 | IMAGE_TAG TEXT, 47 | DATE_ADDED TEXT); 48 | 49 | sqlite3> CREATE TABLE nvd_cves ( 50 | cve_id TEXT PRIMARY KEY, 51 | source_id TEXT, 52 | published TEXT, 53 | last_modified TEXT, 54 | vuln_status TEXT, 55 | description TEXT, 56 | cvss_v30_vector_string TEXT, 57 | cvss_v30_base_score REAL, 58 | cvss_v30_base_severity TEXT, 59 | cvss_v2_vector_string TEXT, 60 | cvss_v2_base_score REAL, 61 | cvss_v2_base_severity TEXT, 62 | weakness TEXT, 63 | ref_info TEXT); 64 | 65 | 7. Create an images.txt file with your images to scan. 
Include the registry, repo, and version tag: 66 | 67 | public.ecr.aws/tanzu_observability_demo_app/to-demo/inventory:latest 68 | public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest 69 | public.ecr.aws/tanzu_observability_demo_app/to-demo/delivery:latest 70 | public.ecr.aws/tanzu_observability_demo_app/to-demo/warehouse:latest 71 | public.ecr.aws/tanzu_observability_demo_app/to-demo/notification:latest 72 | public.ecr.aws/tanzu_observability_demo_app/to-demo/styling:latest 73 | public.ecr.aws/tanzu_observability_demo_app/to-demo/packaging:latest 74 | public.ecr.aws/tanzu_observability_demo_app/to-demo/printing:latest 75 | public.ecr.aws/tanzu_observability_demo_app/to-demo/payments:latest 76 | public.ecr.aws/tanzu_observability_demo_app/to-demo/loadgen:latest 77 | public.ecr.aws/amazoncorretto/amazoncorretto:20-al2-jdk 78 | public.ecr.aws/docker/library/tomcat:9.0.75-jdk8-corretto-al2 79 | public.ecr.aws/bitnami/minio:2023.5.18 80 | public.ecr.aws/p4c2e2q6/miniamplify-x86:latest 81 | public.ecr.aws/xray/aws-xray-daemon:3.3.7 82 | public.ecr.aws/datadog/agent:7.45.0-rc.5 83 | public.ecr.aws/aws-ec2/aws-node-termination-handler:v1.19.0 84 | public.ecr.aws/aws-gcr-solutions/data-transfer-hub-ecr:v1.0.4 85 | public.ecr.aws/bitnami/jenkins:2.387.3 86 | ``` 87 | 88 | 89 | 90 | ## 💻 Usage 91 | 1. Initiate a scan that will kick off the SBOM and CVE artifact creation. SBOM reports will appear in output/sbom, 92 | and scan summaries will appear in output/scan_summary. Vulnerability findings are stored in the app_patrol table. 93 | ``` bash 94 | python scan.py 95 | ``` 96 | 97 | 2. Fetch the CVEs published to the NVD during the last 24 hours and store them in the nvd_cves table of the SQLite3 backend: 98 | ``` bash 99 | python fetch_daily_nvd_cves.py 100 | ``` 101 | 102 | 3. 
Check the SBOM records have been added: 103 | ``` bash 104 | sqlite3 app_patrol.db 105 | sqlite> SELECT * FROM app_patrol LIMIT 10; 106 | tar|1.34+dfsg-1||deb|CVE-2005-2541|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15 107 | login|1:4.8.1-1||deb|CVE-2007-5686|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15 108 | passwd|1:4.8.1-1||deb|CVE-2007-5686|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15 109 | libssl1.1|1.1.1n-0+deb11u3||deb|CVE-2007-6755|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15 110 | openssl|1.1.1n-0+deb11u3||deb|CVE-2007-6755|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15 111 | jetty-setuid-java|1.0.4||java-archive|CVE-2009-5045|High|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15 112 | jetty-setuid-java|1.0.4||java-archive|CVE-2009-5046|Medium|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15 113 | libssl1.1|1.1.1n-0+deb11u3||deb|CVE-2010-0928|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15 114 | openssl|1.1.1n-0+deb11u3||deb|CVE-2010-0928|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15 115 | libc-bin|2.31-13+deb11u3||deb|CVE-2010-4756|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15 116 | ``` 117 | 118 | 4. Start a Chat-CVE OpenAI SQL Agent session (localhost:5000): 119 | 120 | NOTE: Refine guardrails, temperature, etc to improve accuracy and output. 121 | ```bash 122 | python frontend/app.py 123 | ``` 124 | ![CVE Query Interface](assets/chatcve_ui.png) 125 | 126 | NOTE: chat_cve.py will let you launch a command line session. 
127 | 128 | Query at the prompt: 129 | ```bash 130 | Enter a question or type 'exit' to quit: Which NAME in app_patrol table has the most CRITICAL Severity records? 131 | ``` 132 | Expected Output: 133 | ```bash 134 | ** Thought: I should query the app_patrol table to get the name with the most Critical CVEs. ** 135 | Thought: I should execute the query to get the results. 136 | Action: query_sql_db 137 | Action Input: SELECT NAME, COUNT(*) AS Top FROM app_patrol WHERE SEVERITY = 'Critical' GROUP BY NAME ORDER BY Top DESC LIMIT 3 138 | Observation: [('curl', 42), ('libcurl4', 42), ('libpcre2-8-0', 16)] 139 | Thought: I now know the final answer. 140 | Final Answer: The top 3 Names in the app_patrol table sorted by the top count of critical in the severity column are 'curl', 'libcurl4', and 'libpcre2-8-0'. 141 | ``` 142 | 143 | 144 | ## 🌈 Software Supply Chain and Security Use Cases 145 | - **Security Analysts**: Assist with triage and find detailed CVE information quickly without dealing with intricate databases. 146 | - **Audit Teams**: Efficiently target auditing efforts and ensure compliance with security standards. 147 | - **Compliance Teams**: Maintain documentation and track usage for attestation efforts, ensuring all known libraries are documented. Non-technical personnel can simply use human language. 148 | - **Development Teams**: Efficiently target underlying libraries and get access to remediation suggestions. 149 | 150 | ## ⭐⭐ Example prompt queries and results 151 | ```bash 152 | What percentage of records are for curl in the app_patrol table? 153 | 154 | Thought: I should query the app_patrol table to get the percentage of records for curl. 155 | Action: query_sql_db 156 | Action Input: SELECT COUNT(*) * 100.0 / (SELECT COUNT(*) FROM app_patrol) FROM app_patrol WHERE NAME = 'curl' 157 | Observation: [(6.006697362913353,)] 158 | Thought: I now know the final answer. 159 | Final Answer: 6.006697362913353% of records in the app_patrol table are for curl. 
160 | 161 | How many critical records are there in the app_patrol table? 162 | 163 | Thought: I should query the app_patrol table for the number of critical records. 164 | Action: query_sql_db 165 | Action Input: SELECT COUNT(*) FROM app_patrol WHERE SEVERITY = 'Critical' 166 | Observation: [(246,)] 167 | Thought: I now know the final answer. 168 | Final Answer: There are 246 critical records in the app_patrol table. 169 | 170 | Which name in the app_patrol table has the most Critical Severity records? 171 | 172 | Thought: I should query the app_patrol table to find the name with the most Critical Severity records. 173 | Action: query_sql_db 174 | Action Input: SELECT NAME, COUNT(*) AS count FROM app_patrol WHERE SEVERITY = 'Critical' GROUP BY NAME ORDER BY count DESC LIMIT 10; 175 | Observation: [('curl', 42), ('libcurl4', 42), ('libpcre2-8-0', 16), ('libksba8', 15), ('jetty-setuid-java', 14), ('libdb5.3', 9), ('libtasn1-6', 9), ('zlib1g', 8), ('System.Drawing.Common', 7), ('libexpat1', 7)] 176 | Thought: I now know the final answer. 177 | Final Answer: The name with the most Critical Severity records is 'curl' with 42 records. 178 | ``` 179 | 180 | 181 | ## 🤝 Contributing 182 | We welcome your feedback! 🙌 183 | For all significant changes, please open an issue first to discuss what you'd like to improve. 184 | 185 | ## 📃 License 186 | Our project is licensed under the [MIT License](https://choosealicense.com/licenses/mit/). 187 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # Release 2 | 3 | ## Creating a release 4 | 5 | This release process itself should be as automated as possible, and has only a few steps: 6 | 7 | 1. **Trigger a new release with `make release`**. At this point you'll see a preview 8 | changelog in the terminal. 
If you're happy with the changelog, press `y` to continue, otherwise 9 | you can abort and adjust the labels on the PRs and issues to be included in the release and 10 | re-run the release trigger command. 11 | 12 | 1. A release admin must approve the release on the GitHub Actions release pipeline run page. 13 | Once approved, the release pipeline will generate all assets and publish a GitHub Release. 14 | 15 | 1. If there is a release Milestone, close it. 16 | 17 | Ideally releasing should be done often with small increments when possible. Unless a 18 | breaking change is blocking the release, or no fixes/features have been merged, a good 19 | target release cadence is between every 1 or 2 weeks. 20 | 21 | 22 | ## Retracting a release 23 | 24 | If a release is found to be problematic, it can be retracted with the following steps: 25 | 26 | - Deleting the GitHub Release 27 | - Untag the docker images in the `ghcr.io` and `docker.io` registries 28 | - Revert the brew formula in [`anchore/homebrew-syft`](https://github.com/anchore/homebrew-syft) to point to the previous release 29 | - Add a new `retract` entry in the go.mod for the versioned release 30 | 31 | **Note**: do not delete release tags from the git repository since there may already be references to the release 32 | in the go proxy, which will cause confusion when trying to reuse the tag later (the H1 hash will not match and there 33 | will be a warning when users try to pull the new release). 34 | 35 | 36 | ## Background 37 | 38 | A good release process has the following qualities: 39 | 40 | 1. There is a way to plan what should be in a release 41 | 1. There is a way to see what is actually in a release 42 | 1. Allow for different kinds of releases (major breaking vs backwards compatible enhancements vs patch updates) 43 | 1. 
Specify a repeatable way to build and publish software artifacts 44 | 45 | 46 | ### Planning a release 47 | 48 | To indicate a set of features to be released together add each issue to an in-repository 49 | Milestone named with major-minor version to be released (e.g. `v0.1`). It is OK for other 50 | features to be in the release that were not originally planned, and these issues and PRs 51 | do not need to be added to the Milestone in question. Only the set of features that, when 52 | completed, would allow the release to be considered complete. A Milestone is only used to: 53 | 54 | - Plan what is desired to be in a release 55 | - Track progress to indicate when we may be ready to cut a new release 56 | 57 | Not all releases need to be planned. For instance, patch releases for fixes should be 58 | released when they are ready and when releasing would not interfere with another current 59 | release (where some partial or breaking features have already been merged). 60 | 61 | Unless necessary, feature releases should be small and frequent, which may obviate the 62 | need for regular release planning under a Milestone. 63 | 64 | 65 | ### What is in a release 66 | 67 | Milestones are specifically for planning a release, not necessarily tracking all changes 68 | that a release may bring (and more importantly, not all releases are necessarily planned 69 | either). 70 | 71 | This is one of the (many) reasons for a Changelog. A good Changelog lists changes grouped 72 | by the type of change (new, enhancement, deprecation, breaking, bug fix, security fix), in 73 | chronological order (within groups), linking the PR where the change was made in the 74 | Changelog line. Furthermore, there should be a place to see all released versions, the 75 | release date for each release, the semantic version of the release, and the set of changes 76 | for each release. 
77 | 78 | **This project auto-generates the Changelog contents for each current release and posts the 79 | generated contents to the GitHub Release page**. Leveraging the GitHub Releases feature 80 | allows GitHub to manage the Changelog on each release outside of the git source tree while 81 | still being hosted with the released assets. 82 | 83 | The Changelog is generated from the metadata from in-repository issues and PRs, using 84 | labels to guide what kind of change each item is (e.g. breaking, new feature, bug fix, 85 | etc.). Only issues/PRs with select labels are included in the Changelog, and only if the 86 | issue/PR was created after the last release. Additional labels are used to exclude items 87 | from the Changelog. 88 | 89 | The above suggestions imply that we should: 90 | 91 | - Ensure that each PR and issue to be included in the Changelog has a sufficiently 92 | descriptive title 93 | - Apply the appropriate label to PRs and/or issues to drive specific change type 94 | sections (deprecated, breaking, security, bug, etc.) 95 | 96 | **With this approach, as we cultivate good organization of PRs and issues, we automatically 97 | get an equally good Changelog.** 98 | 99 | 100 | ### Major, minor, and patch releases 101 | 102 | The latest version of the tool is the only supported version, which implies that multiple 103 | parallel release branches will not be a regular process (if ever). Multiple releases can 104 | be planned in parallel; however, only one can be actively developed at a time. That is, if 105 | PRs attached to a release Milestone have been merged into the main branch, that release is 106 | now the "next" release. **This implies that the source of truth for release lies with the 107 | git log and Changelog, not with the release Milestones** (which are purely for planning and 108 | tracking). 109 | 110 | Semantic versioning should be used to indicate breaking changes, new features, and fixes. 
111 | The exception to this is `< 1.0`, where the major version is not bumped for breaking changes, 112 | instead the minor version indicates both new features and breaking changes. 113 | -------------------------------------------------------------------------------- /__pycache__/scan.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasona7/ChatCVE/c7063214401f3c6b2702d9d37215697c8b826908/__pycache__/scan.cpython-310.pyc -------------------------------------------------------------------------------- /app_patrol.d: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasona7/ChatCVE/c7063214401f3c6b2702d9d37215697c8b826908/app_patrol.d -------------------------------------------------------------------------------- /app_patrol.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasona7/ChatCVE/c7063214401f3c6b2702d9d37215697c8b826908/app_patrol.db -------------------------------------------------------------------------------- /artifacthub-repo.yml: -------------------------------------------------------------------------------- 1 | # See documentation here: https://github.com/artifacthub/hub/blob/v1.6.0/docs/metadata/artifacthub-repo.yml 2 | repositoryID: eced152f-b15d-4879-8b3b-1175397192ba 3 | owners: 4 | - name: wagoodman 5 | email: wagoodman@gmail.com 6 | -------------------------------------------------------------------------------- /assets/chatcve_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasona7/ChatCVE/c7063214401f3c6b2702d9d37215697c8b826908/assets/chatcve_ui.png -------------------------------------------------------------------------------- /chat_cve.py: -------------------------------------------------------------------------------- 1 | from 
langchain.sql_database import SQLDatabase 2 | from langchain.llms.openai import OpenAI 3 | from langchain.agents import create_sql_agent, AgentExecutor 4 | from langchain_community.agent_toolkits import SQLDatabaseToolkit 5 | import os 6 | 7 | # Initialize the LLM (large language model) with the OpenAI API key read from the OPENAI_API_KEY environment variable 8 | 9 | llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY")) 10 | 11 | # Define the SQLDatabaseToolkit connection to the app_patrol SQLite database 12 | db = SQLDatabase.from_uri("sqlite:////ChatCVE/app_patrol.db") 13 | toolkit = SQLDatabaseToolkit(db=db, llm=llm) # The toolkit is given both the database and the LLM 14 | 15 | agent_executor = create_sql_agent( 16 | llm=llm, 17 | toolkit=toolkit, 18 | verbose=True 19 | ) 20 | 21 | # Take user input from the command line and run the agent on it until the user types 'exit' 22 | while True: 23 | guardrails = "Do not use sql LIMIT in the results. " 24 | user_input = input("Enter a question or type 'exit' to quit: ") 25 | if user_input.lower() == 'exit': 26 | break 27 | 28 | # Prepend the guardrail instructions to the question before handing it to the agent 29 | safe_user_input = guardrails + user_input 30 | agent_executor.run(safe_user_input) 31 | -------------------------------------------------------------------------------- /fetch_daily_nvd_cves.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | import urllib.parse 3 | import json 4 | import sqlite3 5 | from datetime import datetime, timedelta 6 | import logging 7 | 8 | 9 | # Set up logging 10 | logging.basicConfig( 11 | filename='app.log', 12 | filemode='a', 13 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 14 | level=logging.INFO 15 | ) 16 | 17 | # Start time (local clock; used only to measure how long the script runs) 18 | start_time = datetime.now() 19 | 20 | # Get current UTC time and 24 hours earlier 21 | now = datetime.utcnow() 22 | one_day_ago = now - timedelta(days=1) 23 | 24 | # Format the times as strings in the format expected by the pubStartDate/pubEndDate query parameters 25 | now_str = 
now.strftime("%Y-%m-%dT%H:%M:%S") + '.999+00:00' # utcnow() yields UTC, so the offset must be +00:00 (urlencode escapes the '+') 26 | one_day_ago_str = one_day_ago.strftime("%Y-%m-%dT%H:%M:%S") + '.000+00:00' 27 | 28 | # Construct the URL 29 | base_url = "https://services.nvd.nist.gov/rest/json/cves/2.0" 30 | query_params = { 31 | "pubStartDate": one_day_ago_str, 32 | "pubEndDate": now_str 33 | } 34 | url = base_url + "?" + urllib.parse.urlencode(query_params) 35 | 36 | # Make the request and parse the response 37 | response = urllib.request.urlopen(url) 38 | data = json.loads(response.read().decode()) 39 | 40 | # Open a connection to the SQLite database and create a cursor object 41 | conn = sqlite3.connect('../app_patrol.db') 42 | cursor = conn.cursor() 43 | 44 | count = 0 45 | severity_count = {} 46 | 47 | # For each CVE in the response, insert the data into the nvd_cves table 48 | for vuln in data['vulnerabilities']: 49 | count += 1 50 | cve = vuln['cve'] 51 | metric_v3 = cve['metrics']['cvssMetricV30'][0]['cvssData'] if cve.get('metrics', {}).get('cvssMetricV30') else {} # tolerate records without a metrics object 52 | metric_v2 = cve['metrics']['cvssMetricV2'][0]['cvssData'] if cve.get('metrics', {}).get('cvssMetricV2') else {} 53 | 54 | severity = metric_v3.get('baseSeverity', 'N/A') 55 | severity_count[severity] = severity_count.get(severity, 0) + 1 56 | 57 | cursor.execute(""" 58 | INSERT OR REPLACE INTO nvd_cves 59 | (cve_id, source_id, published, last_modified, vuln_status, description, 60 | cvss_v30_vector_string, cvss_v30_base_score, cvss_v30_base_severity, 61 | cvss_v2_vector_string, cvss_v2_base_score, cvss_v2_base_severity, 62 | weakness, ref_info) 63 | VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
64 | """, 65 | (cve['id'], 66 | cve['sourceIdentifier'], 67 | cve['published'], 68 | cve['lastModified'], 69 | cve['vulnStatus'], 70 | cve['descriptions'][0]['value'] if cve.get('descriptions') else None, 71 | metric_v3.get('vectorString'), 72 | metric_v3.get('baseScore'), 73 | metric_v3.get('baseSeverity'), 74 | metric_v2.get('vectorString'), 75 | metric_v2.get('baseScore'), 76 | metric_v2.get('baseSeverity'), 77 | cve['weaknesses'][0]['description'][0]['value'] if cve.get('weaknesses') else None, 78 | json.dumps(cve['references']))) 79 | 80 | # Commit the changes and close the connection 81 | conn.commit() 82 | conn.close() 83 | 84 | # End time 85 | end_time = datetime.now() 86 | 87 | # Calculate execution time 88 | execution_time = end_time - start_time 89 | 90 | # Write summary to log file 91 | log_dir = '/ChatCVE/logs/' 92 | log_filename = now.strftime("%Y-%m-%d_%H_%M_%S_fetch_summary.log").replace(':', '_').replace('/', '_') 93 | with open(log_dir + log_filename, 'w') as f: 94 | f.write(f"Script execution summary:\n") 95 | f.write(f"Records created or updated: {count}\n") 96 | f.write(f"Execution time: {execution_time}\n") 97 | f.write(f"Severity count:\n") 98 | for severity, count in severity_count.items(): 99 | f.write(f"{severity}: {count}\n") 100 | -------------------------------------------------------------------------------- /frontend/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template, request, redirect, url_for 2 | from langchain.sql_database import SQLDatabase 3 | from langchain.llms.openai import OpenAI 4 | from langchain.agents import create_sql_agent 5 | from langchain_community.agent_toolkits import SQLDatabaseToolkit 6 | import os 7 | import re 8 | from sqlalchemy.exc import SQLAlchemyError 9 | 10 | app = Flask(__name__) 11 | 12 | # Initialize LLM with OpenAI API key 13 | llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY")) 14 | 15 | # Define the 
SQLDatabaseToolkit connection 16 | db = SQLDatabase.from_uri("sqlite:////ChatCVE/app_patrol.db") 17 | toolkit = SQLDatabaseToolkit(db=db, llm=llm) 18 | 19 | agent_executor = create_sql_agent(llm=llm, toolkit=toolkit, verbose=True) 20 | 21 | # History of questions and answers 22 | history = [] 23 | 24 | def execute_sql_query(query): 25 | try: 26 | # Assuming db.session.execute is the correct way to run queries with SQLDatabase 27 | result = db.session.execute(query) 28 | return [dict(row) for row in result.fetchall()] 29 | except SQLAlchemyError as e: 30 | return str(e) 31 | 32 | @app.route('/', methods=['GET', 'POST']) 33 | def home(): 34 | if request.method == 'POST': 35 | user_input = request.form.get('question') 36 | if user_input: 37 | guardrails = "Do not use sql LIMIT in the results. the tables in the database are nvd_findings and also app_patrol. Output should only be the SL query result." 38 | safe_user_input = guardrails + user_input 39 | response = agent_executor.run(safe_user_input) 40 | 41 | # Check if the response is a SQL statement 42 | if re.match(r"\s*SELECT\s+", response, re.IGNORECASE): 43 | # Execute the SQL query and get the results 44 | results = execute_sql_query(response) 45 | # Format the results as a string or handle as needed 46 | formatted_results = ', '.join([str(row) for row in results]) 47 | response = formatted_results 48 | 49 | # Insert the new entry at the beginning of the history list 50 | history.insert(0, (user_input, response)) 51 | 52 | return render_template('index.html', history=history) 53 | 54 | if __name__ == '__main__': 55 | app.run(debug=True) 56 | -------------------------------------------------------------------------------- /frontend/static/style.css: -------------------------------------------------------------------------------- 1 | /* Use Google Fonts for better typography */ 2 | @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap'); 3 | 4 | /* Basic reset */ 5 | * { 6 | 
box-sizing: border-box; 7 | margin: 10px; 8 | padding: 0; 9 | } 10 | 11 | body { 12 | font-family: 'Roboto', sans-serif; 13 | background-color: #f4f4f4; 14 | line-height: 1.6; 15 | padding: 20px; 16 | } 17 | 18 | .container { 19 | max-width: 800px; 20 | margin: 20px auto; 21 | padding: 20px; 22 | background: #fff; 23 | box-shadow: 0 3px 7px rgba(0, 0, 0, 0.1); 24 | } 25 | 26 | h1 { 27 | color: #333; 28 | margin-bottom: 1rem; 29 | } 30 | 31 | .question-form input[type="text"] { 32 | width: 70%; 33 | padding: 10px; 34 | margin-right: 10px; 35 | border: 1px solid #ccc; 36 | border-radius: 4px; 37 | } 38 | 39 | .question-form input[type="submit"] { 40 | padding: 10px 20px; 41 | border: none; 42 | border-radius: 4px; 43 | background: #007bff; 44 | color: #fff; 45 | cursor: pointer; 46 | } 47 | 48 | .question-form input[type="submit"]:hover { 49 | background: #0056b3; 50 | } 51 | 52 | .question-input { 53 | width: 100%; /* Make the input stretch to the full width of its container */ 54 | padding: 15px; /* Increase padding for larger touch area and better visibility */ 55 | font-size: 1.25rem; /* Increase font size for better readability */ 56 | margin-bottom: 10px; /* Add some space below the input field */ 57 | border: 2px solid #007bff; /* Add a border that stands out */ 58 | border-radius: 4px; /* Slightly rounded corners for a modern look */ 59 | box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1); /* Subtle inner shadow for depth */ 60 | } 61 | 62 | .submit-btn { 63 | padding: 15px 30px; /* Larger padding */ 64 | font-size: 1.25rem; /* Increase font size to match the input field */ 65 | text-transform: uppercase; /* Optional: uppercase text for the button */ 66 | letter-spacing: 1px; /* Optional: spacing out letters a bit */ 67 | border-radius: 4px; /* Match the border radius of the input */ 68 | border: 2px solid transparent; /* Hide border */ 69 | background-color: #007bff; /* Button color */ 70 | color: white; /* Text color */ 71 | cursor: pointer; /* Cursor to 
indicate it's clickable */ 72 | transition: background-color 0.3s ease; /* Smooth transition for hover effect */ 73 | } 74 | 75 | .submit-btn:hover { 76 | background-color: #0056b3; /* Darker shade when hovered */ 77 | } 78 | 79 | .history { 80 | list-style-type: none; 81 | margin-top: 2rem; 82 | } 83 | 84 | .history-item { 85 | background-color: #f9f9f9; 86 | border-left: 5px solid #007bff; 87 | margin-bottom: 10px; 88 | padding: 10px; 89 | } 90 | 91 | /* Responsive adjustments */ 92 | @media (max-width: 768px) { 93 | .container { 94 | width: 95%; /* Slight padding from the edges on smaller screens */ 95 | } 96 | 97 | .question-form { 98 | flex-direction: column; /* Stack input and button on top of each other */ 99 | } 100 | 101 | .submit-btn { 102 | width: 100%; /* Full width button on smaller screens */ 103 | margin-top: 10px; /* Add space between input and button */ 104 | } 105 | 106 | .history-table { 107 | width: 100%; /* Full width table */ 108 | border-collapse: collapse; /* Collapse borders */ 109 | } 110 | 111 | .history-item { 112 | background-color: #f9f9f9; /* Light grey background for each row */ 113 | border-bottom: 1px solid #e1e1e1; /* Separator for rows */ 114 | } 115 | 116 | .history-item:last-child { 117 | border-bottom: none; /* No border for the last row */ 118 | } 119 | 120 | /* Remove width and margin from .question and .answer to let flex handle the sizing */ 121 | .question, 122 | .answer { 123 | padding: 10px; /* Spacing inside cells */ 124 | /* width: flex; This line is commented out because 'flex' is not a valid value for width */ 125 | font-weight: bold; /* Make question text bold */ 126 | /* margin-left: 15px; */ 127 | /* margin-right: 15px; */ 128 | /* margin-top: 8px; */ 129 | } 130 | 131 | /* Add display flex to .history-item to allow flexible space distribution between question and answer */ 132 | .history-item { 133 | display: flex; 134 | align-items: flex-start; /* Align children to the start of the cross axis */ 135 | 
background-color: #f9f9f9; 136 | border-left: 5px solid #007bff; 137 | margin-bottom: 10px; 138 | padding: 10px; 139 | } 140 | 141 | /* Ensure that the direct children of .history-item (typically divs for Q and A) take full width if needed */ 142 | .history-item > div { 143 | flex: 1; /* Grow to use the available space */ 144 | margin-bottom: 0.5rem; /* Space between question and answer */ 145 | } 146 | 147 | /* Align the content of the Q and A containers at the start */ 148 | .history-item .question, 149 | .history-item .answer { 150 | align-self: flex-start; 151 | width: 100%; /* Ensure each takes full width of the flex container */ 152 | } 153 | 154 | 155 | .question { 156 | flex: 1 100%; /* Allow question to grow and ensure it takes full width on wrap */ 157 | font-weight: bold; /* Make question text bold */ 158 | margin-bottom: 0.5rem; /* Add some space below the question */ 159 | } 160 | 161 | .answer { 162 | flex: 3 100%; /* Allow answer to take the remaining space and full width on wrap */ 163 | word-break: break-word; /* Ensure long words do not overflow */ 164 | margin-bottom: 0.5rem; /* Add some space below the answer for when they stack on small screens */ 165 | } 166 | 167 | /* ... other styles ... */ 168 | 169 | @media (max-width: 768px) { 170 | .history-item { 171 | flex-direction: column; /* Stack question and answer on top of each other */ 172 | } 173 | 174 | .question, 175 | .answer { 176 | width: 100%; /* Full width for both question and answer on smaller screens */ 177 | } 178 | } -------------------------------------------------------------------------------- /frontend/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | ChatCVE Questions 6 | 7 | 8 | 9 | 10 | 11 |
12 |

Ask a Question about the CVE Scans

13 |
14 | 15 | 16 |
17 | 18 |
19 |

History

20 | {% if history %} 21 | 22 | {% for q, a in history %} 23 | 24 | 25 | 26 | 27 | {% endfor %} 28 |
Q: {{ q }}A: {{ a }}
29 | {% else %} 30 |

No history yet.

31 | {% endif %} 32 |
33 | 34 |
#!/usr/bin/env python3
"""Scan container images with syft (SBOM) and grype (vulnerabilities).

For every image listed in ``images.txt`` the script:

1. runs ``syft`` and stores the resulting SBOM as JSON under ``output/sbom``;
2. runs ``grype`` and inserts each reported match into the ``app_patrol``
   table of a SQLite database;
3. finally logs a run summary and writes it under ``output/scan_summary``.

The scan only runs when the file is executed as a script, so the helper
functions can be imported (e.g. by the unit tests) without side effects
beyond configuring the logger.
"""

import datetime
import json
import logging
import sqlite3
import subprocess
from contextlib import closing
from logging.handlers import TimedRotatingFileHandler
from pathlib import Path

# Set up logging with rotation at midnight and keeping 7 days history
logger = logging.getLogger("ChatCVELogger")
logger.setLevel(logging.INFO)
handler = TimedRotatingFileHandler('ChatCVE_logs.log', when="midnight", interval=1, backupCount=7)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y%m%d%H%M%S')
handler.setFormatter(formatter)
logger.addHandler(handler)

# Preferred location of the syft binary; adjust the full path as needed.
SYFT_EXECUTABLE = '/usr/bin/syft'

# Adjust the base directory to your project's needs
base_dir = Path(__file__).parent
scan_output_rootdir = base_dir / 'output'
scan_output_sbom_subdir = scan_output_rootdir / 'sbom'
scan_output_summary_subdir = scan_output_rootdir / 'scan_summary'

_INSERT_SQL = (
    "INSERT INTO app_patrol (NAME, INSTALLED, FIXED_IN, TYPE, VULNERABILITY, "
    "SEVERITY, IMAGE_TAG, DATE_ADDED) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))"
)


def _run_tool(command, tool, image):
    """Run *command* and return the CompletedProcess, or None if it cannot start."""
    try:
        return subprocess.run(command, capture_output=True, text=True)
    except OSError as e:
        # Typically FileNotFoundError/PermissionError: the binary is missing or
        # not executable. Skip this image instead of aborting the whole run.
        logger.error("Unable to run %s for image: %s: %s", tool, image, e)
        return None


def _parse_json(text, image):
    """Decode tool output as JSON; log and return None when it is malformed."""
    try:
        return json.loads(text)
    except json.JSONDecodeError as e:
        logger.error("Error parsing JSON output for image: %s: %s", image, e)
        return None


def syft_scan(image):
    """Generate an SBOM for *image* with syft.

    Args:
        image: Image reference passed verbatim to syft.

    Returns:
        The decoded JSON document printed by syft, or None when syft could
        not be started, exited with a non-zero status, or produced output
        that is not valid JSON. Every failure is logged.
    """
    # Keep the pinned path when it exists, otherwise fall back to a PATH
    # lookup, which is how grype is located as well.
    executable = SYFT_EXECUTABLE if Path(SYFT_EXECUTABLE).is_file() else 'syft'
    result = _run_tool([executable, '-o', 'cyclone-dx-json', image], 'syft', image)
    if result is None:
        return None
    if result.returncode != 0:
        logger.error("Error executing syft command on image: %s", image)
        logger.error("Error details: %s", result.stderr.strip())
        return None
    return _parse_json(result.stdout, image)


def grype_scan(image):
    """Scan *image* for vulnerabilities with grype.

    Args:
        image: Image reference passed verbatim to grype.

    Returns:
        The decoded JSON document printed by grype, or None when grype could
        not be started, exited with a non-zero status, or produced output
        that is not valid JSON. Every failure is logged.
    """
    result = _run_tool(['grype', '-o', 'json', image], 'grype', image)
    if result is None:
        return None
    if result.returncode != 0:
        logger.error("Error executing grype command on image: %s: %s", image, result.stderr.strip())
        return None
    return _parse_json(result.stdout, image)


def _fixed_in(vulnerability):
    """Return the fixed version recorded for one vulnerability entry, if any."""
    fixed = vulnerability.get('fixedInVersion')
    if fixed is None:
        # NOTE(review): newer grype releases appear to report fixes under
        # vulnerability.fix.versions (a list) instead of fixedInVersion;
        # confirm against the grype version in use.
        versions = (vulnerability.get('fix') or {}).get('versions') or []
        fixed = ', '.join(str(version) for version in versions) or None
    return fixed


def _match_to_row(match, image_name):
    """Flatten one entry of the scan's 'matches' list into INSERT parameters."""
    artifact = match.get('artifact') or {}
    vulnerability = match.get('vulnerability') or {}
    return (
        artifact.get('name'),
        artifact.get('version'),
        _fixed_in(vulnerability),
        artifact.get('type'),
        vulnerability.get('id'),
        vulnerability.get('severity'),
        image_name,
    )


def write_to_db(db_name, scan_result, image_name):
    """Insert every match of a grype scan into the ``app_patrol`` table.

    The inserts run in a single transaction: either all matches of the scan
    are stored or none are. Errors are logged and never propagated, so one
    failing image does not stop the remaining scans.

    Args:
        db_name: Path of the SQLite database; the table must already exist.
        scan_result: Decoded grype JSON document; its 'matches' list is read.
        image_name: Image reference stored in the IMAGE_TAG column.
    """
    try:
        rows = [_match_to_row(match, image_name)
                for match in scan_result.get('matches') or []]
        # closing() guarantees the connection is released, and is only
        # entered once connect() succeeded, so a failed connect can no
        # longer trigger a secondary error during cleanup.
        with closing(sqlite3.connect(db_name)) as conn:
            with conn:  # commit on success, roll back on error
                conn.executemany(_INSERT_SQL, rows)
    except sqlite3.Error as e:
        logger.error("SQLite error: %s", e)
    except Exception as e:  # best-effort boundary: log and keep scanning
        logger.error("Unexpected error when writing to DB: %s", e)


def _read_images(images_file):
    """Return the non-empty, stripped lines of *images_file* ([] if missing)."""
    if not Path(images_file).is_file():
        logger.error("The file '%s' does not exist.", images_file)
        return []
    with open(images_file) as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line]


def main(images_file='images.txt', db_name='app_patrol.db'):
    """Scan every image listed in *images_file* and record the results.

    Args:
        images_file: Text file with one image reference per line.
        db_name: SQLite database receiving the grype matches.

    Returns:
        The summary text that was logged and written to the summary file.
    """
    images = _read_images(images_file)

    successful_scans = 0
    start_time = datetime.datetime.now()

    # Ensure directories exist
    scan_output_sbom_subdir.mkdir(parents=True, exist_ok=True)
    scan_output_summary_subdir.mkdir(parents=True, exist_ok=True)

    for image in images:
        sbom = syft_scan(image)
        if sbom is None:
            continue

        formatted_now = datetime.datetime.now().strftime("%Y%m%d")
        filename = image.replace('/', '_').replace(':', '__') + '.json'
        sbom_filename = scan_output_sbom_subdir / f"{formatted_now}_{filename}"

        try:
            with open(sbom_filename, 'w') as f:
                json.dump(sbom, f)
            successful_scans += 1
        except IOError as e:
            logger.error("Error writing to file: %s: %s", sbom_filename, e)

        grype_result = grype_scan(image)
        if grype_result is not None:
            write_to_db(db_name, grype_result, image)

    finished = datetime.datetime.now()
    execution_time = finished - start_time
    summary = f"Scanned {successful_scans} images\n" \
              f"Results stored in {successful_scans} files\n" \
              f"Total number of images scanned: {len(images)}\n" \
              f"Total execution time: {execution_time}\n"
    logger.info(summary)

    # Write summary to a file in the scan_summary directory. The timestamp is
    # taken here rather than inside the loop so it is defined even when no
    # image was scanned successfully.
    summary_file_path = scan_output_summary_subdir / f"{finished.strftime('%Y%m%d%H%M%S')}_summary.txt"
    with open(summary_file_path, 'w') as f:
        f.write(summary)
    return summary


if __name__ == '__main__':
    main()