├── .bouncer.yaml
├── .github
    └── workflows
    │   └── codeql.yml
├── .gitignore
├── CONTRIBUTING.md
├── CONTRIBUTORS.md
├── DEVELOPING.md
├── Dockerfile
├── Dockerfile.debug
├── LICENSE
├── Makefile
├── README.md
├── RELEASE.md
├── __pycache__
    └── scan.cpython-310.pyc
├── app_patrol.d
├── app_patrol.db
├── artifacthub-repo.yml
├── assets
    └── chatcve_ui.png
├── chat_cve.py
├── fetch_daily_nvd_cves.py
├── frontend
    ├── app.py
    ├── static
    │   └── style.css
    └── templates
    │   └── index.html
├── images.txt
├── node_modules
    └── .package-lock.json
├── package-lock.json
├── package.json
├── requirements.txt
├── scan.py
└── tests
    ├── __pycache__
        └── test_scan.cpython-310.pyc
    └── test_scan.py


/.bouncer.yaml:
--------------------------------------------------------------------------------
 1 | permit:
 2 |   - BSD.*
 3 |   - CC0.*
 4 |   - MIT.*
 5 |   - Apache.*
 6 |   - MPL.*
 7 |   - ISC
 8 |   - WTFPL
 9 | 
10 | ignore-packages:
11 |   # packageurl-go is released under the MIT license located in the root of the repo at /mit.LICENSE
12 |   - github.com/anchore/packageurl-go
13 | 
14 |   # both of these dependencies are specified as Apache-2.0 in their respective GitHub READMEs
15 |   - github.com/alibabacloud-go/cr-20160607/client
16 |   - github.com/alibabacloud-go/tea-xml/service
17 | 
18 |   # crypto/internal/boring is released under the openSSL license as a part of the Golang Standard Library
19 |   - crypto/internal/boring
20 | 
21 |   # from: https://github.com/spdx/tools-golang/blob/main/LICENSE.code
22 |   # The tools-golang source code is provided and may be used, at your option,
23 |   # under either:
24 |   # * Apache License, version 2.0 (Apache-2.0), OR
25 |   # * GNU General Public License, version 2.0 or later (GPL-2.0-or-later).
26 |   # (we choose Apache-2.0)
27 |   - github.com/spdx/tools-golang
28 | 
29 |   # from: https://github.com/xi2/xz/blob/master/LICENSE
30 |   # All these files have been put into the public domain.
31 |   # You can do whatever you want with these files.
32 |   - github.com/xi2/xz
33 | 
34 |   # from: https://gitlab.com/cznic/sqlite/-/blob/v1.15.4/LICENSE
35 |   # This is a BSD-3-Clause license
36 |   - modernc.org/libc
37 |   - modernc.org/libc/errno
38 |   - modernc.org/libc/fcntl
39 |   - modernc.org/libc/fts
40 |   - modernc.org/libc/grp
41 |   - modernc.org/libc/langinfo
42 |   - modernc.org/libc/limits
43 |   - modernc.org/libc/netdb
44 |   - modernc.org/libc/netinet/in
45 |   - modernc.org/libc/poll
46 |   - modernc.org/libc/pthread
47 |   - modernc.org/libc/pwd
48 |   - modernc.org/libc/signal
49 |   - modernc.org/libc/stdio
50 |   - modernc.org/libc/stdlib
51 |   - modernc.org/libc/sys/socket
52 |   - modernc.org/libc/sys/stat
53 |   - modernc.org/libc/sys/types
54 |   - modernc.org/libc/termios
55 |   - modernc.org/libc/time
56 |   - modernc.org/libc/unistd
57 |   - modernc.org/libc/utime
58 |   - modernc.org/libc/uuid/uuid
59 |   - modernc.org/libc/wctype
60 |   - modernc.org/mathutil
61 |   - modernc.org/memory
62 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
 1 | # For most projects, this workflow file will not need changing; you simply need
 2 | # to commit it to your repository.
 3 | #
 4 | # You may wish to alter this file to override the set of languages analyzed,
 5 | # or to provide custom queries or build logic.
 6 | #
 7 | # ******** NOTE ********
 8 | # We have attempted to detect the languages in your repository. Please check
 9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 | 
14 | on:
15 |   push:
16 |     branches: [ "master" ]
17 |   pull_request:
18 |     # The branches below must be a subset of the branches above
19 |     branches: [ "master" ]
20 |   schedule:
21 |     - cron: '42 4 * * 3'
22 | 
23 | jobs:
24 |   analyze:
25 |     name: Analyze
26 |     # Runner size impacts CodeQL analysis time. To learn more, please see:
27 |     #   - https://gh.io/recommended-hardware-resources-for-running-codeql
28 |     #   - https://gh.io/supported-runners-and-hardware-resources
29 |     #   - https://gh.io/using-larger-runners
30 |     # Consider using larger runners for possible analysis time improvements.
31 |     runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
32 |     timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
33 |     permissions:
34 |       actions: read
35 |       contents: read
36 |       security-events: write
37 | 
38 |     strategy:
39 |       fail-fast: false
40 |       matrix:
41 |         language: [ 'python' ]
42 |         # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ]
43 |         # Use only 'java' to analyze code written in Java, Kotlin or both
44 |         # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
45 |         # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
46 | 
47 |     steps:
48 |     - name: Checkout repository
49 |       uses: actions/checkout@v3
50 | 
51 |     # Initializes the CodeQL tools for scanning.
52 |     - name: Initialize CodeQL
53 |       uses: github/codeql-action/init@v2
54 |       with:
55 |         languages: ${{ matrix.language }}
56 |         # If you wish to specify custom queries, you can do so here or in a config file.
57 |         # By default, queries listed here will override any specified in a config file.
58 |         # Prefix the list here with "+" to use these queries and those in the config file.
59 | 
60 |         # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
61 |         # queries: security-extended,security-and-quality
62 | 
63 | 
64 |     # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
65 |     # If this step fails, then you should remove it and run the build manually (see below)
66 |     - name: Autobuild
67 |       uses: github/codeql-action/autobuild@v2
68 | 
69 |     # ℹ️ Command-line programs to run using the OS shell.
70 |     # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
71 | 
72 |     #   If the Autobuild fails above, remove it and uncomment the following three lines.
73 |     #   Modify them (or add more) to build your code; if needed, please refer to the EXAMPLE below for guidance.
74 | 
75 |     # - run: |
76 |     #     echo "Run, Build Application using script"
77 |     #     ./location_of_script_within_repo/buildscript.sh
78 | 
79 |     - name: Perform CodeQL Analysis
80 |       uses: github/codeql-action/analyze@v2
81 |       with:
82 |         category: "/language:${{matrix.language}}"
83 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | CHANGELOG.md
 2 | VERSION
 3 | /test/results
 4 | /dist
 5 | /snapshot
 6 | .server/
 7 | .vscode/
 8 | .history/
 9 | *.fingerprint
10 | *.tar
11 | *.jar
12 | *.war
13 | *.ear
14 | *.jpi
15 | *.hpi
16 | *.zip
17 | .idea/
18 | *.log
19 | .images
20 | .tmp/
21 | coverage.txt
22 | bin/
23 | .env
24 | 
25 | # Binaries for programs and plugins
26 | *.exe
27 | *.exe~
28 | *.dll
29 | *.so
30 | *.dylib
31 | 
32 | # Test binary, build with `go test -c`
33 | *.test
34 | 
35 | # Output of the go coverage tool, specifically when used with LiteIDE
36 | *.out
37 | 
38 | # macOS Finder metadata
39 | .DS_STORE
40 | 
41 | *.profile
42 | 
43 | # attestation
44 | cosign.key
45 | cosign.pub
46 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
  1 | # Contributing to Syft
  2 | 
  3 | If you are looking to contribute to this project and want to open a GitHub pull request ("PR"), there are a few guidelines of what we are looking for in patches. Make sure you go through this document and ensure that your code proposal is aligned.
  4 | 
  5 | ## Setting up your environment
  6 | 
  7 | Before you can contribute to Syft, you need to configure your development environment.
  8 | 
  9 | ### Debian setup
 10 | 
 11 | You will need to install Go. The version on https://go.dev works best, using the system golang doesn't always work the way you might expect.
 12 | 
 13 | Refer to the go.mod file in the root of this repo for the recommended version of Go to install.
 14 | 
 15 | You will also need Docker. There's no reason the system packages shouldn't work, but we used the official Docker package. You can find instructions for installing Docker in Debian [here](https://docs.docker.com/engine/install/debian/).
 16 | 
 17 | You also need to install some Debian packages
 18 | 
 19 | ```sh
 20 | sudo apt-get install build-essential zip bc libxml2-utils git
 21 | ```
 22 | 
 23 | ## Configuring Git
 24 | 
 25 | You will need to configure your git client with your name and email address. This is easily done from the command line.
 26 | 
 27 | ```text
 28 | $ git config --global user.name "John Doe"
 29 | $ git config --global user.email "john.doe@example.com"
 30 | ```
 31 | 
 32 | This username and email address will matter later in this guide.
 33 | 
 34 | ## Fork the repo
 35 | 
 36 | You should fork the Syft repo using the "Fork" button at the top right of the Syft GitHub [site](https://github.com/anchore/syft/). You will be doing your development in your fork, then submit a pull request to Syft. There are many resources on how to use GitHub effectively; we will not cover those here.
 37 | 
 38 | ## Adding a feature or fix
 39 | 
 40 | If you look at the Syft [issue tracker](https://github.com/anchore/syft/issues), there are plenty of bugs and feature requests. Maybe look at the [good first issue](https://github.com/anchore/syft/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22) list if you're not sure where to start.
 41 | 
 42 | ## Commit guidelines
 43 | 
 44 | In the Syft project we like commits and pull requests (PR) to be easy to understand and review. Open source thrives best when everything happening is over documented and small enough to be understood.
 45 | 
 46 | ### Granular commits
 47 | 
 48 | Please try to make every commit as simple as possible, but no simpler. The idea is that each commit should be a logical unit of code. Try not to commit too many tiny changes, for example every line changed in a file as a separate commit. And also try not to make a commit enormous, for example committing all your work at the end of the day.
 49 | 
 50 | Rather than try to follow a strict guide on what is or is not best, we try to be flexible and simple in this space. Do what makes the most sense for the changes you are trying to include.
 51 | 
 52 | ### Commit title and description
 53 | 
 54 | Remember that the message you leave for a commit is for the reviewer in the present, and for someone (maybe you) changing something in the future. Please make sure the title and description used is easy to understand and explains what was done. Jokes and clever comments generally don't age well in commit messages. Just the facts please.
 55 | 
 56 | ## Sign off your work
 57 | 
 58 | The `sign-off` is an added line at the end of the explanation for the commit, certifying that you wrote it or otherwise have the right to submit it as an open-source patch. By submitting a contribution, you agree to be bound by the terms of the DCO Version 1.1 and Apache License Version 2.0.
 59 | 
 60 | Signing off a commit certifies the below Developer's Certificate of Origin (DCO):
 61 | 
 62 | ```text
 63 | Developer's Certificate of Origin 1.1
 64 | 
 65 | By making a contribution to this project, I certify that:
 66 | 
 67 |    (a) The contribution was created in whole or in part by me and I
 68 |        have the right to submit it under the open source license
 69 |        indicated in the file; or
 70 | 
 71 |    (b) The contribution is based upon previous work that, to the best
 72 |        of my knowledge, is covered under an appropriate open source
 73 |        license and I have the right under that license to submit that
 74 |        work with modifications, whether created in whole or in part
 75 |        by me, under the same open source license (unless I am
 76 |        permitted to submit under a different license), as indicated
 77 |        in the file; or
 78 | 
 79 |    (c) The contribution was provided directly to me by some other
 80 |        person who certified (a), (b) or (c) and I have not modified
 81 |        it.
 82 | 
 83 |    (d) I understand and agree that this project and the contribution
 84 |        are public and that a record of the contribution (including all
 85 |        personal information I submit with it, including my sign-off) is
 86 |        maintained indefinitely and may be redistributed consistent with
 87 |        this project or the open source license(s) involved.
 88 | ```
 89 | 
 90 | All contributions to this project are licensed under the [Apache License Version 2.0, January 2004](http://www.apache.org/licenses/).
 91 | 
 92 | When committing your change, you can add the required line manually so that it looks like this:
 93 | 
 94 | ```text
 95 | Signed-off-by: John Doe <john.doe@example.com>
 96 | ```
 97 | 
 98 | Creating a signed-off commit is then possible with `-s` or `--signoff`:
 99 | 
100 | ```text
101 | $ git commit -s -m "this is a commit message"
102 | ```
103 | 
104 | To double-check that the commit was signed-off, look at the log output:
105 | 
106 | ```text
107 | $ git log -1
108 | commit 37ceh170e4hb283bb73d958f2036ee5k07e7fde7 (HEAD -> issue-35, origin/main, main)
109 | Author: John Doe <john.doe@example.com>
110 | Date:   Mon Aug 1 11:27:13 2020 -0400
111 | 
112 |     this is a commit message
113 | 
114 |     Signed-off-by: John Doe <john.doe@example.com>
115 | ```
116 | 
117 | ## Test your changes
118 | 
119 | This project has a `Makefile` which includes many helpers running both unit and integration tests. You can run `make help` to see all the options. Although PRs will have automatic checks for these, it is useful to run them locally, ensuring they pass before submitting changes. Ensure you've bootstrapped once before running tests:
120 | 
121 | ```text
122 | $ make bootstrap
123 | ```
124 | 
125 | You only need to bootstrap once. After the bootstrap process, you can run the tests as many times as needed:
126 | 
127 | ```text
128 | $ make unit
129 | $ make integration
130 | ```
131 | 
132 | You can also run `make all` to run a more extensive test suite, but there is additional configuration that will be needed for those tests to run correctly. We will not cover the extra steps here.
133 | 
134 | ## Pull Request
135 | 
136 | If you made it this far and all the tests are passing, it's time to submit a Pull Request (PR) for Syft. Submitting a PR is always a scary moment as what happens next can be an unknown. The Syft project strives to be easy to work with, we appreciate all contributions. Nobody is going to yell at you or try to make you feel bad. We love contributions and know how scary that first PR can be.
137 | 
138 | ### PR Title and Description
139 | 
140 | Just like the commit title and description mentioned above, the PR title and description is very important for letting others know what's happening. Please include any details you think a reviewer will need to more properly review your PR.
141 | 
142 | A PR that is very large or poorly described has a higher likelihood of being pushed to the end of the list. Reviewers like PRs they can understand and quickly review.
143 | 
144 | ### What to expect next
145 | 
146 | Please be patient with the project. We try to review PRs in a timely manner, but this is highly dependent on all the other tasks we have going on. It's OK to ask for a status update every week or two, it's not OK to ask for a status update every day.
147 | 
148 | It's very likely the reviewer will have questions and suggestions for changes to your PR. If your changes don't match the current style and flow of the other code, expect a request to change what you've done.
149 | 
150 | ## Document your changes
151 | 
152 | And lastly, when proposed changes are modifying user-facing functionality or output, it is expected the PR will include updates to the documentation as well. Syft is not a project that is heavy on documentation. This will mostly be updating the README and help for the tool.
153 | 
154 | If nobody knows new features exist, they can't use them!
155 | 


--------------------------------------------------------------------------------
/CONTRIBUTORS.md:
--------------------------------------------------------------------------------
1 | # Syft Contributors
2 | 
3 | The following Syft components were contributed by external authors/organizations.
4 | 
5 | ## GraalVM Native Image
6 | 
7 | A cataloger contributed by Oracle Corporation that extracts packages given within GraalVM Native Image SBOMs.
8 | 


--------------------------------------------------------------------------------
/DEVELOPING.md:
--------------------------------------------------------------------------------
  1 | # Developing
  2 | 
  3 | ## Getting started
  4 | 
  5 | In order to test and develop in this repo you will need the following dependencies installed:
  6 | - Golang
  7 | - docker
  8 | - make
  9 | 
 10 | After cloning, the following steps can help you get set up:
 11 | 1. run `make bootstrap` to download go mod dependencies, create the `/.tmp` dir, and download helper utilities.
 12 | 2. run `make` to view the selection of developer commands in the Makefile
 13 | 3. run `make build` to build the release snapshot binaries and packages
 14 | 4. for an even quicker start you can run `go run cmd/syft/main.go` to print the syft help.
 15 | 	- this command `go run cmd/syft/main.go alpine:latest` will compile and run syft against `alpine:latest`
 16 | 5. view the README or syft help output for more output options
 17 | 
 18 | The main make tasks for common static analysis and testing are `lint`, `format`, `lint-fix`, `unit`, `integration`, and `cli`.
 19 | 
 20 | See `make help` for all the current make tasks.
 21 | 
 22 | ## Architecture
 23 | 
 24 | Syft is used to generate a Software Bill of Materials (SBOM) from different kinds of input.
 25 | 
 26 | ### Code organization for the cmd package
 27 | 
 28 | Syft's entrypoint can be found in the `cmd` package at `cmd/syft/main.go`. `main.go` builds a new syft `cli` via `cli.New()` 
 29 | and then executes the `cli` via `cli.Execute()`. The `cli` package is responsible for parsing command line arguments, 
 30 | setting up the application context and configuration, and executing the application. Each of syft's commands 
 31 | (e.g. `packages`, `attest`, `version`) are implemented as a `cobra.Command` in their respective `<command>.go` files. 
 32 | They are registered in `syft/cli/commands.go`.
 33 | ```
 34 | .
 35 | └── syft/
 36 |     ├── cli/
 37 |     │   ├── attest/
 38 |     │   ├── attest.go
 39 |     │   ├── commands.go
 40 |     │   ├── completion.go
 41 |     │   ├── convert/
 42 |     │   ├── convert.go
 43 |     │   ├── eventloop/
 44 |     │   ├── options/
 45 |     │   ├── packages/
 46 |     │   ├── packages.go
 47 |     │   ├── poweruser/
 48 |     │   ├── poweruser.go
 49 |     │   └── version.go
 50 |     └── main.go
 51 | ```
 52 | 
 53 | #### Execution flow
 54 | 
 55 | ```mermaid
 56 | sequenceDiagram
 57 |     participant main as cmd/syft/main
 58 |     participant cli as cli.New()
 59 |     participant root as root.Execute()
 60 |     participant cmd as <command>.Execute()
 61 | 
 62 |     main->>+cli: 
 63 | 
 64 |     Note right of cli: wire ALL CLI commands
 65 |     Note right of cli: add flags for ALL commands
 66 | 
 67 |     cli-->>-main:  root command 
 68 | 
 69 |     main->>+root: 
 70 |     root->>+cmd: 
 71 |     cmd-->>-root: (error)  
 72 | 
 73 |     root-->>-main: (error) 
 74 | 
 75 |     Note right of cmd: Execute SINGLE command from USER
 76 | ```
 77 | 
 78 | ### Code organization for syft library
 79 | 
 80 | Syft's core library (i.e., exported) functionality is implemented in the `syft` package. The `syft` package is responsible for organizing the core
 81 | SBOM data model, its translated output formats, and the core SBOM generation logic.
 82 | 
 83 | - analysis creates a static SBOM which can be encoded and decoded
 84 | - format objects, should strive to not add or enrich data in encoding that could otherwise be done during analysis
 85 | - package catalogers and their organization can be viewed/added to the `syft/pkg/cataloger` package 
 86 | - file catalogers and their organization can be viewed/added to the `syft/file` package
 87 | - The source package provides an abstraction to allow a user to loosely define a data source that can be cataloged
 88 | 
 89 | #### Code example of syft as a library
 90 | 
 91 | Here is a gist of using syft as a library to generate a SBOM for a docker image: [link](https://gist.github.com/wagoodman/57ed59a6d57600c23913071b8470175b).
 92 | The execution flow for the example is detailed below.
 93 | 
 94 | #### Execution flow examples for the syft library
 95 | 
 96 | ```mermaid
 97 | sequenceDiagram
 98 |     participant source as source.New(ubuntu:latest)
 99 |     participant sbom as sbom.SBOM
100 |     participant catalog as syft.CatalogPackages(src)
101 |     participant encoder as syft.Encode(sbom, format)
102 | 
103 |     Note right of source: use "ubuntu:latest" as SBOM input
104 | 
105 |     source-->>+sbom: add source to SBOM struct
106 |     source-->>+catalog: pass src to generate catalog
107 |     catalog-->-sbom: add cataloging results onto SBOM
108 |     sbom-->>encoder: pass SBOM and format desired to syft encoder
109 |     encoder-->>source: return bytes that are the SBOM of the original input 
110 | 
111 |     Note right of catalog: cataloger configuration is done based on src
112 | ```
113 | 
114 | 
115 | ### Syft Catalogers
116 | 
117 | ##### Summary
118 | 
119 | Catalogers are the way in which syft is able to identify and construct packages given some amount of source metadata.
120 | For example, Syft can locate and process `package-lock.json` files when performing filesystem scans. 
121 | See: [how to specify file globs](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/javascript/cataloger.go#L16-L21)
122 | and an implementation of the [package-lock.json parser](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/javascript/cataloger.go#L16-L21) for a quick review.
123 | 
124 | From a high level catalogers have the following properties:
125 | 
126 | - They are independent from one another. The java cataloger has no idea of the processes, assumptions, or results of the python cataloger, for example.
127 | 
128 | - They do not know what source is being analyzed. Are we analyzing a local directory? an image? if so, the squashed representation or all layers? The catalogers do not know the answers to these questions. Only that there is an interface to query for file paths and contents from an underlying "source" being scanned.
129 | 
130 | - Packages created by the cataloger should not be mutated after they are created. There is one exception made for adding CPEs to a package after the cataloging phase, but that will most likely be moved back into the cataloger in the future.
131 | 
132 | #### Building a new Cataloger
133 | 
134 | Catalogers must fulfill the interface [found here](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger.go). 
135 | This means that when building a new cataloger, the new struct must implement both method signatures of `Catalog` and `Name`.
136 | 
137 | A top level view of the functions that construct all the catalogers can be found [here](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/cataloger.go).
138 | When an author has finished writing a new cataloger this is the spot to plug in the new catalog constructor.
139 | 
140 | For a top level view of how the catalogers are used see [this function](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/catalog.go#L41-L100) as a reference. It ranges over all catalogers passed as an argument and invokes the `Catalog` method:
141 | 
142 | Each cataloger has its own `Catalog` method, but this does not mean that they are all vastly different.
143 | Take a look at the `apkdb` cataloger for alpine to see how it [constructs a generic.NewCataloger](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/apkdb/cataloger.go).
144 | 
145 | `generic.NewCataloger` is an abstraction syft uses to make writing common components easier. First, it takes the `catalogerName` to identify the cataloger.
146 | On the other side of the call it uses two key pieces which inform the cataloger how to identify and return packages, the `globPatterns` and the `parseFunction`:
147 | - The first piece is a `parseByGlob` matching pattern used to identify the files that contain the package metadata.
148 | See [here for the APK example](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/apk_metadata.go#L16-L41).
149 | - The other is a `parseFunction` which informs the cataloger what to do when it has found one of the above matched files.
150 | See this [link for an example](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L22-L102).
151 | 
152 | If you're unsure about using the `Generic Cataloger` and think the use case being filled requires something more custom
153 | just file an issue or ask in our slack, and we'd be more than happy to help on the design.
154 | 
155 | Identified packages share a common struct so be sure that when the new cataloger is constructing a new package it is using the [`Package` struct](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/package.go#L16-L31).
156 | 
157 | Metadata Note: Identified packages are also assigned specific metadata that can be unique to their environment. 
158 | See [this folder](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg) for examples of the different metadata types.
159 | These are plugged into the `MetadataType` and `Metadata` fields in the above struct. `MetadataType` informs which type is being used. `Metadata` is an interface converted to that type.
160 | 
161 | Finally, here is an example of where the package construction is done in the apk cataloger. The first link is where `newPackage` is called in the `parseFunction`. The second link shows the package construction:
162 | - [Call for new package](https://github.com/anchore/syft/blob/v0.70.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L106)
163 | - [APK Package Constructor](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/apkdb/package.go#L12-L27)
164 | 
165 | If you have more questions about implementing a cataloger or questions about one you might be currently working on,
166 | always feel free to file an issue or reach out to us [on slack](https://anchore.com/slack).
167 | 
168 | #### Searching for files
169 | 
170 | All catalogers are provided an instance of the [`source.FileResolver`](https://github.com/anchore/syft/blob/v0.70.0/syft/source/file_resolver.go#L8) to interface with the image and search for files. The implementations for these 
171 | abstractions leverage [`stereoscope`](https://github.com/anchore/stereoscope) in order to perform searching. Here is a 
172 | rough outline of how that works:
173 | 
174 | 1. a stereoscope `file.Index` is searched based on the input given (a path, glob, or MIME type). The index is relatively fast to search, but requires results to be filtered down to the files that exist in the specific layer(s) of interest. This is done automatically by the `filetree.Searcher` abstraction. This abstraction will fallback to searching directly against the raw `filetree.FileTree` if the index does not contain the file(s) of interest. Note: the `filetree.Searcher` is used by the `source.FileResolver` abstraction.
175 | 2. Once the set of files are returned from the `filetree.Searcher` the results are filtered down further to return the most unique file results. For example, you may have requested for files by a glob that returns multiple results. These results are filtered down to deduplicate by real files, so if a result contains two references to the same file, say one accessed via symlink and one accessed via the real path, then the real path reference is returned and the symlink reference is filtered out. If both were accessed by symlink then the first (by lexical order) is returned. This is done automatically by the `source.FileResolver` abstraction.
176 | 3. By the time results reach the `pkg.Cataloger` you are guaranteed to have a set of unique files that exist in the layer(s) of interest (relative to what the resolver supports).
177 | 
178 | ## Testing
179 | 
180 | ### Levels of testing
181 | 
182 | - `unit`: The default level of test which is distributed throughout the repo are unit tests. Any `_test.go` file that 
183 |   does not reside somewhere within the `/test` directory is a unit test. Other forms of testing should be organized in 
184 |   the `/test` directory. These tests should focus on correctness of functionality in depth. % test coverage metrics 
185 |   only consider unit tests and no other forms of testing.
186 | 
187 | - `integration`: located within `test/integration`, these tests focus on the behavior surfaced by the common library 
188 |   entrypoints from the `syft` package and make light assertions about the results surfaced. Additionally, these tests
189 |   tend to make diversity assertions for enum-like objects, ensuring that as enum values are added to a definition
190 |   that integration tests will automatically fail if no test attempts to use that enum value. For more details see 
191 |   the "Data diversity and freshness assertions" section below.
192 | 
193 | - `cli`: located within `test/cli`, these are tests that test the correctness of application behavior from a 
194 |   snapshot build. This should be used in cases where a unit or integration test will not do or if you are looking
195 |   for in-depth testing of code in the `cmd/` package (such as testing the proper behavior of application configuration,
196 |   CLI switches, and glue code before syft library calls).
197 | 
198 | - `acceptance`: located within `test/compare` and `test/install`, these are smoke-like tests that ensure that application  
199 |   packaging and installation works as expected. For example, during release we provide RPM packages as a download 
200 |   artifact. We also have an accompanying RPM acceptance test that installs the RPM from a snapshot build and ensures the 
201 |   output of a syft invocation matches canned expected output. New acceptance tests should be added for each release artifact
202 |   and architecture supported (when possible).
203 | 
204 | ### Data diversity and freshness assertions
205 | 
206 | It is important that tests against the codebase are flexible enough to begin failing when they do not cover "enough"
207 | of the objects under test. "Cover" in this case does not mean that some percentage of the code has been executed 
208 | during testing, but instead that there is enough diversity of data input reflected in testing relative to the
209 | definitions available.
210 | 
211 | For instance, consider an enum-like value like so:
212 | ```go
213 | type Language string
214 | 
215 | const (
216 |   Java            Language = "java"
217 |   JavaScript      Language = "javascript"
218 |   Python          Language = "python"
219 |   Ruby            Language = "ruby"
220 |   Go              Language = "go"
221 | )
222 | ```
223 | 
224 | Say we have a test that exercises all the languages defined today:
225 | 
226 | ```go
227 | func TestCatalogPackages(t *testing.T) {
228 |   testTable := []struct {
229 |     // ... the set of test cases that test all languages
230 |   }
231 |   for _, test := range testTable {
232 |     t.Run(test.name, func(t *testing.T) {
233 |       // use inputFixturePath and assert that syft.CatalogPackages() returns the set of expected Package objects
234 |       // ...
235 |     })
236 |   }
237 | }
238 | ```
239 | 
240 | Where each test case has an `inputFixturePath` that would result in packages from each language. This test is
241 | brittle since it does not assert that all languages were exercised directly and future modifications (such as 
242 | adding a new language) won't be covered by any test cases.
243 | 
244 | To address this the enum-like object should have a definition of all objects that can be used in testing:
245 | 
246 | ```go
247 | type Language string
248 | 
249 | // const( Java Language = ..., ... )
250 | 
251 | var AllLanguages = []Language{
252 | 	Java,
253 | 	JavaScript,
254 | 	Python,
255 | 	Ruby,
256 | 	Go,
257 | 	Rust,
258 | }
259 | ```
260 | 
261 | Allowing testing to automatically fail when adding a new language:
262 | 
263 | ```go
264 | func TestCatalogPackages(t *testing.T) {
265 |   testTable := []struct {
266 |     // ... the set of test cases that (hopefully) covers all languages
267 |   }
268 | 
269 |   // new stuff: collect every language observed in the results across all test cases
270 |   observedLanguages := strset.New()
271 | 
272 |   for _, test := range testTable {
273 |     t.Run(test.name, func(t *testing.T) {
274 |       // use inputFixturePath and assert that syft.CatalogPackages() returns the set of expected Package objects
275 |       // ...
276 | 
277 |       // new stuff: record the language of each package found for this test case
278 |       for _, actualPkg := range actual {
279 |         observedLanguages.Add(string(actualPkg.Language))
280 |       }
281 | 
282 |     })
283 |   }
284 | 
285 |   // new stuff: fail when a defined language was never seen (the set holds strings, so convert before the lookup)
286 |   for _, expectedLanguage := range pkg.AllLanguages {
287 |     if !observedLanguages.Contains(string(expectedLanguage)) {
288 |       t.Errorf("failed to test language=%q", expectedLanguage)
289 |     }
290 |   }
291 | }
292 | ```
293 | 
294 | This is a better test since it will fail when someone adds a new language but fails to write a test case that should
295 | exercise that new language. This method is ideal for integration-level testing, where testing correctness in depth 
296 | is not needed (that is what unit tests are for) but instead testing in breadth to ensure that units are well integrated.
297 | 
298 | A similar case can be made for data freshness; if the quality of the results will be diminished if the input data
299 | is not kept up to date then a test should be written (when possible) to assert any input data is not stale.
300 | 
301 | An example of this is the static list of licenses that is stored in `internal/spdxlicense` for use by the SPDX 
302 | presenters. This list is updated and published periodically by an external group and syft can grab and update this
303 | list by running `go generate ./...` from the root of the repo.
304 | 
305 | An integration test has been written that grabs the latest license list version externally and compares that version
306 | with the version generated in the codebase. If they differ, the test fails, indicating to someone that there is an
307 | action needed to update it.
308 | 
309 | **_The key takeaway is to try and write tests that fail when data assumptions change and not just when code changes._**
310 | 
311 | ### Snapshot tests
312 | 
313 | The format objects make a lot of use of "snapshot" testing, where you save the expected output bytes from a call into the
314 | git repository and during testing make a comparison of the actual bytes from the subject under test with the golden
315 | copy saved in the repo. The "golden" files are stored in the `test-fixtures/snapshot` directory relative to the go 
316 | package under test and should always be updated by invoking `go test` on the specific test file with a specific CLI 
317 | update flag provided.
318 | 
319 | Many of the `Format` tests make use of this approach, where the raw SBOM report is saved in the repo and the test 
320 | compares that SBOM with what is generated from the latest presenter code. For instance, at the time of this writing 
321 | the CycloneDX presenter snapshots can be updated by running:
322 | 
323 | ```bash
324 | go test ./internal/formats -update-cyclonedx
325 | ```
326 | 
327 | These flags are defined at the top of the test files that have tests that use the snapshot files.
328 | 
329 | Snapshot testing is only as good as the manual verification of the golden snapshot file saved to the repo! Be careful 
330 | and diligent when updating these files.
331 | 
332 | 
333 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Stage "build": used only as the source of the CA certificate bundle copied below
 2 | FROM gcr.io/distroless/static-debian11:debug AS build
 3 | 
 4 | # The final image starts out empty
 5 | FROM scratch
 6 | 
 7 | # Bring the CA certificate bundle over from the "build" stage
 8 | COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
 9 | 
10 | # Use /tmp as the working directory inside the container
11 | WORKDIR /tmp
12 | 
13 | # Place the "syft" binary from the build context at the image root
14 | COPY syft /
15 | 
16 | # Build-time values consumed by the labels below
17 | ARG BUILD_DATE
18 | ARG BUILD_VERSION
19 | ARG VCS_REF
20 | ARG VCS_URL
21 | 
22 | # Image metadata (OCI image keys followed by Artifact Hub keys), declared with a single LABEL instruction
23 | LABEL org.opencontainers.image.created=$BUILD_DATE \
24 |       org.opencontainers.image.title="syft" \
25 |       org.opencontainers.image.description="CLI tool and library for generating a Software Bill of Materials from container images and filesystems" \
26 |       org.opencontainers.image.source=$VCS_URL \
27 |       org.opencontainers.image.revision=$VCS_REF \
28 |       org.opencontainers.image.vendor="Anchore, Inc." \
29 |       org.opencontainers.image.version=$BUILD_VERSION \
30 |       org.opencontainers.image.licenses="Apache-2.0" \
31 |       io.artifacthub.package.readme-url="https://raw.githubusercontent.com/anchore/syft/main/README.md" \
32 |       io.artifacthub.package.logo-url="https://user-images.githubusercontent.com/5199289/136844524-1527b09f-c5cb-4aa9-be54-5aa92a6086c1.png" \
33 |       io.artifacthub.package.license="Apache-2.0"
34 | 
35 | # Run the syft binary when the container starts
36 | ENTRYPOINT ["/syft"]
37 | 


--------------------------------------------------------------------------------
/Dockerfile.debug:
--------------------------------------------------------------------------------
 1 | # Debug variant: built directly on the distroless ":debug" base image
 2 | FROM gcr.io/distroless/static-debian11:debug
 3 | 
 4 | # create the /tmp dir, which is needed for image content cache
 5 | WORKDIR /tmp
 6 | 
 7 | # Place the "syft" binary from the build context at the image root
 8 | COPY syft /
 9 | 
10 | # Build-time values consumed by the labels below
11 | ARG BUILD_DATE
12 | ARG BUILD_VERSION
13 | ARG VCS_REF
14 | ARG VCS_URL
15 | 
16 | # Image metadata (OCI image keys followed by Artifact Hub keys), declared with a single LABEL instruction
17 | LABEL org.opencontainers.image.created=$BUILD_DATE \
18 |       org.opencontainers.image.title="syft" \
19 |       org.opencontainers.image.description="CLI tool and library for generating a Software Bill of Materials from container images and filesystems" \
20 |       org.opencontainers.image.source=$VCS_URL \
21 |       org.opencontainers.image.revision=$VCS_REF \
22 |       org.opencontainers.image.vendor="Anchore, Inc." \
23 |       org.opencontainers.image.version=$BUILD_VERSION \
24 |       org.opencontainers.image.licenses="Apache-2.0" \
25 |       io.artifacthub.package.readme-url="https://raw.githubusercontent.com/anchore/syft/main/README.md" \
26 |       io.artifacthub.package.logo-url="https://user-images.githubusercontent.com/5199289/136844524-1527b09f-c5cb-4aa9-be54-5aa92a6086c1.png" \
27 |       io.artifacthub.package.license="Apache-2.0"
28 | 
29 | # Run the syft binary when the container starts
30 | ENTRYPOINT ["/syft"]
31 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | BIN := syft
  2 | TEMP_DIR := ./.tmp
  3 | 
  4 | # Command templates #################################
  5 | LINT_CMD := $(TEMP_DIR)/golangci-lint run --tests=false
  6 | GOIMPORTS_CMD := $(TEMP_DIR)/gosimports -local github.com/anchore
  7 | RELEASE_CMD := $(TEMP_DIR)/goreleaser release --clean
  8 | SNAPSHOT_CMD := $(RELEASE_CMD) --skip-publish --skip-sign --snapshot
  9 | CHRONICLE_CMD = $(TEMP_DIR)/chronicle
 10 | GLOW_CMD = $(TEMP_DIR)/glow
 11 | 
 12 | # Tool versions #################################
 13 | GOLANGCILINT_VERSION := v1.52.2
 14 | GOSIMPORTS_VERSION := v0.3.8
 15 | BOUNCER_VERSION := v0.4.0
 16 | CHRONICLE_VERSION := v0.6.0
 17 | GORELEASER_VERSION := v1.18.2
 18 | YAJSV_VERSION := v1.4.1
 19 | COSIGN_VERSION := v2.0.2
 20 | QUILL_VERSION := v0.2.0
 21 | GLOW_VERSION := v1.5.1
 22 | 
 23 | # Formatting variables #################################
 24 | BOLD := $(shell tput -T linux bold)
 25 | PURPLE := $(shell tput -T linux setaf 5)
 26 | GREEN := $(shell tput -T linux setaf 2)
 27 | CYAN := $(shell tput -T linux setaf 6)
 28 | RED := $(shell tput -T linux setaf 1)
 29 | RESET := $(shell tput -T linux sgr0)
 30 | TITLE := $(BOLD)$(PURPLE)
 31 | SUCCESS := $(BOLD)$(GREEN)
 32 | 
 33 | # Test variables #################################
 34 | COMPARE_DIR := ./test/compare
 35 | COMPARE_TEST_IMAGE := centos:8.2.2004
 36 | # the quality gate lower threshold for unit test total % coverage (by function statements); the comment is kept off the assignment line because make keeps the spaces before an inline '#' as part of the value
 37 | COVERAGE_THRESHOLD := 62
 38 | ## Build variables #################################
 39 | VERSION := $(shell git describe --dirty --always --tags)
 40 | DIST_DIR := ./dist
 41 | SNAPSHOT_DIR := ./snapshot
 42 | CHANGELOG := CHANGELOG.md
 43 | OS := $(shell uname | tr '[:upper:]' '[:lower:]')
 44 | SNAPSHOT_BIN := $(realpath $(shell pwd)/$(SNAPSHOT_DIR)/$(OS)-build_$(OS)_amd64_v1/$(BIN))
 45 | 
 46 | ifndef VERSION
 47 | 	$(error VERSION is not set)
 48 | endif
 49 | 
 50 | define title
 51 |     @printf '$(TITLE)$(1)$(RESET)\n'
 52 | endef
 53 | 
 54 | define safe_rm_rf
 55 | 	bash -c 'test -z "$(1)" && false || rm -rf $(1)'
 56 | endef
 57 | 
 58 | define safe_rm_rf_children
 59 | 	bash -c 'test -z "$(1)" && false || rm -rf $(1)/*'
 60 | endef
 61 | 
 62 | .DEFAULT_GOAL:=help
 63 | 
 64 | 
 65 | .PHONY: all
 66 | all: static-analysis test ## Run all linux-based checks (static analysis plus unit, integration, CycloneDX schema validation, benchmark, and CLI tests)
 67 | 	@printf '$(SUCCESS)All checks pass!$(RESET)\n'
 68 | 
 69 | .PHONY: static-analysis
 70 | static-analysis: check-go-mod-tidy lint check-licenses check-json-schema-drift  ## Run all static analysis checks
 71 | 
 72 | .PHONY: test
 73 | test: unit integration validate-cyclonedx-schema benchmark cli ## Run all tests (currently unit, integration, CycloneDX schema validation, benchmark, and CLI tests)
 74 | 
 75 | 
 76 | ## Bootstrapping targets #################################
 77 | 
 78 | .PHONY: bootstrap
 79 | bootstrap: $(TEMP_DIR) bootstrap-go bootstrap-tools ## Download and install all tooling dependencies (+ prep tooling in the ./.tmp dir)
 80 | 	$(call title,Bootstrapping dependencies)
 81 | 
 82 | .PHONY: bootstrap-tools
 83 | bootstrap-tools: $(TEMP_DIR)
 84 | 	curl -sSfL https://raw.githubusercontent.com/anchore/quill/main/install.sh | sh -s -- -b $(TEMP_DIR)/ $(QUILL_VERSION)
 85 | 	GOBIN="$(realpath $(TEMP_DIR))" go install golang.org/x/perf/cmd/benchstat@latest
 86 | 	curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(TEMP_DIR)/ $(GOLANGCILINT_VERSION)
 87 | 	curl -sSfL https://raw.githubusercontent.com/wagoodman/go-bouncer/master/bouncer.sh | sh -s -- -b $(TEMP_DIR)/ $(BOUNCER_VERSION)
 88 | 	curl -sSfL https://raw.githubusercontent.com/anchore/chronicle/main/install.sh | sh -s -- -b $(TEMP_DIR)/ $(CHRONICLE_VERSION)
 89 | 	.github/scripts/goreleaser-install.sh -d -b $(TEMP_DIR)/ $(GORELEASER_VERSION)
 90 | 	# the only difference between goimports and gosimports is that gosimports removes extra whitespace between import blocks (see https://github.com/golang/go/issues/20818)
 91 | 	GOBIN="$(realpath $(TEMP_DIR))" go install github.com/rinchsan/gosimports/cmd/gosimports@$(GOSIMPORTS_VERSION)
 92 | 	GOBIN="$(realpath $(TEMP_DIR))" go install github.com/neilpa/yajsv@$(YAJSV_VERSION)
 93 | 	GOBIN="$(realpath $(TEMP_DIR))" go install github.com/sigstore/cosign/v2/cmd/cosign@$(COSIGN_VERSION)
 94 | 	GOBIN="$(realpath $(TEMP_DIR))" go install github.com/charmbracelet/glow@$(GLOW_VERSION)
 95 | 
 96 | .PHONY: bootstrap-go
 97 | bootstrap-go:
 98 | 	go mod download
 99 | 
100 | $(TEMP_DIR):
101 | 	mkdir -p $(TEMP_DIR)
102 | 
103 | 
104 | ## Static analysis targets #################################
105 | 
106 | .PHONY: lint
107 | lint:  ## Run gofmt + golangci lint checks
108 | 	$(call title,Running linters)
109 | 	# ensure there are no go fmt differences
110 | 	@printf "files with gofmt issues: [$(shell gofmt -l -s .)]\n"
111 | 	@test -z "$(shell gofmt -l -s .)"
112 | 
113 | 	# run all golangci-lint rules
114 | 	$(LINT_CMD)
115 | 	@[ -z "$(shell $(GOIMPORTS_CMD) -d .)" ] || (echo "goimports needs to be fixed" && false)
116 | 
117 | 	# go tooling does not play well with certain filename characters, ensure the common cases don't result in future "go get" failures
118 | 	$(eval MALFORMED_FILENAMES := $(shell find . | grep -e ':'))
119 | 	@bash -c "[[ '$(MALFORMED_FILENAMES)' == '' ]] || (printf '\nfound unsupported filename characters:\n$(MALFORMED_FILENAMES)\n\n' && false)"
120 | 
121 | .PHONY: format
122 | format:  ## Auto-format all source code
123 | 	$(call title,Running formatters)
124 | 	gofmt -w -s .
125 | 	$(GOIMPORTS_CMD) -w .
126 | 	go mod tidy
127 | 
128 | .PHONY: lint-fix
129 | lint-fix: format  ## Auto-format all source code + run golangci lint fixers
130 | 	$(call title,Running lint fixers)
131 | 	$(LINT_CMD) --fix
132 | 
133 | .PHONY: check-licenses check-go-mod-tidy check-json-schema-drift
134 | check-licenses:  ## Ensure transitive dependencies are compliant with the current license policy
135 | 	$(call title,Checking for license compliance)
136 | 	$(TEMP_DIR)/bouncer check ./...
137 | 
138 | check-go-mod-tidy:
139 | 	@ .github/scripts/go-mod-tidy-check.sh && echo "go.mod and go.sum are tidy!"
140 | 
141 | check-json-schema-drift:
142 | 	$(call title,Ensure there is no drift between the JSON schema and the code)
143 | 	@.github/scripts/json-schema-drift-check.sh
144 | 
145 | ## Testing targets #################################
146 | 
147 | .PHONY: unit
148 | unit: $(TEMP_DIR) fixtures  ## Run unit tests (with coverage)
149 | 	$(call title,Running unit tests)
150 | 	go test -coverprofile $(TEMP_DIR)/unit-coverage-details.txt $(shell go list ./... | grep -v anchore/syft/test)
151 | 	@.github/scripts/coverage.py $(COVERAGE_THRESHOLD) $(TEMP_DIR)/unit-coverage-details.txt
152 | 
153 | .PHONY: integration
154 | integration:  ## Run integration tests
155 | 	$(call title,Running integration tests)
156 | 	go test -v ./test/integration
157 | 
158 | .PHONY: validate-cyclonedx-schema
159 | validate-cyclonedx-schema:
160 | 	cd schema/cyclonedx && make
161 | 
162 | .PHONY: cli
163 | cli: $(SNAPSHOT_DIR)  ## Run CLI tests
164 | 	chmod 755 "$(SNAPSHOT_BIN)"
165 | 	$(SNAPSHOT_BIN) version
166 | 	SYFT_BINARY_LOCATION='$(SNAPSHOT_BIN)' \
167 | 		go test -count=1 -timeout=15m -v ./test/cli
168 | 
169 | 
170 | ## Benchmark test targets #################################
171 | 
172 | .PHONY: benchmark
173 | benchmark: $(TEMP_DIR)  ## Run benchmark tests and compare against the baseline (if available)
174 | 	$(call title,Running benchmark tests)
175 | 	go test -p 1 -run=^Benchmark -bench=. -count=7 -benchmem ./... | tee $(TEMP_DIR)/benchmark-$(VERSION).txt
176 | 	(test -s $(TEMP_DIR)/benchmark-main.txt && \
177 | 		$(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-main.txt $(TEMP_DIR)/benchmark-$(VERSION).txt || \
178 | 		$(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-$(VERSION).txt) \
179 | 			| tee $(TEMP_DIR)/benchstat.txt
180 | 
181 | .PHONY: show-benchstat
182 | show-benchstat:
183 | 	@cat $(TEMP_DIR)/benchstat.txt
184 | 
185 | 
186 | ## Test-fixture-related targets #################################
187 | 
188 | # note: this is used by CI to determine if various test fixture cache should be restored or recreated
189 | fingerprints:
190 | 	$(call title,Creating all test cache input fingerprints)
191 | 
192 | 	# for IMAGE integration test fixtures
193 | 	cd test/integration/test-fixtures && \
194 | 		make cache.fingerprint
195 | 
196 | 	# for BINARY test fixtures
197 | 	cd syft/pkg/cataloger/binary/test-fixtures && \
198 | 		make cache.fingerprint
199 | 
200 | 	# for JAVA BUILD test fixtures
201 | 	cd syft/pkg/cataloger/java/test-fixtures/java-builds && \
202 | 		make cache.fingerprint
203 | 
204 | 	# for GO BINARY test fixtures
205 | 	cd syft/pkg/cataloger/golang/test-fixtures/archs && \
206 | 		make binaries.fingerprint
207 | 
208 | 	# for RPM test fixtures
209 | 	cd syft/pkg/cataloger/rpm/test-fixtures && \
210 | 		make rpms.fingerprint
211 | 
212 | 	# for Kernel test fixtures
213 | 	cd syft/pkg/cataloger/kernel/test-fixtures && \
214 | 		make cache.fingerprint
215 | 
216 | 	# for INSTALL integration test fixtures
217 | 	cd test/install && \
218 | 		make cache.fingerprint
219 | 
220 | 	# for CLI test fixtures
221 | 	cd test/cli/test-fixtures && \
222 | 		make cache.fingerprint
223 | 
224 | .PHONY: fixtures
225 | fixtures:
226 | 	$(call title,Generating test fixtures)
227 | 	cd syft/pkg/cataloger/java/test-fixtures/java-builds && make
228 | 	cd syft/pkg/cataloger/rpm/test-fixtures && make
229 | 	cd syft/pkg/cataloger/binary/test-fixtures && make
230 | 
231 | .PHONY: show-test-image-cache
232 | show-test-image-cache:  ## Show all docker and image tar cache
233 | 	$(call title,Docker daemon cache)
234 | 	@docker images --format '{{.ID}} {{.Repository}}:{{.Tag}}' | grep stereoscope-fixture- | sort
235 | 
236 | 	$(call title,Tar cache)
237 | 	@find . -type f -wholename "**/test-fixtures/cache/stereoscope-fixture-*.tar" | sort
238 | 
239 | .PHONY: show-test-snapshots
240 | show-test-snapshots:  ## Show all test snapshots
241 | 	$(call title,Test snapshots)
242 | 	@find . -type f -wholename "**/test-fixtures/snapshot/*" | sort
243 | 
244 | 
245 | ## install.sh testing targets #################################
246 | 
247 | install-test: $(SNAPSHOT_DIR)
248 | 	cd test/install && \
249 | 		make
250 | 
251 | install-test-cache-save: $(SNAPSHOT_DIR)
252 | 	cd test/install && \
253 | 		make save
254 | 
255 | install-test-cache-load: $(SNAPSHOT_DIR)
256 | 	cd test/install && \
257 | 		make load
258 | 
259 | install-test-ci-mac: $(SNAPSHOT_DIR)
260 | 	cd test/install && \
261 | 		make ci-test-mac
262 | 
263 | .PHONY: generate-compare-file
264 | generate-compare-file:
265 | 	$(call title,Generating compare test file)
266 | 	go run ./cmd/syft $(COMPARE_TEST_IMAGE) -o json > $(COMPARE_DIR)/test-fixtures/acceptance-centos-8.2.2004.json
267 | 
268 | # note: we cannot clean the snapshot directory since the pipeline builds the snapshot separately
269 | .PHONY: compare-mac
270 | compare-mac: $(TEMP_DIR) $(SNAPSHOT_DIR)  ## Run compare tests on build snapshot binaries and packages (Mac)
271 | 	$(call title,Running compare test: Run on Mac)
272 | 	$(COMPARE_DIR)/mac.sh \
273 | 			$(SNAPSHOT_DIR) \
274 | 			$(COMPARE_DIR) \
275 | 			$(COMPARE_TEST_IMAGE) \
276 | 			$(TEMP_DIR)
277 | 
278 | # note: we cannot clean the snapshot directory since the pipeline builds the snapshot separately
279 | .PHONY: compare-linux
280 | compare-linux: compare-test-deb-package-install compare-test-rpm-package-install  ## Run compare tests on build snapshot binaries and packages (Linux)
281 | 
282 | .PHONY: compare-test-deb-package-install
283 | compare-test-deb-package-install: $(TEMP_DIR) $(SNAPSHOT_DIR)
284 | 	$(call title,Running compare test: DEB install)
285 | 	$(COMPARE_DIR)/deb.sh \
286 | 			$(SNAPSHOT_DIR) \
287 | 			$(COMPARE_DIR) \
288 | 			$(COMPARE_TEST_IMAGE) \
289 | 			$(TEMP_DIR)
290 | 
291 | .PHONY: compare-test-rpm-package-install
292 | compare-test-rpm-package-install: $(TEMP_DIR) $(SNAPSHOT_DIR)
293 | 	$(call title,Running compare test: RPM install)
294 | 	$(COMPARE_DIR)/rpm.sh \
295 | 			$(SNAPSHOT_DIR) \
296 | 			$(COMPARE_DIR) \
297 | 			$(COMPARE_TEST_IMAGE) \
298 | 			$(TEMP_DIR)
299 | 
300 | 
301 | ## Code generation targets #################################
302 | 
303 | .PHONY: generate-json-schema
304 | generate-json-schema:  ## Generate a new json schema
305 | 	cd schema/json && go run generate.go
306 | 
307 | .PHONY: generate-license-list
308 | generate-license-list:  ## Generate an updated spdx license list
309 | 	go generate ./internal/spdxlicense/...
310 | 	gofmt -s -w ./internal/spdxlicense
311 | 
312 | 
313 | ## Build-related targets #################################
314 | 
315 | .PHONY: build
316 | build: $(SNAPSHOT_DIR)  ## Build release snapshot binaries and packages
317 | 
318 | $(SNAPSHOT_DIR):  ## Build snapshot release binaries and packages
319 | 	$(call title,Building snapshot artifacts)
320 | 
321 | 	# create a config with the dist dir overridden
322 | 	echo "dist: $(SNAPSHOT_DIR)" > $(TEMP_DIR)/goreleaser.yaml
323 | 	cat .goreleaser.yaml >> $(TEMP_DIR)/goreleaser.yaml
324 | 
325 | 	# build release snapshots
326 | 	$(SNAPSHOT_CMD) --config $(TEMP_DIR)/goreleaser.yaml
327 | 
328 | .PHONY: changelog
329 | changelog: clean-changelog  ## Generate and show the changelog for the current unreleased version
330 | 	$(CHRONICLE_CMD) -vvv -n --version-file VERSION > $(CHANGELOG)
331 | 	@$(GLOW_CMD) $(CHANGELOG)
332 | 
333 | $(CHANGELOG):
334 | 	$(CHRONICLE_CMD) -vvv > $(CHANGELOG)
335 | 
336 | .PHONY: release
337 | release:
338 | 	@.github/scripts/trigger-release.sh
339 | 
340 | .PHONY: ci-release
341 | ci-release: ci-check clean-dist $(CHANGELOG)
342 | 	$(call title,Publishing release artifacts)
343 | 
344 | 	# create a config with the dist dir overridden
345 | 	echo "dist: $(DIST_DIR)" > $(TEMP_DIR)/goreleaser.yaml
346 | 	cat .goreleaser.yaml >> $(TEMP_DIR)/goreleaser.yaml
347 | 
348 | 	bash -c "\
349 | 		$(RELEASE_CMD) \
350 | 			--config $(TEMP_DIR)/goreleaser.yaml \
351 | 			--release-notes <(cat $(CHANGELOG)) \
352 | 				 || (cat /tmp/quill-*.log && false)"
353 | 
354 | 	# upload the version file that supports the application version update check (excluding pre-releases)
355 | 	.github/scripts/update-version-file.sh "$(DIST_DIR)" "$(VERSION)"
356 | 
357 | .PHONY: ci-check
358 | ci-check:
359 | 	@.github/scripts/ci-check.sh
360 | 
361 | ## Cleanup targets #################################
362 | 
363 | .PHONY: clean
364 | clean: clean-dist clean-snapshot clean-test-image-cache  ## Remove previous builds, result reports, and test cache
365 | 	$(call safe_rm_rf_children,$(TEMP_DIR))
366 | 
367 | .PHONY: clean-snapshot
368 | clean-snapshot:
369 | 	$(call safe_rm_rf,$(SNAPSHOT_DIR))
370 | 	rm -f $(TEMP_DIR)/goreleaser.yaml
371 | 
372 | .PHONY: clean-dist
373 | clean-dist: clean-changelog
374 | 	$(call safe_rm_rf,$(DIST_DIR))
375 | 	rm -f $(TEMP_DIR)/goreleaser.yaml
376 | 
377 | .PHONY: clean-changelog
378 | clean-changelog:
379 | 	rm -f $(CHANGELOG) VERSION
380 | 
381 | clean-test-image-cache: clean-test-image-tar-cache clean-test-image-docker-cache ## Clean test image cache
382 | 
383 | .PHONY: clean-test-image-tar-cache
384 | clean-test-image-tar-cache:  ## Delete all test cache (built docker image tars)
385 | 	find . -type f -wholename "**/test-fixtures/cache/stereoscope-fixture-*.tar" -delete
386 | 
387 | .PHONY: clean-test-image-docker-cache
388 | clean-test-image-docker-cache:  ## Purge all test docker images
389 | 	docker images --format '{{.ID}} {{.Repository}}' | grep stereoscope-fixture- | awk '{print $$1}' | uniq | xargs -r docker rmi --force
390 | 
391 | ## Halp! #################################
392 | 
393 | .PHONY: help
394 | help:  ## Display this help
395 | 	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "$(BOLD)$(CYAN)%-25s$(RESET)%s\n", $$1, $$2}'
396 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 🌐 ChatCVE Langchain App 
  2 | 
  3 | ## 🎯 Description
  4 | The ChatCVE LangChain App is an AI-powered DevSecOps application 🔍 that helps organizations triage and aggregate CVE (Common Vulnerabilities & Exposures) information. By leveraging state-of-the-art Natural Language Processing, ChatCVE makes detailed Software Bill of Materials (SBOM) data accessible to everyone, because security is everyone's job. From security analysts to audit and compliance teams, ChatCVE offers a more intuitive and engaging way to extract key findings. 🤖💬
  5 | 
  6 | ## 🚀 Features
  7 | - **🧠 Natural Language Queries**: Ask questions using plain English (or your preferred language)! No need to grapple with complex query languages. 
  8 | - **🔮 AI-Powered Analysis**: Our app is backed by the Langchain AI framework.  It can easily surface important vulnerability information using Human Language.  The requests are automatically translated to [SQL](https://python.langchain.com/docs/integrations/toolkits/sql_database) for querying specific artifact findings.
  9 | - **⏭️ Proactive Assistance**: Anyone can identify potential concerns proactively to improve the overall Cyber Security Posture.
 10 | - **🔁 Triage & Remediation**: Assist in Vulnerability remediation using National Vulnerability Database (NVD), Syft, and Grype wrappers.  Can be extended to triage using other CVE advisory databases.
 11 | - **🖥️ UI/UX**: Simple Natural Language Processing command input and on-screen history log.
 12 | 
 13 | ## 📲 Installation
 14 | 
 15 | 1. Clone this repository:
 16 | ```bash
 17 | git clone https://github.com/jasona7/ChatCVE.git
 18 | ```
 19 | 2. Enter the project directory:
 20 | ```bash
 21 | cd ChatCVE
 22 | ```
 23 | 3. Setup a Python environment:
 24 | ```bash
 25 | python3 -m venv .env
 26 | source .env/bin/activate
 27 | ```
 28 | 4. Install Grype and Syft
 29 | ```bash
 30 | curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b /usr/local/bin
 31 | curl -sSfL https://raw.githubusercontent.com/anchore/grype/main/install.sh | sh -s -- -b /usr/local/bin
 32 | ```
 33 | 5. Install requirements
 34 | ```bash
 35 | pip install -r requirements.txt
 36 | ```
 37 | 6. Create the app_patrol and nvd_cves tables in the app_patrol.db SQLite database:
 38 | ```bash
 39 | sqlite3> CREATE TABLE app_patrol (
 40 |     NAME TEXT,
 41 |     INSTALLED TEXT,
 42 |     FIXED_IN TEXT,
 43 |     TYPE TEXT,
 44 |     VULNERABILITY TEXT,
 45 |     SEVERITY TEXT,
 46 |     IMAGE_TAG TEXT,
 47 |      DATE_ADDED TEXT);
 48 | 
 49 | sqlite3> CREATE TABLE nvd_cves (
 50 |     cve_id TEXT PRIMARY KEY,
 51 |     source_id TEXT,
 52 |     published TEXT,
 53 |     last_modified TEXT,
 54 |     vuln_status TEXT,
 55 |     description TEXT,
 56 |     cvss_v30_vector_string TEXT,
 57 |     cvss_v30_base_score REAL,
 58 |     cvss_v30_base_severity TEXT,
 59 |     cvss_v2_vector_string TEXT,
 60 |     cvss_v2_base_score REAL,
 61 |     cvss_v2_base_severity TEXT,
 62 |     weakness TEXT,
 63 |     ref_info TEXT);
 64 | ```
 65 | 7. Create an images.txt file listing the images to scan. Include the registry, repository, and version tag for each:
 66 | ```
 67 | public.ecr.aws/tanzu_observability_demo_app/to-demo/inventory:latest
 68 | public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest
 69 | public.ecr.aws/tanzu_observability_demo_app/to-demo/delivery:latest
 70 | public.ecr.aws/tanzu_observability_demo_app/to-demo/warehouse:latest
 71 | public.ecr.aws/tanzu_observability_demo_app/to-demo/notification:latest
 72 | public.ecr.aws/tanzu_observability_demo_app/to-demo/styling:latest
 73 | public.ecr.aws/tanzu_observability_demo_app/to-demo/packaging:latest
 74 | public.ecr.aws/tanzu_observability_demo_app/to-demo/printing:latest
 75 | public.ecr.aws/tanzu_observability_demo_app/to-demo/payments:latest
 76 | public.ecr.aws/tanzu_observability_demo_app/to-demo/loadgen:latest
 77 | public.ecr.aws/amazoncorretto/amazoncorretto:20-al2-jdk
 78 | public.ecr.aws/docker/library/tomcat:9.0.75-jdk8-corretto-al2
 79 | public.ecr.aws/bitnami/minio:2023.5.18
 80 | public.ecr.aws/p4c2e2q6/miniamplify-x86:latest
 81 | public.ecr.aws/xray/aws-xray-daemon:3.3.7
 82 | public.ecr.aws/datadog/agent:7.45.0-rc.5
 83 | public.ecr.aws/aws-ec2/aws-node-termination-handler:v1.19.0
 84 | public.ecr.aws/aws-gcr-solutions/data-transfer-hub-ecr:v1.0.4
 85 | public.ecr.aws/bitnami/jenkins:2.387.3
 86 | ```
 87 | 
 88 | 
 89 | 
 90 | ## 💻 Usage
 91 | 1. Initiate a scan that will kick off the SBOM and CVE artifact creation.  SBOM reports will appear in output/sbom,
 92 | and scan summaries will appear in output/scan_summary.
 93 | ``` bash
 94 | python scan.py
 95 | ```
 96 | 
 97 | 2. Fetch the CVEs published to the National Vulnerability Database (NVD) in the last 24 hours and store them in the nvd_cves table of the SQLite3 backend:
 98 | ``` bash
 99 | python fetch_daily_nvd_cves.py
100 | ```
101 | 
102 | 3. Check the SBOM records have been added:
103 | ``` bash
104 | sqlite3 app_patrol.db
105 | sqlite> SELECT * FROM app_patrol LIMIT 10;
106 | tar|1.34+dfsg-1||deb|CVE-2005-2541|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15
107 | login|1:4.8.1-1||deb|CVE-2007-5686|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15
108 | passwd|1:4.8.1-1||deb|CVE-2007-5686|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15
109 | libssl1.1|1.1.1n-0+deb11u3||deb|CVE-2007-6755|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15
110 | openssl|1.1.1n-0+deb11u3||deb|CVE-2007-6755|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15
111 | jetty-setuid-java|1.0.4||java-archive|CVE-2009-5045|High|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15
112 | jetty-setuid-java|1.0.4||java-archive|CVE-2009-5046|Medium|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15
113 | libssl1.1|1.1.1n-0+deb11u3||deb|CVE-2010-0928|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15
114 | openssl|1.1.1n-0+deb11u3||deb|CVE-2010-0928|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15
115 | libc-bin|2.31-13+deb11u3||deb|CVE-2010-4756|Negligible|public.ecr.aws/tanzu_observability_demo_app/to-demo/shopping:latest|2023-05-21 15:01:15
116 | ```
117 | 
118 | 4. Start a Chat-CVE OpenAI SQL Agent session (localhost:5000):
119 | 
120 | NOTE: Refine guardrails, temperature, etc to improve accuracy and output.
121 | ```bash
122 | python frontend/app.py
123 | ```
124 | ![CVE Query Interface](assets/chatcve_ui.png)
125 | 
126 | NOTE: chat_cve.py will let you launch a command line session.
127 | 
128 | Query at the prompt:
129 | ```bash
130 | Enter a question or type 'exit' to quit: Which NAME in app_patrol table has the most CRITICAL Severity records?
131 | ```
132 | Expected output:
133 | ```bash
134 | ** Thought: I should query the app_patrol table to get the name with the most Critical CVEs. **
135 | Thought: I should execute the query to get the results.
136 | Action: query_sql_db
137 | Action Input: SELECT NAME, COUNT(*) AS Top FROM app_patrol WHERE SEVERITY = 'Critical' GROUP BY NAME ORDER BY Top DESC LIMIT 3
138 | Observation: [('curl', 42), ('libcurl4', 42), ('libpcre2-8-0', 16)]
139 | Thought: I now know the final answer.
140 | Final Answer: The top 3 Names in the app_patrol table sorted by the top count of critical in the severity column are 'curl', 'libcurl4', and 'libpcre2-8-0'.
141 | ```
142 | 
143 | 
144 | ## 🌈 Software Supply Chain and Security Use Cases
145 | - **Security Analysts**: Assist Triage & find detailed CVE information quickly without dealing with intricate databases.
146 | - **Audit Teams**: Efficiently target auditing efforts and ensure compliance with security standards.
147 | - **Compliance Teams**: Maintain documentation and track usage for attestation efforts, ensuring all known libraries are documented.  Non-technical personnel can simply use human language.
148 | - **Development Teams**: Efficiently target underlying libraries and get access to remediation suggestions.
149 | 
150 | ## ⭐⭐ Example prompt queries and results
151 | ```bash
152 | What percentage of records are for curl in the app_patrol table?
153 | 
154 | Thought: I should query the app_patrol table to get the percentage of records for curl.
155 | Action: query_sql_db
156 | Action Input: SELECT COUNT(*) * 100.0 / (SELECT COUNT(*) FROM app_patrol) FROM app_patrol WHERE NAME = 'curl'
157 | Observation: [(6.006697362913353,)]
158 | Thought: I now know the final answer.
159 | Final Answer: 6.006697362913353% of records in the app_patrol table are for curl.
160 | 
161 | How many critical records are there in the app_patrol table?
162 | 
163 | Thought: I should query the app_patrol table for the number of critical records.
164 | Action: query_sql_db
165 | Action Input: SELECT COUNT(*) FROM app_patrol WHERE SEVERITY = 'Critical'
166 | Observation: [(246,)]
167 | Thought: I now know the final answer.
168 | Final Answer: There are 246 critical records in the app_patrol table.
169 | 
170 | Which name in the app_patrol table has the most Critical Severity records?
171 | 
172 | Thought: I should query the app_patrol table to find the name with the most Critical Severity records.
173 | Action: query_sql_db
174 | Action Input: SELECT NAME, COUNT(*) AS count FROM app_patrol WHERE SEVERITY = 'Critical' GROUP BY NAME ORDER BY count DESC LIMIT 10;
175 | Observation: [('curl', 42), ('libcurl4', 42), ('libpcre2-8-0', 16), ('libksba8', 15), ('jetty-setuid-java', 14), ('libdb5.3', 9), ('libtasn1-6', 9), ('zlib1g', 8), ('System.Drawing.Common', 7), ('libexpat1', 7)]
176 | Thought: I now know the final answer.
177 | Final Answer: The name with the most Critical Severity records is 'curl' with 42 records.
178 | ```
179 | 
180 | 
181 | ## 🤝 Contributing
182 | We welcome your feedback! 🙌 
183 | For all significant changes, please open an issue first to discuss what you'd like to improve.
184 | 
185 | ## 📃 License
186 | Our project is licensed under the [MIT License](https://choosealicense.com/licenses/mit/).
187 | 


--------------------------------------------------------------------------------
/RELEASE.md:
--------------------------------------------------------------------------------
  1 | # Release
  2 | 
  3 | ## Creating a release
  4 | 
  5 | This release process itself should be as automated as possible, and has only a few steps:
  6 | 
  7 | 1. **Trigger a new release with `make release`**. At this point you'll see a preview
  8 |   changelog in the terminal. If you're happy with the changelog, press `y` to continue, otherwise
  9 |   you can abort and adjust the labels on the PRs and issues to be included in the release and
 10 |   re-run the release trigger command.
 11 | 
 12 | 1. A release admin must approve the release on the GitHub Actions release pipeline run page.
 13 |    Once approved, the release pipeline will generate all assets and publish a GitHub Release.
 14 | 
 15 | 1. If there is a release Milestone, close it.
 16 | 
 17 | Ideally releasing should be done often with small increments when possible. Unless a
 18 | breaking change is blocking the release, or no fixes/features have been merged, a good
 19 | target release cadence is every 1 to 2 weeks.
 20 | 
 21 | 
 22 | ## Retracting a release
 23 | 
 24 | If a release is found to be problematic, it can be retracted with the following steps:
 25 | 
 26 | - Delete the GitHub Release
 27 | - Untag the docker images in the `ghcr.io` and `docker.io` registries
 28 | - Revert the brew formula in [`anchore/homebrew-syft`](https://github.com/anchore/homebrew-syft) to point to the previous release
 29 | - Add a new `retract` entry in the go.mod for the versioned release
 30 | 
 31 | **Note**: do not delete release tags from the git repository since there may already be references to the release
 32 | in the go proxy, which will cause confusion when trying to reuse the tag later (the H1 hash will not match and there
 33 | will be a warning when users try to pull the new release).
 34 | 
 35 | 
 36 | ## Background
 37 | 
 38 | A good release process has the following qualities:
 39 | 
 40 | 1. There is a way to plan what should be in a release
 41 | 1. There is a way to see what is actually in a release
 42 | 1. Allow for different kinds of releases (major breaking vs backwards compatible enhancements vs patch updates)
 43 | 1. Specify a repeatable way to build and publish software artifacts
 44 | 
 45 | 
 46 | ### Planning a release
 47 | 
 48 | To indicate a set of features to be released together add each issue to an in-repository
 49 | Milestone named with major-minor version to be released (e.g. `v0.1`). It is OK for other
 50 | features to be in the release that were not originally planned, and these issues and PRs
 51 | do not need to be added to the Milestone in question. The Milestone should contain only the set of features that, when
 52 | completed, would allow the release to be considered complete. A Milestone is only used to:
 53 | 
 54 | - Plan what is desired to be in a release
 55 | - Track progress to indicate when we may be ready to cut a new release
 56 | 
 57 | Not all releases need to be planned. For instance, patch releases for fixes should be
 58 | released when they are ready and when releasing would not interfere with another current
 59 | release (where some partial or breaking features have already been merged).
 60 | 
 61 | Unless necessary, feature releases should be small and frequent, which may obviate the
 62 | need for regular release planning under a Milestone.
 63 | 
 64 | 
 65 | ### What is in a release
 66 | 
 67 | Milestones are specifically for planning a release, not necessarily tracking all changes
 68 | that a release may bring (and more importantly, not all releases are necessarily planned
 69 | either).
 70 | 
 71 | This is one of the (many) reasons for a Changelog. A good Changelog lists changes grouped
 72 | by the type of change (new, enhancement, deprecation, breaking, bug fix, security fix), in
 73 | chronological order (within groups), linking the PR where the change was made in the
 74 | Changelog line. Furthermore, there should be a place to see all released versions, the
 75 | release date for each release, the semantic version of the release, and the set of changes
 76 | for each release.
 77 | 
 78 | **This project auto-generates the Changelog contents for each current release and posts the
 79 | generated contents to the GitHub Release page**. Leveraging the GitHub Releases feature
 80 | allows GitHub to manage the Changelog on each release outside of the git source tree while
 81 | still being hosted with the released assets.
 82 | 
 83 | The Changelog is generated from the metadata from in-repository issues and PRs, using
 84 | labels to guide what kind of change each item is (e.g. breaking, new feature, bug fix,
 85 | etc.). Only issues/PRs with select labels are included in the Changelog, and only if the
 86 | issue/PR was created after the last release. Additional labels are used to exclude items
 87 | from the Changelog.
 88 | 
 89 | The above suggestions imply that we should:
 90 | 
 91 | - Ensure there is a sufficient title for each PR and issue title to be included in the
 92 |   Changelog
 93 | - The appropriate label is applied to PRs and/or issues to drive specific change type
 94 |   sections (deprecated, breaking, security, bug, etc)
 95 | 
 96 | **With this approach as we cultivate good organization of PRs and issues we automatically
 97 | get an equally good Changelog.**
 98 | 
 99 | 
100 | ### Major, minor, and patch releases
101 | 
102 | The latest version of the tool is the only supported version, which implies that multiple
103 | parallel release branches will not be a regular process (if ever). Multiple releases can
104 | be planned in parallel, however, only one can be actively developed at a time. That is, if
105 | PRs attached to a release Milestone have been merged into the main branch, that release is
106 | now the "next" release. **This implies that the source of truth for release lies with the
107 | git log and Changelog, not with the release Milestones** (which are purely for planning and
108 | tracking).
109 | 
110 | Semantic versioning should be used to indicate breaking changes, new features, and fixes.
111 | The exception to this is `< 1.0`, where the major version is not bumped for breaking changes,
112 | instead the minor version indicates both new features and breaking changes.
113 | 


--------------------------------------------------------------------------------
/__pycache__/scan.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasona7/ChatCVE/c7063214401f3c6b2702d9d37215697c8b826908/__pycache__/scan.cpython-310.pyc


--------------------------------------------------------------------------------
/app_patrol.d:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasona7/ChatCVE/c7063214401f3c6b2702d9d37215697c8b826908/app_patrol.d


--------------------------------------------------------------------------------
/app_patrol.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasona7/ChatCVE/c7063214401f3c6b2702d9d37215697c8b826908/app_patrol.db


--------------------------------------------------------------------------------
/artifacthub-repo.yml:
--------------------------------------------------------------------------------
1 | # See documentation here: https://github.com/artifacthub/hub/blob/v1.6.0/docs/metadata/artifacthub-repo.yml
2 | repositoryID: eced152f-b15d-4879-8b3b-1175397192ba
3 | owners:
4 |   - name: wagoodman
5 |     email: wagoodman@gmail.com
6 | 


--------------------------------------------------------------------------------
/assets/chatcve_ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasona7/ChatCVE/c7063214401f3c6b2702d9d37215697c8b826908/assets/chatcve_ui.png


--------------------------------------------------------------------------------
/chat_cve.py:
--------------------------------------------------------------------------------
 1 | from langchain.sql_database import SQLDatabase
 2 | from langchain.llms.openai import OpenAI
 3 | from langchain.agents import create_sql_agent, AgentExecutor
 4 | from langchain_community.agent_toolkits import SQLDatabaseToolkit
 5 | import os
 6 | 
 7 | # Initialize the LLM (large language model); the OpenAI API key is read from the OPENAI_API_KEY environment variable
 8 | 
 9 | llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
10 | 
11 | # Connect the SQL toolkit to the SQLite database at the absolute path /ChatCVE/app_patrol.db
12 | db = SQLDatabase.from_uri("sqlite:////ChatCVE/app_patrol.db")
13 | toolkit = SQLDatabaseToolkit(db=db, llm=llm)  # the toolkit is given both the database and the LLM
14 | 
15 | agent_executor = create_sql_agent(
16 |     llm=llm,
17 |     toolkit=toolkit,
18 |     verbose=True
19 | )
20 | 
21 | # Read questions from the command line and run the agent on each, until the user types 'exit' (any letter case)
22 | while True:
23 |     guardrails = "Do not use sql LIMIT in the results. "
24 |     user_input = input("Enter a question or type 'exit' to quit: ")
25 |     if user_input.lower() == 'exit':
26 |         break
27 |     
28 |     # Prepend the guardrail instruction to the question before handing it to the agent
29 |     safe_user_input = guardrails + user_input
30 |     agent_executor.run(safe_user_input)
31 | 


--------------------------------------------------------------------------------
/fetch_daily_nvd_cves.py:
--------------------------------------------------------------------------------
  1 | import urllib.request
  2 | import urllib.parse
  3 | import json
  4 | import sqlite3
  5 | from datetime import datetime, timedelta
  6 | import logging
  7 | 
  8 | 
  9 | # Set up logging
 10 | logging.basicConfig(
 11 |     filename='app.log', 
 12 |     filemode='a', 
 13 |     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 
 14 |     level=logging.INFO
 15 | )
 16 | 
 17 | # Start time
 18 | start_time = datetime.now()
 19 | 
 20 | # Get current UTC time and 24 hours earlier
 21 | now = datetime.utcnow()
 22 | one_day_ago = now - timedelta(days=1)
 23 | 
 24 | # Format the times as strings in the required format
 25 | now_str = now.strftime("%Y-%m-%dT%H:%M:%S") + '.999-05:00'
 26 | one_day_ago_str = one_day_ago.strftime("%Y-%m-%dT%H:%M:%S") + '.000-05:00'
 27 | 
 28 | # Construct the URL
 29 | base_url = "https://services.nvd.nist.gov/rest/json/cves/2.0"
 30 | query_params = {
 31 |     "pubStartDate": one_day_ago_str,
 32 |     "pubEndDate": now_str
 33 | }
 34 | url = base_url + "?" + urllib.parse.urlencode(query_params)
 35 | 
 36 | # Make the request and parse the response
 37 | response = urllib.request.urlopen(url)
 38 | data = json.loads(response.read().decode())
 39 | 
 40 | # Open a connection to the SQLite database and create a cursor object
 41 | conn = sqlite3.connect('../app_patrol.db')
 42 | cursor = conn.cursor()
 43 | 
 44 | count = 0
 45 | severity_count = {}
 46 | 
 47 | # For each CVE in the response, insert the data into the nvd_cves table
 48 | for vuln in data['vulnerabilities']:
 49 |     count += 1
 50 |     cve = vuln['cve']
 51 |     metric_v3 = cve['metrics']['cvssMetricV30'][0]['cvssData'] if cve['metrics'].get('cvssMetricV30') else {}
 52 |     metric_v2 = cve['metrics']['cvssMetricV2'][0]['cvssData'] if cve['metrics'].get('cvssMetricV2') else {}
 53 | 
 54 |     severity = metric_v3.get('baseSeverity', 'N/A')
 55 |     severity_count[severity] = severity_count.get(severity, 0) + 1
 56 | 
 57 |     cursor.execute("""
 58 |     INSERT OR REPLACE INTO nvd_cves
 59 |         (cve_id, source_id, published, last_modified, vuln_status, description,
 60 |         cvss_v30_vector_string, cvss_v30_base_score, cvss_v30_base_severity,
 61 |         cvss_v2_vector_string, cvss_v2_base_score, cvss_v2_base_severity,
 62 |         weakness, ref_info) 
 63 |     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
 64 |     """,
 65 |                    (cve['id'],
 66 |                     cve['sourceIdentifier'],
 67 |                     cve['published'],
 68 |                     cve['lastModified'],
 69 |                     cve['vulnStatus'],
 70 |                     cve['descriptions'][0]['value'] if cve.get('descriptions') else None,
 71 |                     metric_v3.get('vectorString'),
 72 |                     metric_v3.get('baseScore'),
 73 |                     metric_v3.get('baseSeverity'),
 74 |                     metric_v2.get('vectorString'),
 75 |                     metric_v2.get('baseScore'),
 76 |                     metric_v2.get('baseSeverity'),
 77 |                     cve['weaknesses'][0]['description'][0]['value'] if cve.get('weaknesses') else None,
 78 |                     json.dumps(cve['references'])))
 79 | 
 80 | # Commit the changes and close the connection
 81 | conn.commit()
 82 | conn.close()
 83 | 
 84 | # End time
 85 | end_time = datetime.now()
 86 | 
 87 | # Calculate execution time
 88 | execution_time = end_time - start_time
 89 | 
 90 | # Write summary to log file
 91 | log_dir = '/ChatCVE/logs/'
 92 | log_filename = now.strftime("%Y-%m-%d_%H_%M_%S_fetch_summary.log").replace(':', '_').replace('/', '_')
 93 | with open(log_dir + log_filename, 'w') as f:
 94 |     f.write(f"Script execution summary:\n")
 95 |     f.write(f"Records created or updated: {count}\n")
 96 |     f.write(f"Execution time: {execution_time}\n")
 97 |     f.write(f"Severity count:\n")
 98 |     for severity, count in severity_count.items():
 99 |         f.write(f"{severity}: {count}\n")
100 | 


--------------------------------------------------------------------------------
/frontend/app.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask, render_template, request, redirect, url_for
 2 | from langchain.sql_database import SQLDatabase
 3 | from langchain.llms.openai import OpenAI
 4 | from langchain.agents import create_sql_agent
 5 | from langchain_community.agent_toolkits import SQLDatabaseToolkit
 6 | import os
 7 | import re
 8 | from sqlalchemy.exc import SQLAlchemyError
 9 | 
10 | app = Flask(__name__)
11 | 
12 | # Initialize the LLM; the OpenAI API key is read from the OPENAI_API_KEY environment variable
13 | llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY"))
14 | 
15 | # Connect the SQL toolkit to the SQLite database at the absolute path /ChatCVE/app_patrol.db
16 | db = SQLDatabase.from_uri("sqlite:////ChatCVE/app_patrol.db")
17 | toolkit = SQLDatabaseToolkit(db=db, llm=llm)
18 | 
19 | agent_executor = create_sql_agent(llm=llm, toolkit=toolkit, verbose=True)
20 | 
21 | # Module-level list of (question, answer) tuples, newest first; kept in memory only
22 | history = []
23 | 
24 | def execute_sql_query(query):
25 |     try:
26 |         # Run `query`; returns rows as dicts, or the error text on SQLAlchemyError. NOTE(review): assumes `db` exposes `.session` - confirm.
27 |         result = db.session.execute(query)
28 |         return [dict(row) for row in result.fetchall()]
29 |     except SQLAlchemyError as e:
30 |         return str(e)
31 | 
32 | @app.route('/', methods=['GET', 'POST'])
33 | def home():
34 |     if request.method == 'POST':
35 |         user_input = request.form.get('question')
36 |         if user_input:
37 |             guardrails = "Do not use sql LIMIT in the results.  the tables in the database are nvd_findings and also app_patrol.  Output should only be the SL query result."
38 |             safe_user_input = guardrails + user_input
39 |             response = agent_executor.run(safe_user_input)
40 | 
41 |             # Check if the response is a SQL statement
42 |             if re.match(r"\s*SELECT\s+", response, re.IGNORECASE):
43 |                 # Execute the SQL query and get the results
44 |                 results = execute_sql_query(response)
45 |                 # Format the results as a string or handle as needed
46 |                 formatted_results = ', '.join([str(row) for row in results])
47 |                 response = formatted_results
48 | 
49 |             # Insert the new entry at the beginning of the history list
50 |             history.insert(0, (user_input, response))
51 | 
52 |     return render_template('index.html', history=history)
53 | 
54 | if __name__ == '__main__':
55 |     app.run(debug=True)
56 | 


--------------------------------------------------------------------------------
/frontend/static/style.css:
--------------------------------------------------------------------------------
  1 | /* Use Google Fonts for better typography */
  2 | @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
  3 | 
  4 | /* Global defaults for every element: border-box sizing, zero padding, and a 10px margin */
  5 | * {
  6 |     box-sizing: border-box;
  7 |     margin: 10px;
  8 |     padding: 0;
  9 | }
 10 | 
 11 | body {
 12 |     font-family: 'Roboto', sans-serif;
 13 |     background-color: #f4f4f4;
 14 |     line-height: 1.6;
 15 |     padding: 20px;
 16 | }
 17 | 
 18 | .container {
 19 |     max-width: 800px;
 20 |     margin: 20px auto;
 21 |     padding: 20px;
 22 |     background: #fff;
 23 |     box-shadow: 0 3px 7px rgba(0, 0, 0, 0.1);
 24 | }
 25 | 
 26 | h1 {
 27 |     color: #333;
 28 |     margin-bottom: 1rem;
 29 | }
 30 | 
 31 | .question-form input[type="text"] {
 32 |     width: 70%;
 33 |     padding: 10px;
 34 |     margin-right: 10px;
 35 |     border: 1px solid #ccc;
 36 |     border-radius: 4px;
 37 | }
 38 | 
 39 | .question-form input[type="submit"] {
 40 |     padding: 10px 20px;
 41 |     border: none;
 42 |     border-radius: 4px;
 43 |     background: #007bff;
 44 |     color: #fff;
 45 |     cursor: pointer;
 46 | }
 47 | 
 48 | .question-form input[type="submit"]:hover {
 49 |     background: #0056b3;
 50 | }
 51 | 
 52 | .question-input {
 53 |     width: 100%; /* Make the input stretch to the full width of its container */
 54 |     padding: 15px; /* Increase padding for larger touch area and better visibility */
 55 |     font-size: 1.25rem; /* Increase font size for better readability */
 56 |     margin-bottom: 10px; /* Add some space below the input field */
 57 |     border: 2px solid #007bff; /* Add a border that stands out */
 58 |     border-radius: 4px; /* Slightly rounded corners for a modern look */
 59 |     box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1); /* Subtle inner shadow for depth */
 60 | }
 61 | 
 62 | .submit-btn {
 63 |     padding: 15px 30px; /* Larger padding */
 64 |     font-size: 1.25rem; /* Increase font size to match the input field */
 65 |     text-transform: uppercase; /* Optional: uppercase text for the button */
 66 |     letter-spacing: 1px; /* Optional: spacing out letters a bit */
 67 |     border-radius: 4px; /* Match the border radius of the input */
 68 |     border: 2px solid transparent; /* Hide border */
 69 |     background-color: #007bff; /* Button color */
 70 |     color: white; /* Text color */
 71 |     cursor: pointer; /* Cursor to indicate it's clickable */
 72 |     transition: background-color 0.3s ease; /* Smooth transition for hover effect */
 73 | }
 74 | 
 75 | .submit-btn:hover {
 76 |     background-color: #0056b3; /* Darker shade when hovered */
 77 | }
 78 | 
 79 | .history {
 80 |     list-style-type: none;
 81 |     margin-top: 2rem;
 82 | }
 83 | 
 84 | .history-item {
 85 |     background-color: #f9f9f9;
 86 |     border-left: 5px solid #007bff;
 87 |     margin-bottom: 10px;
 88 |     padding: 10px;
 89 | }
 90 | 
 91 | /* Responsive adjustments */
 92 | @media (max-width: 768px) {
 93 |     .container {
 94 |         width: 95%; /* Slight padding from the edges on smaller screens */
 95 |     }
 96 | 
 97 |     .question-form {
 98 |         flex-direction: column; /* Stack input and button on top of each other */
 99 |     }
100 | 
101 |     .submit-btn {
102 |         width: 100%; /* Full width button on smaller screens */
103 |         margin-top: 10px; /* Add space between input and button */
104 |     }
105 | 
106 |     .history-table {
107 |         width: 100%; /* Full width table */
108 |         border-collapse: collapse; /* Collapse borders */
109 |     }
110 |     
111 |     .history-item {
112 |         background-color: #f9f9f9; /* Light grey background for each row */
113 |         border-bottom: 1px solid #e1e1e1; /* Separator for rows */
114 |     }
115 |     
116 |     .history-item:last-child {
117 |         border-bottom: none; /* No border for the last row */
118 |     }
119 | } /* Closes the max-width: 768px block; without this brace every rule below was scoped to small screens only */
120 | /* Remove width and margin from .question and .answer to let flex handle the sizing */
121 | .question,
122 | .answer {
123 |     padding: 10px; /* Spacing inside cells */
124 |     /* width: flex; This line is commented out because 'flex' is not a valid value for width */
125 |     font-weight: bold; /* Bold text; this selector covers both .question and .answer */
126 |     /* margin-left: 15px; */
127 |     /* margin-right: 15px; */
128 |     /* margin-top: 8px; */
129 | }
130 | 
131 | /* Add display flex to .history-item to allow flexible space distribution between question and answer */
132 | .history-item {
133 |     display: flex;
134 |     align-items: flex-start; /* Align children to the start of the cross axis */
135 |     background-color: #f9f9f9;
136 |     border-left: 5px solid #007bff;
137 |     margin-bottom: 10px;
138 |     padding: 10px;
139 | }
140 | 
141 | /* Direct <div> children of .history-item share the row. NOTE(review): index.html renders <td> children, which this selector does not match */
142 | .history-item > div {
143 |     flex: 1; /* Grow to use the available space */
144 |     margin-bottom: 0.5rem; /* Space between question and answer */
145 | }
146 | 
147 | /* Align the content of the Q and A containers at the start */
148 | .history-item .question, 
149 | .history-item .answer {
150 |     align-self: flex-start;
151 |     width: 100%; /* Ensure each takes full width of the flex container */
152 | }
153 | 
154 | 
155 | .question {
156 |     flex: 1 100%; /* flex-grow 1 with a 100% flex-basis */
157 |     font-weight: bold; /* Make question text bold */
158 |     margin-bottom: 0.5rem; /* Add some space below the question */
159 | }
160 | 
161 | .answer {
162 |     flex: 3 100%; /* flex-grow 3 with a 100% flex-basis */
163 |     word-break: break-word; /* Ensure long words do not overflow */
164 |     margin-bottom: 0.5rem; /* Add some space below the answer for when they stack on small screens */
165 | }
166 | 
167 | /* Small-screen overrides for the history rows */
168 | 
169 | @media (max-width: 768px) {
170 |     .history-item {
171 |         flex-direction: column; /* Stack question and answer on top of each other */
172 |     }
173 | 
174 |     .question,
175 |     .answer {
176 |         width: 100%; /* Full width for both question and answer on smaller screens */
177 |     }
178 | }


--------------------------------------------------------------------------------
/frontend/templates/index.html:
--------------------------------------------------------------------------------
 1 |     <!DOCTYPE html>
 2 |     <html lang="en">
 3 |     <head>
 4 |         <meta charset="UTF-8">
 5 |         <meta name="viewport" content="width=device-width, initial-scale=1"> <!-- lets the max-width media queries in style.css apply on phones -->
 6 |         <title>ChatCVE Questions</title>
 7 |         <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
 8 |         <link href="https://fonts.googleapis.com/css?family=Roboto&display=swap" rel="stylesheet">
 9 |         <link href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/css/bootstrap.min.css" rel="stylesheet">
10 |     </head>
11 |     <body>
12 |         <div class="container">
13 |             <h1>Ask a Question about the CVE Scans</h1>
14 |             <form method="post" class="question-form">
15 |                 <input type="text" name="question" placeholder="Enter your question here..." class="question-input">
16 |                 <button type="submit" class="submit-btn">Submit</button>
17 |             </form>
18 |             
19 |             <div class="history">
20 |                 <h2>History</h2>
21 |                 {% if history %}
22 |                     <table class="history-table">
23 |                         {% for q, a in history %}
24 |                             <tr class="history-item">
25 |                                 <td class="question"><strong>Q:</strong> {{ q }}</td>
26 |                                 <td class="answer"><strong>A:</strong> {{ a }}</td>
27 |                             </tr>
28 |                         {% endfor %}
29 |                     </table>
30 |                 {% else %}
31 |                     <p>No history yet.</p>
32 |                 {% endif %}
33 |             </div>
34 |         </div>
35 |     </body>
36 |     </html>
37 | 


--------------------------------------------------------------------------------
/images.txt:
--------------------------------------------------------------------------------
1 | public.ecr.aws/xray/aws-xray-daemon:3.3.7
2 | public.ecr.aws/eks-distro/kubernetes-csi/node-driver-registrar:v2.8.0-eks-1-27-4
3 | 


--------------------------------------------------------------------------------
/node_modules/.package-lock.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "ChatCVE",
3 |   "lockfileVersion": 3,
4 |   "requires": true,
5 |   "packages": {}
6 | }
7 | 


--------------------------------------------------------------------------------
/package-lock.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "ChatCVE",
3 |   "lockfileVersion": 3,
4 |   "requires": true,
5 |   "packages": {}
6 | }
7 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | langchain==0.0.146
2 | urllib3==1.26.15
3 | openai==0.27.7
4 | 


--------------------------------------------------------------------------------
/scan.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import subprocess
  4 | import json
  5 | import datetime
  6 | from pathlib import Path
  7 | import sqlite3
  8 | import logging
  9 | from logging.handlers import TimedRotatingFileHandler
 10 | 
 11 | # Set up logging with rotation at midnight and keeping 7 days history
 12 | logger = logging.getLogger("ChatCVELogger")
 13 | logger.setLevel(logging.INFO)
 14 | handler = TimedRotatingFileHandler('ChatCVE_logs.log', when="midnight", interval=1, backupCount=7)
 15 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y%m%d%H%M%S')
 16 | handler.setFormatter(formatter)
 17 | logger.addHandler(handler)
 18 | 
 19 | def syft_scan(image):
 20 |     syft_executable = '/usr/bin/syft'  # Adjust the full path to syft as needed
 21 |     try:
 22 |         result = subprocess.run([syft_executable, '-o', 'cyclone-dx-json', image], capture_output=True, text=True)
 23 |         if result.returncode != 0:
 24 |             logger.error(f"Error executing syft command on image: {image}")
 25 |             logger.error(f"Error details: {result.stderr.strip()}")
 26 |             return None
 27 |         return json.loads(result.stdout)
 28 |     except json.JSONDecodeError as e:
 29 |         logger.error(f"Error parsing JSON output for image: {image}: {e}")
 30 |         return None
 31 | 
 32 | def grype_scan(image):
 33 |     try:
 34 |         result = subprocess.run(['grype', '-o', 'json', image], capture_output=True, text=True)
 35 |         if result.returncode != 0:
 36 |             logger.error(f"Error executing grype command on image: {image}: {result.stderr.strip()}")
 37 |             return None
 38 |         return json.loads(result.stdout)
 39 |     except json.JSONDecodeError as e:
 40 |         logger.error(f"Error parsing JSON output for image: {image}: {e}")
 41 |         return None
 42 | 
 43 | def write_to_db(db_name, scan_result, image_name):
 44 |     try:
 45 |         conn = sqlite3.connect(db_name)
 46 |         cursor = conn.cursor()
 47 |         for vulnerability in scan_result.get('matches', []):
 48 |             name = vulnerability.get('artifact', {}).get('name')
 49 |             installed = vulnerability.get('artifact', {}).get('version')
 50 |             fixed_in = vulnerability.get('vulnerability', {}).get('fixedInVersion')
 51 |             type = vulnerability.get('artifact', {}).get('type')
 52 |             vulnerability_id = vulnerability.get('vulnerability', {}).get('id')
 53 |             severity = vulnerability.get('vulnerability', {}).get('severity')
 54 |             cursor.execute("INSERT INTO app_patrol (NAME, INSTALLED, FIXED_IN, TYPE, VULNERABILITY, SEVERITY, IMAGE_TAG, DATE_ADDED) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))", (name, installed, fixed_in, type, vulnerability_id, severity, image_name))
 55 |         conn.commit()
 56 |     except sqlite3.Error as e:
 57 |         logger.error(f"SQLite error: {e}")
 58 |     except Exception as e:
 59 |         logger.error(f"Unexpected error when writing to DB: {e}")
 60 |     finally:
 61 |         conn.close()
 62 | 
 63 | if not Path('images.txt').is_file():
 64 |     logger.error("The file 'images.txt' does not exist.")
 65 |     images = []
 66 | else:
 67 |     with open('images.txt') as f:
 68 |         images = [line.strip() for line in f if line.strip()]
 69 | 
 70 | successful_scans = 0
 71 | start_time = datetime.datetime.now()
 72 | 
 73 | # Adjust the base directory to your project's needs
 74 | base_dir = Path(__file__).parent
 75 | scan_output_rootdir = base_dir / 'output'
 76 | scan_output_sbom_subdir = scan_output_rootdir / 'sbom'
 77 | scan_output_summary_subdir = scan_output_rootdir / 'scan_summary'
 78 | 
 79 | # Ensure directories exist
 80 | scan_output_sbom_subdir.mkdir(parents=True, exist_ok=True)
 81 | scan_output_summary_subdir.mkdir(parents=True, exist_ok=True)
 82 | 
 83 | for image in images:
 84 |     result = syft_scan(image)
 85 |     if result is None:
 86 |         continue
 87 | 
 88 |     now = datetime.datetime.now()
 89 |     formatted_now = now.strftime("%Y%m%d")
 90 | 
 91 |     # Correct directory for SBOM .json files
 92 |     scan_output_sbom_subdir.mkdir(parents=True, exist_ok=True)
 93 |     
 94 |     filename = image.replace('/', '_').replace(':', '__') + '.json'
 95 |     sbom_filename = scan_output_sbom_subdir / f"{formatted_now}_{filename}"  # Corrected path for SBOM files
 96 | 
 97 |     try:
 98 |         with open(sbom_filename, 'w') as f:  # Use sbom_filename for SBOM files
 99 |             json.dump(result, f)
100 |             successful_scans += 1
101 |     except IOError as e:
102 |         logger.error(f"Error writing to file: {sbom_filename}: {e}")
103 | 
104 |     grype_result = grype_scan(image)
105 |     if grype_result is not None:
106 |         write_to_db('app_patrol.db', grype_result, image)
107 | 
108 | execution_time = datetime.datetime.now() - start_time
109 | summary = f"Scanned {successful_scans} images\n" \
110 |           f"Results stored in {successful_scans} files\n" \
111 |           f"Total number of images scanned: {len(images)}\n" \
112 |           f"Total execution time: {execution_time}\n"
113 | logger.info(summary)
114 | 
115 | # Write summary to a file in the scan_summary directory
116 | summary_file_path = scan_output_summary_subdir / f"{now.strftime('%Y%m%d%H%M%S')}_summary.txt"
117 | with open(summary_file_path, 'w') as f:
118 |     f.write(summary)
119 | 


--------------------------------------------------------------------------------
/tests/__pycache__/test_scan.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jasona7/ChatCVE/c7063214401f3c6b2702d9d37215697c8b826908/tests/__pycache__/test_scan.cpython-310.pyc


--------------------------------------------------------------------------------
/tests/test_scan.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from unittest.mock import patch
 3 | from scan import syft_scan
 4 | 
 5 | class TestSyftScan(unittest.TestCase):
 6 |     @patch('scan.subprocess.run')
 7 |     def test_syft_scan_success(self, mock_run):
 8 |         # Mock subprocess.run to simulate syft command success
 9 |         mock_run.return_value.returncode = 0
10 |         mock_run.return_value.stdout = '{"vulnerabilities": []}'  # Example JSON output
11 | 
12 |         result = syft_scan("dummy_image")
13 |         self.assertIsNotNone(result)
14 |         self.assertEqual(result, {"vulnerabilities": []})
15 | 
16 |     @patch('scan.subprocess.run')
17 |     def test_syft_scan_failure(self, mock_run):
18 |         # Mock subprocess.run to simulate syft command failure
19 |         mock_run.return_value.returncode = 1
20 |         mock_run.return_value.stderr = "error message"
21 | 
22 |         result = syft_scan("dummy_image")
23 |         self.assertIsNone(result)
24 | 
25 | if __name__ == '__main__':
26 |     unittest.main()
27 | 


--------------------------------------------------------------------------------