├── .chglog ├── CHANGELOG.tpl.md └── config.yml ├── .github └── workflows │ ├── docs.yml_disable │ ├── go.yml │ ├── license.yml │ └── python.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE.txt ├── LICENSES ├── Apache-2.0.txt └── CC0-1.0.txt ├── MSR2019 ├── README.md ├── dataset │ └── vulas_db_msr2019_release.csv └── notebooks │ ├── acacia │ ├── __init__.py │ ├── commit_in_cve.py │ ├── git.py │ ├── git_temporal.py │ └── utils.py │ ├── dataset_msr2019.csv │ ├── msr2019.ipynb │ └── msr2019 │ ├── .keep │ ├── img │ └── .keep │ └── tex │ └── .keep ├── Makefile ├── README.md ├── REUSE.toml ├── docs ├── contributing.md ├── css │ ├── buttons.css │ └── faq.css ├── faq.md ├── index.md ├── kaybee-arch.excalidraw ├── kaybee-arch.png ├── kaybee.md ├── kaybee │ ├── dev_setup.md │ └── guidelines.md ├── prospector.md ├── prospector │ ├── dev_setup.md │ └── issues.md ├── report_casestudy_complete.png ├── team.md └── user_manual.md ├── kaybee ├── .pre-commit-config.yaml ├── Makefile ├── README.md ├── VERSION ├── cmd │ ├── cmd_test.go │ ├── create.go │ ├── export.go │ ├── import.go │ ├── list.go │ ├── merge.go │ ├── pull.go │ ├── purge.go │ ├── push.go │ ├── reconcile.go │ ├── root.go │ ├── setup.go │ ├── update.go │ └── version.go ├── go.mod ├── go.sum ├── internal │ ├── browser │ │ └── browser.go │ ├── conf │ │ ├── conf.go │ │ ├── conf_test.go │ │ ├── parse.go │ │ ├── v1 │ │ │ └── conf_v1.go │ │ └── v2 │ │ │ └── conf_v2.go │ ├── errors │ │ └── errors.go │ ├── filesystem │ │ ├── file.go │ │ └── file_test.go │ ├── model │ │ ├── bug.go │ │ ├── mergelog.go │ │ ├── mergelog_test.go │ │ ├── policy.go │ │ ├── policy_null.go │ │ ├── policy_smart.go │ │ ├── policy_soft.go │ │ ├── policy_strict.go │ │ ├── policy_test.go │ │ ├── statement.go │ │ └── statement_test.go │ ├── repository │ │ ├── repository.go │ │ └── repository_test.go │ └── tasks │ │ ├── create.go │ │ ├── data │ │ └── default_config.yaml │ │ ├── export.go │ │ ├── import.go │ │ ├── merge.go │ │ ├── 
pull.go │ │ ├── reconcile.go │ │ ├── setup.go │ │ ├── task.go │ │ └── tasks_test.go ├── main.go └── testdata │ ├── conf │ ├── kaybeeconf.yaml │ ├── kaybeeconf_noversion.yaml │ ├── sample_kbsync_exclude_regex.yaml │ ├── sample_kbsync_invalid_policy.yaml │ ├── sample_kbsync_malformed.yaml │ └── sample_kbsync_nobackend.yaml │ ├── statements │ ├── statement_affected_artifacts.yaml │ └── statement_commits.yaml │ └── steady │ ├── all_bugs.json │ └── cve-2018-11040.json ├── mkdocs.yml ├── prospector ├── .coveragerc ├── .env-sample ├── .flake8 ├── .pre-commit-config.yaml ├── .pylintrc ├── Makefile ├── README.md ├── backenddb │ ├── README.md │ ├── __init__.py │ ├── commitdb_test.py │ └── postgres.py ├── cli │ ├── __init__.py │ ├── console.py │ └── main.py ├── commitdb │ └── postgres.py ├── config-sample.yaml ├── core │ ├── __init__.py │ ├── prospector.py │ ├── prospector_test.py │ ├── report.py │ ├── report_test.py │ └── templates │ │ ├── base.html │ │ ├── card │ │ ├── changed_paths_block.html │ │ ├── commit_header.html │ │ ├── commit_title_block.html │ │ ├── matched_rules_block.html │ │ ├── message_block.html │ │ ├── pages_linked_from_advisories_block.html │ │ └── twin_list_block.html │ │ ├── collapse_all_scripts.html │ │ ├── filtering_scripts.html │ │ ├── report_header.html │ │ ├── results.html │ │ └── titled_block.html ├── data │ └── project_metadata.json ├── datamodel │ ├── __init__.py │ ├── advisory.py │ ├── advisory_test.py │ ├── commit.py │ ├── commit_test.py │ ├── constants.py │ ├── nlp.py │ ├── nlp_test.py │ └── user.py ├── ddl │ ├── 10_commit.sql │ ├── 20_users.sql │ ├── 30_vulnerability.sql │ ├── 40_processed_vuln.sql │ ├── 50_job.sql │ └── 60_alter_commit.sql ├── docker-compose.yml ├── docker │ ├── Dockerfile │ ├── cli │ │ └── Dockerfile │ ├── service │ │ ├── Dockerfile │ │ └── start.sh │ └── worker │ │ ├── Dockerfile │ │ ├── etc_supervisor_confd_rqworker.conf.j2 │ │ └── start_rq_worker.sh ├── docs │ ├── img │ │ ├── prospector-assuremoss-arch.png │ │ ├── 
prospector-assuremoss.excalidraw │ │ └── prospector-assuremoss.png │ └── prospector-assuremos.excalidraw ├── evaluation │ ├── README.md │ ├── __init__.py │ ├── analyse.py │ ├── analyse_statistics.py │ ├── config-sample.yaml │ ├── data │ │ └── results │ │ │ ├── summary_execution_checkmarks_table.tex │ │ │ ├── summary_execution_flow-analysis.json │ │ │ ├── summary_execution_mvi_table.tex │ │ │ ├── summary_execution_mvi_with_llm.json │ │ │ ├── summary_execution_mvi_without_llm.json │ │ │ └── summary_execution_nvi_table.tex │ ├── dispatch_jobs.py │ ├── main.py │ └── utils.py ├── filtering │ ├── __init__.py │ └── filter.py ├── git │ ├── __init__.py │ ├── exec.py │ ├── git.py │ ├── git_test.py │ ├── raw_commit.py │ ├── raw_commit_test.py │ ├── version_to_tag.py │ └── version_to_tag_test.py ├── llm │ ├── instantiation.py │ ├── llm_service.py │ ├── llm_service_test.py │ ├── models │ │ ├── anthropic.py │ │ ├── gemini.py │ │ ├── mistral.py │ │ └── openai.py │ └── prompts │ │ ├── classify_commit.py │ │ └── get_repository_url.py ├── log │ ├── __init__.py │ └── logger.py ├── pipeline │ ├── README.md │ ├── __init__.py │ ├── cloning_repos.py │ ├── filter_entries.py │ ├── job_creation.py │ ├── main.py │ ├── version_extraction_test.py │ └── versions_extraction.py ├── prospector.conf ├── pyproject.toml ├── requirements-dev.txt ├── requirements.in ├── requirements.txt ├── rules │ ├── __init__.py │ ├── helpers.py │ ├── helpers_test.py │ ├── rules.py │ └── rules_test.py ├── run_prospector.sh ├── service │ ├── api │ │ ├── README.md │ │ ├── __init__.py │ │ ├── api_test.py │ │ ├── dependencies.py │ │ └── routers │ │ │ ├── __init__.py │ │ │ ├── endpoints.py │ │ │ ├── feeds.py │ │ │ ├── home.py │ │ │ ├── jobs.py │ │ │ ├── nvd.py │ │ │ ├── nvd_feed_update.py │ │ │ ├── preprocessed.py │ │ │ └── users.py │ ├── main.py │ └── static │ │ ├── feed.html │ │ ├── feed.js │ │ ├── index.css │ │ ├── index.html │ │ ├── index.js │ │ ├── job_configuration.css │ │ ├── job_configuration.html │ │ ├── 
job_configuration.js │ │ ├── job_info.css │ │ ├── job_info.html │ │ ├── job_info.js │ │ └── report_list.html ├── stats │ ├── __init__.py │ ├── collection.py │ ├── collection_test.py │ ├── execution.py │ └── execution_test.py └── util │ ├── __init__.py │ ├── config_parser.py │ ├── config_parser_test.py │ ├── http.py │ ├── inspection.py │ ├── lsh.py │ ├── profile.py │ ├── report_analyzer.py │ ├── sample_data_generation.py │ ├── similarity.py │ ├── singleton.py │ ├── test_type_safety.py │ ├── tokenize.py │ └── type_safety.py ├── references ├── others.bib └── ours.bib ├── scripts ├── bib2md.py ├── changelog-gen.py ├── release.sh └── requirements.txt └── vulnerability-data └── README.md /.chglog/CHANGELOG.tpl.md: -------------------------------------------------------------------------------- 1 | {{ range .Versions }} 2 | 3 | ## {{ if .Tag.Previous }}[{{ .Tag.Name }}]({{ $.Info.RepositoryURL }}/compare/{{ .Tag.Previous.Name }}...{{ .Tag.Name }}){{ else }}{{ .Tag.Name }}{{ end }} ({{ datetime "2006-01-02" .Tag.Date }}) 4 | 5 | {{ range .CommitGroups -}} 6 | ### {{ .Title }} 7 | 8 | {{ range .Commits -}} 9 | * {{ .Subject }} 10 | {{ end }} 11 | {{ end -}} 12 | 13 | {{- if .NoteGroups -}} 14 | {{ range .NoteGroups -}} 15 | ### {{ .Title }} 16 | 17 | {{ range .Notes }} 18 | {{ .Body }} 19 | {{ end }} 20 | {{ end -}} 21 | {{ end -}} 22 | {{ end -}} 23 | -------------------------------------------------------------------------------- /.chglog/config.yml: -------------------------------------------------------------------------------- 1 | style: github 2 | template: CHANGELOG.tpl.md 3 | info: 4 | title: CHANGELOG 5 | repository_url: https://github.com/SAP/project-kb 6 | options: 7 | commits: 8 | filters: 9 | Type: 10 | - feat 11 | - fix 12 | - perf 13 | - refactor 14 | - chore 15 | commit_groups: 16 | title_maps: 17 | feat: New 18 | fix: Bug Fixes 19 | perf: Performance Improvements 20 | refactor: Refactoring 21 | chore: Misc 22 | header: 23 | pattern: "^(\\w*)\\:\\s(.*)$" 24 
| pattern_maps: 25 | - Type 26 | - Subject 27 | notes: 28 | keywords: 29 | - BREAKING CHANGE 30 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml_disable: -------------------------------------------------------------------------------- 1 | name: Build Docs 2 | 3 | # Controls when the action will run. Triggers the workflow on push or pull request 4 | # events but only for the master branch 5 | on: 6 | push: 7 | branches: [master] 8 | pull_request: 9 | branches: [master] 10 | 11 | jobs: 12 | build: 13 | name: Build and Deploy Documentation 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - name: Checkout Master 18 | uses: actions/checkout@v2 19 | 20 | - name: Install setuptools 21 | run: apt-get install python3-setuptools 22 | 23 | - name: Install Material 24 | run: python3 -m pip install mkdocs-material 25 | 26 | - name: Build and Deploy Documentation using MkDocs 27 | uses: Tangerine-Community/tangy-mkdocs-build-action@v1 28 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | paths: 7 | - 'kaybee/**' 8 | - 'go.mod' 9 | - 'go.sum' 10 | - 'Makefile' 11 | - '.github/workflows/go.yml' 12 | pull_request: 13 | branches: [ master ] 14 | paths: 15 | - 'kaybee/**' 16 | - 'go.mod' 17 | - 'go.sum' 18 | - 'Makefile' 19 | 20 | jobs: 21 | 22 | build: 23 | name: Build 24 | runs-on: ubuntu-latest 25 | env: 26 | GO111MODULE: auto 27 | steps: 28 | 29 | - name: Set up Go 1.x 30 | uses: actions/setup-go@v2 31 | with: 32 | go-version: ^1.15 33 | id: go 34 | 35 | - name: Check out code into the Go module directory 36 | uses: actions/checkout@v2 37 | 38 | - name: Install golint 39 | run: go get -u golang.org/x/lint/golint 40 | 41 | - name: Install pkger 42 | run: go get -u github.com/markbates/pkger/cmd/pkger 43 | 44 | - name: 
Vet and test 45 | run: make -C kaybee check 46 | 47 | - name: Build 48 | run: make -C kaybee build-win build-linux build-macos 49 | -------------------------------------------------------------------------------- /.github/workflows/license.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: license 4 | 5 | # Controls when the action will run. Triggers the workflow on push or pull request 6 | # events but only for the master branch 7 | on: 8 | push: 9 | branches: [ master ] 10 | pull_request: 11 | branches: [ master ] 12 | 13 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 14 | jobs: 15 | # This workflow contains a single job called "build" 16 | build: 17 | # The type of runner that the job will run on 18 | runs-on: ubuntu-latest 19 | 20 | # Steps represent a sequence of tasks that will be executed as part of the job 21 | steps: 22 | - name: REUSE Compliance Check 23 | uses: fsfe/reuse-action@v1.1 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # General 2 | site 3 | **/merge.log 4 | */cpu.prof 5 | *.pyc 6 | *git-cache* 7 | *.log 8 | *.log.* 9 | **/cov_html 10 | .coverage 11 | similarities.csv 12 | 13 | # Virtual environment 14 | **/.venv/ 15 | 16 | # VSCode Settings 17 | **/.vscode/ 18 | 19 | # Regarding KB 20 | .kaybee 21 | kaybee/internal/repository/profile001.pdf 22 | kaybee/internal/repository/repository.test 23 | kaybee/internal/tasks/.kaybee 24 | kaybee/internal/tasks/cpu.prof 25 | kaybee/internal/tasks/profile001.pdf 26 | kaybee/internal/tasks/tasks.test 27 | kaybee/internal/repository/cpu.prof 28 | kaybee/kaybee.code-workspace 29 | kaybee/coverage.out 30 | kaybee/kaybee 31 | kaybee/internal/reconcile/debug.test 32 | kaybee/internal/.kaybee/**/* 33 | kaybee/dist/** 34 
| kaybee/kaybeeconf.yaml 35 | kaybee/myconfig.yml 36 | kaybee/.kaybee/ 37 | kaybee/steady.sh 38 | kaybee/kaybeeconf-custom.yaml 39 | kaybee/kaybee-new-statements 40 | kaybee/pkged.go 41 | kaybeeconf.yaml 42 | 43 | # Regarding Prospector 44 | prospector/.env 45 | prospector/workspace.code-workspace 46 | prospector/disabled_tests/skip_test-commits.db 47 | prospector/disabled_tests/skip_test-vulnerabilities.db 48 | prospector/tracer_dataset_final_2 49 | prospector/results 50 | prospector/*.py 51 | prospector/install_fastext.sh 52 | prospector/cov_html/* 53 | prospector/config.yaml 54 | prospector/.coverage.* 55 | prospector/.coverage 56 | prospector/cov_html 57 | prospector/prospector.code-workspace 58 | prospector/requests-cache.sqlite 59 | prospector/prospector-report.html 60 | prospector/test_report.html 61 | prospector/test_report.json 62 | prospector/.idea/* 63 | prospector/*.html 64 | prospector/*.json 65 | prospector/evaluation/data/input/* 66 | prospector/evaluation/data/reports/* 67 | prospector/evaluation/config.yaml 68 | .DS_Store 69 | prospector/pipeline/reports/* 70 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: true 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v4.3.0 5 | hooks: 6 | - id: trailing-whitespace 7 | - id: end-of-file-fixer 8 | - id: check-yaml 9 | - id: check-added-large-files 10 | - id: check-merge-conflict 11 | - repo: https://github.com/fsfe/reuse-tool 12 | rev: v0.11.1 13 | hooks: 14 | - id: reuse 15 | - repo: https://github.com/dnephin/pre-commit-golang 16 | rev: v0.3.5 17 | hooks: 18 | - id: go-fmt 19 | # - id: go-vet 20 | # args: [./kaybee] 21 | - id: go-lint 22 | - id: go-imports 23 | # - id: go-cyclo 24 | args: [-over=15] 25 | - id: validate-toml 26 | - id: no-go-testing 27 | # - id: gometalinter 28 | # - id: golangci-lint 29 | # - id: go-critic 
30 | # - id: go-unit-tests 31 | # - id: go-build 32 | - repo: https://github.com/psf/black 33 | rev: 22.10.0 34 | hooks: 35 | - id: black 36 | - repo: https://github.com/pycqa/isort 37 | rev: 5.12.0 38 | hooks: 39 | - id: isort 40 | args: ["--profile", "black", "--filter-files"] 41 | - repo: https://github.com/pre-commit/pre-commit-hooks 42 | rev: v2.3.0 43 | hooks: 44 | - id: flake8 45 | args: # arguments to configure flake8 46 | # making isort line length compatible with black 47 | - "--max-line-length=100" 48 | - "--ignore=E203,E501,W503" 49 | - "--max-complexity=12" 50 | # - "--select=B,C,E,F,W,T4,B9" 51 | # these are errors that will be ignored by flake8 52 | # check out their meaning here 53 | # https://flake8.pycqa.org/en/latest/user/error-codes.html 54 | # - "--ignore=E203,E266,E501,W503,F403,F401,E402" 55 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute to project KB 2 | 3 | ### Developer Certificate of Origin (DCO) 4 | Due to legal reasons, contributors will be asked to accept a DCO when they create the first pull request to this project. This happens in an automated fashion during the submission process. SAP uses [the standard DCO text of the Linux Foundation](https://developercertificate.org/). 5 | 6 | ### **Do you have new vulnerability data?** 7 | 8 | A structured process to create and share vulnerability data is work in progress. 9 | 10 | Until it is defined, we invite you to just create pull requests in order to 11 | submit new vulnerability data, using an existing statement as a template. You 12 | should provide, at least the vulnerability identifier, the URL of the source 13 | code repository of the affected component and one or more identifiers of the 14 | commits used to fix the vulnerability. 
15 | 16 | ### **Did you find a bug?** 17 | 18 | * **Ensure the bug was not already reported** by searching in the [GitHub Issues](https://github.com/sap/project-kb/issues). 19 | 20 | * If it is a new one, feel free to [open it](https://github.com/sap/project-kb/issues/new). Be sure to include a 21 | **title and a clear description**, as much relevant information as possible, and 22 | a **code sample** or an **executable test case** demonstrating the expected 23 | behavior that is not occurring. 24 | 25 | 26 | ### **Did you write a patch that fixes a bug?** 27 | 28 | * Open a new GitHub pull request with the patch. 29 | * Ensure the PR description clearly describes problem and solution. Include 30 | the relevant issue number if applicable. 31 | * Add one or more test cases as appropriate. 32 | * Make sure all other tests and checks still pass (that is, run `make check` in 33 | the `kaybee` folder; it should succeed) 34 | 35 | ### **Did you fix whitespace, format code, or make a purely cosmetic patch?** 36 | 37 | Changes that are cosmetic in nature and do not modify the 38 | stability, functionality, or testability are accepted. 39 | 40 | ### **Do you intend to add a new feature or change an existing one?** 41 | 42 | * Suggest your change by creating an issue, then start writing code in your own 43 | fork and make a PR when ready. Please make sure you provide tests for your 44 | code, and ensure you can successfully execute `make check` (in the `kaybee` 45 | folder) with no errors and that you include adequate documentation in your 46 | code. 47 | 48 | 49 | ### **Do you have questions about the source code?** 50 | 51 | * For now, file an issue (we consider that the need of clarifications at this 52 | stage indicates missing or inadequate documentation). 53 | 54 | ### **Do you want to contribute to the documentation?** 55 | 56 | You are most welcome to do so, project KB needs every one of you to succeed, 57 | every drop matters! 58 | 59 | Thanks! 
:heart: :heart: :heart: 60 | 61 | The project KB team 62 | -------------------------------------------------------------------------------- /MSR2019/README.md: -------------------------------------------------------------------------------- 1 | # MSR 2019 Data Showcase 2 | 3 | A description of the dataset and its possible applications (on top of fueling the vulerability assessment tool) can be found in 4 | 5 | *Serena E. Ponta, Henrik Plate, Antonino Sabetta, Michele Bezzi, Cédric Dangremont, [A Manually-Curated Dataset of Fixes to Vulnerabilities of Open-Source Software](http://arxiv.org/abs/1902.02595)* 6 | 7 | If you use this dataset, please cite it as: 8 | 9 | ``` 10 | @inproceedings{ponta2019msr, 11 | author={Serena E. Ponta and Henrik Plate and Antonino Sabetta and Michele Bezzi and 12 | C´edric Dangremont}, 13 | title={A Manually-Curated Dataset of Fixes to Vulnerabilities of Open-Source Software}, 14 | booktitle={Proceedings of the 16th International Conference on Mining Software Repositories}, 15 | year=2019, 16 | month=May, 17 | } 18 | ``` 19 | 20 | The Jupyter notebook used to analyze the dataset and to produce the statistics and the plots shown in the paper can be found [here](notebooks). 21 | 22 | ## Sample applications 23 | 24 | ### Automated classification of security-relevant commits in open-source repositories 25 | 26 | The paper [A Practical Approach to the Automatic Classification of Security-Relevant Commits](https://arxiv.org/abs/1807.02458) 27 | uses this dataset to train a classifier that detects security-relevant commits (i.e., that are likely to fix a vulnerability). 
28 | -------------------------------------------------------------------------------- /MSR2019/notebooks/acacia/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/MSR2019/notebooks/acacia/__init__.py -------------------------------------------------------------------------------- /MSR2019/notebooks/acacia/commit_in_cve.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | # from acacia import git, utils 4 | import os 5 | import sys 6 | from pprint import pprint 7 | 8 | import plac 9 | import requests 10 | import requests_cache 11 | from bs4 import BeautifulSoup 12 | from bs4.element import Comment 13 | 14 | requests_cache.install_cache("requests_cache", expire_after=7 * 24 * 60 * 60) 15 | 16 | NO_COMMIT_FOUND = 0 # 'No reference to any commits was found') 17 | CVE_MISSING = 1 # 'CVE not found in NVD') 18 | CVE_NOT_PUBLIC = 2 # 'CVE id exists, but no description is available in the NVD') 19 | CVE_HAS_GITHUB_LINK = 4 # 'A GIT link is present in the CVE description') 20 | CVE_HAS_SVN_LINK = 8 # 'An SVN link is present in the CVE description') 21 | 22 | 23 | def is_visible_text(e): 24 | if isinstance(e, Comment): 25 | return False 26 | if e.parent.name in ["title", "script", "head", "style", "meta", "[document]"]: 27 | return False 28 | return True 29 | 30 | 31 | def html_to_txt(body): 32 | bs = BeautifulSoup(body, "html.parser") 33 | elements = bs.findAll(text=True) 34 | visible_txt = filter(is_visible_text, elements) 35 | return u" ".join(t.strip() for t in visible_txt) 36 | 37 | 38 | def check_commit_in_cve(cve, verbose=False): 39 | result = NO_COMMIT_FOUND 40 | url = "https://nvd.nist.gov/vuln/detail/" + cve 41 | 42 | r = requests.get(url) 43 | 44 | if r.status_code != 200: 45 | result += CVE_MISSING 46 | else: 47 | page_txt = html_to_txt(r.text) 48 | 49 | if "CVE ID Not Found" 
in page_txt: 50 | result += CVE_NOT_PUBLIC 51 | else: 52 | if "git" in page_txt: 53 | result += CVE_HAS_GITHUB_LINK 54 | if "svn" in page_txt: 55 | result += CVE_HAS_SVN_LINK 56 | 57 | if verbose: 58 | print("CVE: " + str(cve)) 59 | print(" " + str(url)) 60 | print("Result: " + str(result)) 61 | # print(' ' + str(result[1])) 62 | 63 | return (url, result) 64 | 65 | 66 | # (help, kind, abbrev, type, choices, metavar) 67 | @plac.annotations( 68 | cve=("CVE to check", "positional", None, str, None, "REPOSITORY"), 69 | verbose=("Verbose", "flag", "v", bool), 70 | ) 71 | def main(cve, verbose): 72 | url, result = check_commit_in_cve(cve, verbose) 73 | print("{}\t{}".format(result, url)) 74 | 75 | 76 | if __name__ == "__main__": 77 | import plac 78 | 79 | plac.call(main) 80 | -------------------------------------------------------------------------------- /MSR2019/notebooks/acacia/git_temporal.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | import os 4 | import sys 5 | from datetime import datetime 6 | from pprint import pprint 7 | 8 | import plac 9 | from acacia import git, utils 10 | 11 | GIT_CACHE = "/tmp/git-cache-3" 12 | 13 | 14 | def extract_timing_data(commit_id, repo_url, verbose=False, git_cache=GIT_CACHE): 15 | 16 | if not os.path.exists(git_cache): 17 | print("Folder " + git_cache + " must exist!") 18 | return None 19 | 20 | # ensure the repository is available locally 21 | git.clone_repo(repo_url, output_folder=GIT_CACHE, skip_existing=True) 22 | 23 | cwd = os.path.join(git_cache, git.folder_name_from_url(repo_url)) 24 | 25 | # get tag info 26 | tag = utils.execute("git tag --sort=taggerdate --contains " + commit_id, cwd=cwd)[0] 27 | if tag != "": 28 | tag_date = utils.execute( 29 | 'git show -s --format="%at" ' + tag + "^{commit}", cwd=cwd 30 | )[0][1:-1] 31 | else: 32 | tag_date = "0" 33 | 34 | try: 35 | commit_date = utils.execute('git show -s --format="%ct" ' + commit_id, cwd=cwd)[ 36 | 0 37 
| ][1:-1] 38 | time_delta = int(tag_date) - int(commit_date) 39 | except: 40 | commit_date = "0" 41 | time_delta = 0 42 | # print("exception:", commit_id, repo_url, commit_date, tag_date) 43 | 44 | if verbose: 45 | print("repository: " + repo_url) 46 | print("commit: " + commit_id) 47 | print("commit_date: " + commit_date) 48 | print( 49 | " " 50 | + datetime.utcfromtimestamp(int(commit_date)).strftime("%Y-%m-%d %H:%M:%S") 51 | ) 52 | print("tag: " + tag) 53 | print("tag_date: " + tag_date) 54 | print( 55 | " " 56 | + datetime.utcfromtimestamp(int(tag_date)).strftime("%Y-%m-%d %H:%M:%S") 57 | ) 58 | print( 59 | "Commit-to-release interval: {0:.2f} days".format(time_delta / (3600 * 24)) 60 | ) 61 | 62 | result = (tag, tag_date, commit_date, time_delta) 63 | print(result) 64 | return result 65 | 66 | 67 | # (help, kind, abbrev, type, choices, metavar) 68 | @plac.annotations( 69 | repo_url=("Repository", "positional", None, str, None, "REPOSITORY"), 70 | commit_id=("Commit", "positional", None, str, None, "COMMIT"), 71 | verbose=("Verbose", "flag", "v", bool), 72 | git_cache=("Git repository dir", "option", "g", str, None, "REPO_DIR"), 73 | ) 74 | def main(repo_url, commit_id, verbose=False, git_cache=GIT_CACHE): 75 | return extract_timing_data(commit_id, repo_url, verbose, git_cache) 76 | 77 | 78 | if __name__ == "__main__": 79 | import plac 80 | 81 | plac.call(main) 82 | -------------------------------------------------------------------------------- /MSR2019/notebooks/acacia/utils.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | import logging 4 | import re 5 | import subprocess 6 | import traceback 7 | from datetime import datetime 8 | 9 | # def execute_wait(cmd, cwd='/tmp', encoding='latin-1', timeout=None): 10 | # logging.debug("exec: {} (in dir: {})".format( cmd, cwd ) ) 11 | # try: 12 | # process = subprocess.Popen(cmd.split(), cwd=cwd) 13 | # process.wait(timeout = timeout) 14 | # return 
process.stdout.decode(encoding).split('\n') 15 | # except Exception as e: 16 | # logging.error('Exception happened while running ' + cmd) 17 | # logging.error(str(e)) 18 | # return False 19 | 20 | 21 | def execute(cmd, cwd="/tmp", encoding="latin-1"): 22 | try: 23 | p2 = subprocess.Popen(cmd.split(), cwd=cwd, stdout=subprocess.PIPE) 24 | out, err = p2.communicate() 25 | if err: 26 | # traceback.print_exc() 27 | return None 28 | 29 | raw_output_list = out.decode(encoding).split("\n") 30 | return raw_output_list 31 | 32 | except Exception as e: 33 | # traceback.print_exc() 34 | return None 35 | 36 | 37 | # def flatten_lists(container): 38 | # for i in container: 39 | # if isinstance(i, (list, tuple)): 40 | # for j in flatten_lists(i): 41 | # yield j 42 | # else: 43 | # yield i 44 | 45 | # def clean_and_split_str(string): 46 | # ''' Clean and split sentence into words ''' 47 | # # string = string.encode('UTF-8') 48 | # strip_special_chars = re.compile("[^A-Za-z]+") 49 | # string = re.sub(strip_special_chars, " ", string) 50 | # return string.strip().split() 51 | 52 | 53 | class LatexExporter: 54 | def __init__(self): 55 | self.data = set() 56 | 57 | def __str__(self): 58 | self.print() 59 | 60 | def save(self, k, v, comment=""): 61 | print("[" + k + "] " + comment.strip() + ": " + str(v)) 62 | self.data.add((k, v, comment)) 63 | 64 | def print(self): 65 | for d in self.data: 66 | if d[2] != "": 67 | print("\n% " + str(d[2])) 68 | print("\\newcommand{\\" + str(d[0]) + "}{" + str(d[1]) + "\\xspace}") 69 | 70 | def to_file(self, filename): 71 | with open(filename, "w") as f: 72 | f.write( 73 | "%\n% This file was auto-generated on " + str(datetime.now()) + "\n%\n" 74 | ) 75 | for d in self.data: 76 | line = "" 77 | if d[2] != "": 78 | line += "\n% " + str(d[2]) + "\n" 79 | line += "\\newcommand{\\" + d[0] + "}{" + str(d[1]) + "\\xspace}" 80 | print(line) 81 | f.write(line + "\n") 82 | -------------------------------------------------------------------------------- 
/MSR2019/notebooks/msr2019/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/MSR2019/notebooks/msr2019/.keep -------------------------------------------------------------------------------- /MSR2019/notebooks/msr2019/img/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/MSR2019/notebooks/msr2019/img/.keep -------------------------------------------------------------------------------- /MSR2019/notebooks/msr2019/tex/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/MSR2019/notebooks/msr2019/tex/.keep -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # $(eval VERSION = $(shell head VERSION)) 2 | # $(eval GIT_COMMIT = $(shell git rev-parse HEAD)) 3 | # $(eval NOW = $(shell date)) 4 | 5 | # # Go parameters 6 | # GOCMD=go 7 | # GOBUILD=$(GOCMD) build 8 | # GOCLEAN=$(GOCMD) clean 9 | # GOTEST=$(GOCMD) test 10 | # GOGET=$(GOCMD) get 11 | # BINARY_NAME=kaybee 12 | # BINARY_UNIX=$(BINARY_NAME)-$(VERSION)_linux-amd64 13 | # BINARY_WINDOWS=$(BINARY_NAME)-$(VERSION)_win-amd64 14 | # BINARY_MACOS=$(BINARY_NAME)-$(VERSION)_darwin-amd64 15 | 16 | # all: lint vet test build #ui 17 | 18 | # build: fmt 19 | # $(GOBUILD) -ldflags='-X "github.com/sap/project-kb/kaybee/cmd.buildDate=$(NOW)" -X "github.com/sap/project-kb/kaybee/cmd.buildCommitID=$(GIT_COMMIT)" -X "github.com/sap/project-kb/kaybee/cmd.version=$(VERSION)"' -o $(BINARY_NAME) -v 20 | 21 | # # ui: 22 | # # $(MAKE) --directory=ui 23 | 24 | SUBDIRS := kaybee 25 | 26 | .PHONY: all $(SUBDIRS) 27 | 28 | all: $(SUBDIRS) 
build-docs 29 | $(SUBDIRS): 30 | $(MAKE) -C $@ 31 | 32 | build: 33 | $(MAKE) --directory=kaybee build 34 | 35 | test: 36 | $(MAKE) --directory=kaybee test 37 | 38 | deploy-docs: 39 | mkdocs gh-deploy 40 | 41 | build-docs: 42 | mkdocs build 43 | 44 | serve-docs: 45 | mkdocs serve 46 | 47 | changelog: 48 | $(eval TAG = $(shell cat kaybee/VERSION)) 49 | git-chglog "v$(TAG)" 50 | -------------------------------------------------------------------------------- /REUSE.toml: -------------------------------------------------------------------------------- 1 | version = 1 2 | SPDX-PackageName = "project \"KB\"" 3 | SPDX-PackageSupplier = "Antonino Sabetta " 4 | SPDX-PackageDownloadLocation = "https://github.com/sap/project-kb" 5 | 6 | [[annotations]] 7 | path = ["prospector/**", "kaybee/**", "docs/**", "scripts/**", "vulnerability-data/**", "MSR2019/**", "NOTICE.txt", "README.md"] 8 | precedence = "aggregate" 9 | SPDX-FileCopyrightText = "2019-2020 SAP SE or an SAP affiliate company and project \"KB\" contributors" 10 | SPDX-License-Identifier = "Apache-2.0" 11 | 12 | [[annotations]] 13 | path = ["**.bib", "mkdocs.yml", ".chglog/**", ".github/**", "CHANGELOG.md", "CONTRIBUTING.md", "Makefile", "go.mod", "go.sum", ".pre-commit-config.yaml", ".gitignore", "**/**.yaml", "Pipfile"] 14 | precedence = "aggregate" 15 | SPDX-FileCopyrightText = "2019-2020 SAP SE or an SAP affiliate company and project \"KB\" contributors" 16 | SPDX-License-Identifier = "CC0-1.0" 17 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # How to contribute to project "KB" 2 | 3 | Welcome to the contribution guidelines for project "KB". This webpage provides information on how to contribute to our two different tools: [Prospector](prospector.md) and [KayBee](kaybee.md). 
4 | 5 | ## Prospector 6 | 7 | If you're interested in contributing to the development of Prospector, 8 | the following pages provide instructions on setting up the development environment and how to contribute to our project. 9 | 10 | - [Development Setup](prospector/dev_setup.md) 11 | - [Contribution Guidelines](prospector/issues.md) 12 | 13 | ## Kaybee 14 | 15 | If you're interested in contributing to the development of Kaybee, 16 | the following pages provide instructions on setting up the development environment and how to contribute to our project. 17 | 18 | - [Development Setup](kaybee/dev_setup.md) 19 | - [Contribution Guidelines](kaybee/guidelines.md) 20 | 21 | ## Do you want to contribute to the documentation? 22 | 23 | You are most welcome to do so, project "KB" needs every one of you to succeed, every drop matters! 24 | 25 | Thanks! 26 | 27 | The project "KB" team 28 | -------------------------------------------------------------------------------- /docs/css/buttons.css: -------------------------------------------------------------------------------- 1 | /*.md-button .md-button--primary { 2 | background-color: green; 3 | color: #ffffff; 4 | margin-right: 50px; 5 | }*/ 6 | -------------------------------------------------------------------------------- /docs/css/faq.css: -------------------------------------------------------------------------------- 1 | /*margin: 1.6rem 0;*/ 2 | 3 | span.toctitle { 4 | font-size: 1.8rem; 5 | font-weight: 600; 6 | letter-spacing: -0.01em; 7 | } 8 | 9 | div.toc>ul { 10 | margin: 0; 11 | } 12 | 13 | div.toc>ul>li { 14 | margin: 10px; 15 | list-style: none; 16 | font-weight: 800; 17 | } 18 | 19 | div.toc>ul>li>ul>li { 20 | list-style: none; 21 | font-weight: 400; 22 | } 23 | 24 | div.toc>ul>li>ul>li:before { 25 | content: "-"; 26 | font-weight: 600; 27 | display: block; 28 | float: left; 29 | width: 1.2em; 30 | color: #00af26; 31 | } 32 | 33 | /* div.md-sidebar--secondary { 34 | /* display: none; */ 35 | /*} */ 36 | 
-------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | # Frequently Asked Questions (F.A.Q.) 2 | 3 | ## Troubleshooting 4 | 5 | ??? question "How do I get support?" 6 | 7 | If you need help with project KB, with its data or with the `kaybee` tool, 8 | the most efficient way to get support is to ask on this [Gitter channel](https://gitter.im/project-kb/help). 9 | 10 | 11 | ## Misc 12 | 13 | ??? question "What is the relation of project "KB" with Eclipse Steady?" 14 | 15 | Eclipse Steady needs code-level data about how vulnerabilities are fixed 16 | (basically, which commit in which repository fixes which vulnerability). 17 | Project "KB" promotes a collaborative approach to curating such data. 18 | -------------------------------------------------------------------------------- /docs/kaybee-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/docs/kaybee-arch.png -------------------------------------------------------------------------------- /docs/kaybee/dev_setup.md: -------------------------------------------------------------------------------- 1 | # Development Setup 2 | Coming soon... 3 | -------------------------------------------------------------------------------- /docs/kaybee/guidelines.md: -------------------------------------------------------------------------------- 1 | # Contribution Guidelines 2 | 3 | ## **Do you intend to contribute with new vulnerability data?** 4 | 5 | A structured process to create and share vulnerability data is work in progress. 6 | 7 | For the time being, you can use `kaybee create <vulnerability-id>` to generate a skeleton 8 | statement that you can then edit with a normal text editor. 
9 | 10 | You can then create pull requests against the `vulnerability-data` branch in this repository 11 | or you can host the statements in your own repository (please do let us know if you choose 12 | this option so that we can benefit from your work by pulling your statements). 13 | 14 | You will need to dedicate a branch to the statements: the branch must contain a 15 | top-level `statements` folder in which you can store your statements. You can 16 | refer to the [`vulnerability-data` branch in this 17 | repository](https://github.com/SAP/project-kb/tree/vulnerability-data) to see 18 | what is the expected structure. 19 | 20 | Your statement should provide, at least, a vulnerability identifier (use the CVE 21 | identifier if it exists), the URL of the source code repository of the affected 22 | component and one or more identifiers of the commits used to fix the 23 | vulnerability. 24 | 25 | ## **Did you find a bug?** 26 | 27 | * **Ensure the bug was not already reported** by searching on GitHub under [Issues](https://github.com/sap/project-kb/issues). 28 | 29 | * If you're unable to find an open issue addressing the problem, [open a new one](https://github.com/sap/project-kb/issues/new). Be sure to include a **title and clear description**, as much relevant information as possible, and a **code sample** or an **executable test case** demonstrating the expected behavior that is not occurring. 30 | 31 | 32 | ## **Did you write a patch that fixes a bug?** 33 | 34 | * Open a new GitHub pull request with the patch. 35 | * Ensure the PR description clearly describes the problem and solution. Include the relevant issue number if applicable. 
36 | * Add one or more test cases as appropriate 37 | * Make sure all other tests and checks still pass (that is, run `make check` in the `kaybee` folder; it should succeed) 38 | 39 | ## **Did you fix whitespace, format code, or make a purely cosmetic patch?** 40 | 41 | Changes that are cosmetic in nature and do not add anything substantial to the stability, functionality, or testability are accepted at this time. 42 | 43 | ## **Do you intend to add a new feature or change an existing one?** 44 | 45 | Suggest your change by creating an issue and start writing code in your own fork and make a PR when ready. 46 | Please make sure you provide tests for your code, and ensure you can successfully execute `make check` (in the `kaybee` folder) 47 | with no errors and that you include adequate documentation in your code. 48 | 49 | ## **Do you have questions about the source code?** 50 | 51 | For now, file an issue (we consider that the need of clarifications at this stage indicates missing or inadequate documentation). 52 | -------------------------------------------------------------------------------- /docs/prospector/issues.md: -------------------------------------------------------------------------------- 1 | # Issue Tracker 2 | 3 | ## Reporting an Issue 4 | 5 | If you encounter a bug or have a specific feature request, please follow these steps to report it: 6 | 7 | 1. Go to the [Issues](https://github.com/SAP/project-kb/issues) section. 8 | 2. Click on the **New Issue** button. 9 | 3. Provide a descriptive title for your issue. 10 | 4. In the issue description, include the following information: 11 | - Steps to reproduce the issue (if applicable). 12 | - Expected behavior. 13 | - Actual behavior observed. 14 | - Any relevant error messages or logs. 15 | - Screenshots or code snippets (if applicable and helpful). 16 | 5. Add appropriate labels to categorize the issue (e.g., bug, enhancement). 17 | 6. Click on the **Submit new issue** button to create the issue. 
18 | 19 | ## Contributing to Issue Resolution 20 | 21 | If you're interested in contributing to the resolution of open issues, you can follow these steps: 22 | 23 | 1. Go to the [Issues](https://github.com/sap/prospector/issues) section. 24 | 2. Browse through the list of open issues and find one that you'd like to work on. 25 | 3. Fork the repository, create a new branch and make your code changes. 26 | 4. Open a new GitHub pull request with the patch. 27 | 5. Make sure that the pull request (PR) description provides a comprehensive explanation of both the problem and the proposed solution. 28 | -------------------------------------------------------------------------------- /docs/report_casestudy_complete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/docs/report_casestudy_complete.png -------------------------------------------------------------------------------- /docs/team.md: -------------------------------------------------------------------------------- 1 | Coming soon... 2 | -------------------------------------------------------------------------------- /docs/user_manual.md: -------------------------------------------------------------------------------- 1 | # User Manual 2 | 3 | ## Summary 4 | 5 | This is the user manual of the `kaybee` tool, which is part of `project KB`. 6 | 7 | **WARNING:** this document is *work in progress*. Some of the commands 8 | mentioned below might not be fully implemented at this time. 9 | 10 | Please refer to the output of `kaybee help` and `kaybee help <command>` to 11 | know what flags and options are available. 12 | 13 | ## Commands 14 | 15 | ### `create` 16 | 17 | The `create` command is used to create vulnerability statements. 18 | 19 | ### `pull` 20 | 21 | The `pull` command is used to retrieve statements from remote sources to the local machine. 
22 | 23 | ### `merge` 24 | 25 | The `merge` command is used to aggregate (and possibly reconcile) 26 | vulnerability statements coming from different sources. 27 | 28 | ### `import` 29 | 30 | The `import` command is used to import vulnerability data from a 31 | variety of services/formats, in particular from a Steady backend. 32 | 33 | ### `export` 34 | 35 | The `export` command is used to export vulnerability statements to 36 | multiple formats. 37 | 38 | ### `list` 39 | 40 | *note*: this is still not implemented at this time (v0.6.5) 41 | 42 | The `list` command is used to display the content of statement repositories (remote or local). 43 | 44 | 45 | ### `purge` 46 | 47 | The `purge` command deletes all the local clones of remote sources that have not been 48 | updated for longer than a specified amount of time. This command is used to ensure compliance 49 | to data retention policies, and can be invoked, for example, as a recurrent scheduled job. 50 | 51 | ### `update` 52 | 53 | The `update` command is used to check if a newer version of the tool is available, and if so, to update it. 54 | 55 | ### `version` 56 | 57 | The `version` command is used to show detailed information about 58 | the current version of `kaybee`. 
59 | 60 | ## Configuration 61 | 62 | *to be written* 63 | -------------------------------------------------------------------------------- /kaybee/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | fail_fast: true 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v3.2.0 7 | hooks: 8 | - id: trailing-whitespace 9 | - id: end-of-file-fixer 10 | - id: check-yaml 11 | # - id: check-added-large-files 12 | # - id: check-merge-conflict 13 | # - id: detect-private-key 14 | - repo: https://github.com/fsfe/reuse-tool 15 | rev: v0.11.1 16 | hooks: 17 | - id: reuse 18 | - repo: git://github.com/dnephin/pre-commit-golang 19 | rev: v0.3.5 20 | hooks: 21 | - id: go-fmt 22 | # - id: go-vet 23 | # args: [./kaybee] 24 | - id: go-lint 25 | - id: go-imports 26 | # - id: go-cyclo 27 | args: [-over=15] 28 | - id: validate-toml 29 | - id: no-go-testing 30 | # - id: gometalinter 31 | # - id: golangci-lint 32 | # - id: go-critic 33 | # - id: go-unit-tests 34 | # - id: go-build 35 | -------------------------------------------------------------------------------- /kaybee/Makefile: -------------------------------------------------------------------------------- 1 | $(eval VERSION = $(shell head VERSION)) 2 | $(eval GIT_COMMIT = $(shell git rev-parse HEAD)) 3 | $(eval NOW = $(shell date)) 4 | 5 | # Go parameters 6 | GOCMD=go 7 | GOBUILD=$(GOCMD) build -mod=mod 8 | GOCLEAN=$(GOCMD) clean 9 | GOTEST=$(GOCMD) test 10 | GOGET=$(GOCMD) get 11 | BINARY_NAME=kaybee 12 | LDFLAGS=-ldflags='-X "github.com/sap/project-kb/kaybee/cmd.buildDate=$(NOW)" -X "github.com/sap/project-kb/kaybee/cmd.buildCommitID=$(GIT_COMMIT)" -X "github.com/sap/project-kb/kaybee/cmd.version=$(VERSION)"' 13 | BINARY_UNIX=$(BINARY_NAME)-$(VERSION)_linux-amd64 14 | BINARY_WINDOWS=$(BINARY_NAME)-$(VERSION)_win-amd64 15 | 
BINARY_MACOS=$(BINARY_NAME)-$(VERSION)_darwin-amd64 16 | 17 | all: env lint vet test build #ui 18 | 19 | env: 20 | $(GOCMD) env -w GO111MODULE=auto 21 | 22 | build: env pkged.go fmt 23 | $(GOBUILD) $(LDFLAGS) -o $(BINARY_NAME) -v 24 | 25 | pkged.go: internal/tasks/data/default_config.yaml 26 | pkger 27 | 28 | # ui: 29 | # $(MAKE) --directory=ui 30 | 31 | check: lint vet test 32 | 33 | test: pkged.go 34 | $(GOTEST) -failfast -coverprofile=coverage.out ./... 35 | 36 | coverage: test 37 | go tool cover -html=coverage.out 38 | 39 | clean: 40 | $(GOCLEAN) 41 | @rm -f $(BINARY_NAME) 42 | @rm -f $(BINARY_UNIX) 43 | @rm -f $(BINARY_MACOS) 44 | @rm coverage.out 45 | @rm -fr dist 46 | 47 | run: 48 | $(GOBUILD) -o $(BINARY_NAME) -v ./... 49 | ./$(BINARY_NAME) 50 | 51 | vet: fmt 52 | $(GOCMD) vet ./... 53 | 54 | doc: 55 | $(GOCMD) doc ./... 56 | 57 | fmt: 58 | $(GOCMD) fmt ./... 59 | 60 | lint: vet 61 | @golint ./... 62 | 63 | security: 64 | @gosec -include=G101,G201,G202,G203,G301,G302,G303,G305,G306,G401 ./... 65 | @echo "[OK] Go security check was completed!" 66 | 67 | # Cross compilation 68 | build-linux: pkged.go 69 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 $(GOBUILD) $(LDFLAGS) -o dist/$(BINARY_UNIX) -v 70 | 71 | build-win: pkged.go 72 | CGO_ENABLED=0 GOOS=windows GOARCH=amd64 $(GOBUILD) $(LDFLAGS) -o dist/$(BINARY_WINDOWS) -v 73 | 74 | build-macos: pkged.go 75 | CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 $(GOBUILD) $(LDFLAGS) -o dist/$(BINARY_MACOS) -v 76 | 77 | build-all: pkged.go build-win build-macos build-linux 78 | 79 | .PHONY: build-all 80 | -------------------------------------------------------------------------------- /kaybee/README.md: -------------------------------------------------------------------------------- 1 | # KayBee 2 | 3 | 4 | ## Install 5 | 6 | Just [download a binary](https://github.com/SAP/project-kb/releases/latest) compatible with your 7 | operating system, make sure it has execution permissions if applicable, and then 8 | run it. 
9 | 10 | Optionally, for your convenience, you may want to make sure that the binary is 11 | in your `$PATH`. 12 | 13 | For example, in Linux you would put the following line in your `.bashrc` file: 14 | 15 | export PATH=$PATH:/usr/local/bin/kaybee 16 | 17 | (please, make sure you adjust the path to the `kaybee` binary as necessary) 18 | 19 | Alternatively, you can clone this repository and build it yourself (you will need `go` and `make`). 20 | You can do so with the `make` command; inspecting the Makefile first is a good idea. 21 | 22 | ## Usage 23 | 24 | Once you have downloaded or built the binary, you can see the list of supported 25 | commands with: 26 | 27 | `kaybee --help` 28 | 29 | 30 | To import vulnerability data in Eclipse Steady, run the following command: 31 | 32 | ```kaybee pull``` 33 | 34 | This will retrieve all the statements from all the sources configured in your 35 | `kaybeeconf.yaml` file. 36 | 37 | You can then run: 38 | 39 | ```kaybee export --target steady``` 40 | 41 | to generate a script `steady.sh`; edit the top of the script to indicate the URL of 42 | your Steady backend and change the other variables as you see fit (there are comments 43 | in the file to guide you), then run it. 
44 | -------------------------------------------------------------------------------- /kaybee/VERSION: -------------------------------------------------------------------------------- 1 | 0.6.19-dev 2 | -------------------------------------------------------------------------------- /kaybee/cmd/cmd_test.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "log" 7 | "net/http" 8 | "net/http/httptest" 9 | "os" 10 | "testing" 11 | 12 | "github.com/sap/project-kb/kaybee/internal/tasks" 13 | ) 14 | 15 | // TestExport checks if the export cmd works 16 | func TestExport(t *testing.T) { 17 | exportCmd.SetArgs([]string{"-t", "steady"}) 18 | exportCmd.Execute() 19 | } 20 | 21 | func TestImport(t *testing.T) { 22 | 23 | f, err := os.Open("../testdata/steady/all_bugs.json") 24 | if err != nil { 25 | log.Fatal(err) 26 | } 27 | defer f.Close() 28 | 29 | payload, err := ioutil.ReadAll(f) 30 | if err != nil { 31 | log.Fatal(err) 32 | } 33 | 34 | srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 35 | w.WriteHeader(200) 36 | w.Write([]byte(payload)) 37 | })) 38 | defer srv.Close() 39 | 40 | fmt.Println(srv.URL) 41 | 42 | task := tasks.NewImportTask(). 43 | WithBackend(srv.URL). 44 | WithOutputPath("/tmp/kaybee-test/"). 45 | WithConcurrency(4). 
46 | WithLimit(5) 47 | 48 | task.Execute() 49 | } 50 | -------------------------------------------------------------------------------- /kaybee/cmd/create.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2019 - 2020, SAP 3 | */ 4 | 5 | package cmd 6 | 7 | import ( 8 | "os" 9 | 10 | "github.com/gookit/color" 11 | "github.com/sap/project-kb/kaybee/internal/tasks" 12 | "github.com/spf13/cobra" 13 | ) 14 | 15 | var useGUI bool 16 | 17 | // TODO make constructor method, as in ExportCmd, for testability 18 | 19 | /* 20 | createCommand represents the command to make a new statement 21 | */ 22 | var createCommand = &cobra.Command{ 23 | Use: "create", 24 | Short: "Create a new statement", 25 | Long: ``, 26 | Run: doCreate, 27 | } 28 | 29 | func init() { 30 | rootCmd.AddCommand(createCommand) 31 | createCommand.Flags().BoolVarP(&useGUI, "gui", "g", false, "Use the browser-based graphical user interface") 32 | } 33 | 34 | func doCreate(cmd *cobra.Command, args []string) { 35 | 36 | if len(args) < 1 { 37 | color.Warn.Prompt("Please provide a vulnerability ID for the new statement") 38 | os.Exit(-1) 39 | } 40 | 41 | t := tasks.NewCreateTask(). 
42 | WithGUI(useGUI) 43 | 44 | t.Verbose(verbose) 45 | t.WithVulnerabilityID(args[0]) 46 | t.Execute() 47 | } 48 | -------------------------------------------------------------------------------- /kaybee/cmd/export.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2020 SAP 3 | */ 4 | 5 | // Package cmd contains all commands 6 | package cmd 7 | 8 | import ( 9 | "github.com/sap/project-kb/kaybee/internal/tasks" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | var ( 14 | skipMerge bool 15 | exportTarget string 16 | mergedStatementsDir string = "./merged" 17 | outputFile string 18 | ) 19 | 20 | var exportCmd = &cobra.Command{ 21 | Use: "export", 22 | Short: "Export a merged text-based kb into a variety of formats", 23 | Long: `A longer description that spans multiple lines and likely contains examples 24 | and usage of using your command. For example: 25 | 26 | Cobra is a CLI library for Go that empowers applications. 27 | This application is a tool to generate the needed files 28 | to quickly create a Cobra application.`, 29 | Run: runCommand, 30 | } 31 | 32 | func init() { 33 | rootCmd.AddCommand(exportCmd) 34 | 35 | exportCmd.Flags().StringVarP(&exportTarget, "target", "t", "", "Target of the export (e.g., xml, json, steady") 36 | exportCmd.Flags().StringVarP(&mergedStatementsDir, "from", "f", ".kaybee/merged", "Path to the statements to export") 37 | exportCmd.Flags().StringVarP(&outputFile, "to", "o", "", "Name of the output file") 38 | } 39 | 40 | func runCommand(cmd *cobra.Command, args []string) { 41 | doExport(args) 42 | } 43 | 44 | func doExport(args []string) { 45 | // fmt.Println("Exporting....") 46 | 47 | // if skipMerge { 48 | // fmt.Println("Skipping merge, re-using the results of the previous merge") 49 | // } else { 50 | // doMerge(cmd, args) 51 | // } 52 | 53 | t := tasks.NewExportTask(). 54 | WithExportScripts(configuration.ExportScripts()). 55 | WithSource(mergedStatementsDir). 
56 | WithTarget(exportTarget). 57 | WithOutputFile(outputFile). 58 | WithDenylist(configuration.ExportDenylist()) 59 | 60 | t.Verbose(verbose) 61 | t.Execute() 62 | 63 | } 64 | -------------------------------------------------------------------------------- /kaybee/cmd/import.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2019 - 2020, SAP 3 | */ 4 | 5 | package cmd 6 | 7 | import ( 8 | "github.com/sap/project-kb/kaybee/internal/tasks" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | // NewImportCmd is a constructor of an ImportCommand, useful for testability 13 | func NewImportCmd() *cobra.Command { 14 | return &cobra.Command{ 15 | Use: "import", 16 | Short: "Imports vulnerability data from a Steady backend or database to plain-text statements", 17 | Long: `Imports vulnerability data from a Steady backend or database to plain-text statements`, 18 | // Args: cobra.MinimumNArgs(1), 19 | Run: doImport, 20 | } 21 | } 22 | 23 | /* 24 | importCmd represents the import command, which is used to extract data from an 25 | existing Steady backend and populate a plain-text statement repository. 
26 | */ 27 | var importCmd = NewImportCmd() 28 | 29 | var ( 30 | concurrency int 31 | backend string 32 | limit int 33 | importPath string 34 | ) 35 | 36 | func init() { 37 | rootCmd.AddCommand(importCmd) 38 | importCmd.Flags().IntVarP(&limit, "limit", "n", 0, "limits the amount of rows displays (default 0)") 39 | importCmd.Flags().IntVar(&concurrency, "concurrency", 0, "limits the amount go routine per thread (default 0)") 40 | importCmd.Flags().StringVarP(&backend, "backend", "b", "", "URL of the Steady backend from which to import vulnerability data.") 41 | importCmd.Flags().StringVarP(&importPath, "path", "p", ".kaybee/imported", "Folder in which to store the imported statements") 42 | } 43 | 44 | func doImport(cmd *cobra.Command, args []string) { 45 | 46 | if backend == "" { 47 | backend = configuration.Backend() 48 | } 49 | 50 | t := tasks.NewImportTask(). 51 | WithBackend(backend). 52 | WithConcurrency(concurrency). 53 | WithLimit(limit). 54 | WithOutputPath(importPath) 55 | 56 | t.Verbose(verbose) 57 | t.Execute() 58 | } 59 | -------------------------------------------------------------------------------- /kaybee/cmd/list.go: -------------------------------------------------------------------------------- 1 | // +build ignore 2 | // Copyright © 2019 - 2020, SAP. All rights reserved. 
3 | 4 | package cmd 5 | 6 | import ( 7 | "github.com/sap/project-kb/kaybee/internal/goal" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | // listCmd represents the list command 12 | var listCmd = &cobra.Command{ 13 | Use: "list", 14 | Short: "List elements from the vulnerability document store", 15 | Long: `List elements from the vulnerability document store following 16 | this overall declaration: 17 | 18 | list [FLAGS] [TYPE] [TYPE_SPECIFIC_ARGUMENTS] 19 | 20 | - sources from a config file: list sources [CONFIG_FILE] 21 | - bugs stored in a certain repository: list bugs [REPO_URL] 22 | `, 23 | Args: cobra.MinimumNArgs(1), 24 | Run: func(cmd *cobra.Command, args []string) { 25 | }, 26 | } 27 | 28 | var listBugsCmd = &cobra.Command{ 29 | Use: "bugs", 30 | Short: "List bugs stored in an instance of the CVE document store", 31 | Long: `List bugs stored in an instance of the CVE document store. This command 32 | fetches the latest version of the document store hosted in a given URL and temporarily 33 | stores it in the given path. 
34 | 35 | kaybee list bugs [FLAGS] [REPO_URL] [CACHE_PATH(optional)] 36 | `, 37 | Args: cobra.MinimumNArgs(1), 38 | Run: func(cmd *cobra.Command, args []string) { 39 | c := &goal.ListBugs{} 40 | c.New("", verbose) 41 | c.Add(args[0], branch, cacheDir, signed) 42 | c.Run() 43 | }, 44 | } 45 | 46 | var listRepoCmd = &cobra.Command{ 47 | Use: "sources", 48 | Short: "List sources declared in a sync file", 49 | Long: `List sources declared in a sync file`, 50 | Args: cobra.MinimumNArgs(1), 51 | Run: func(cmd *cobra.Command, args []string) { 52 | // c := &goal.ListSources{} 53 | // c.New(args[0]) 54 | // c.Run() 55 | }, 56 | } 57 | 58 | var listSigCmd = &cobra.Command{ 59 | Use: "signatures", 60 | Short: "List public keys declared in a sync file", 61 | Long: `List public keys declared in a sync file`, 62 | Args: cobra.MinimumNArgs(1), 63 | Run: func(cmd *cobra.Command, args []string) { 64 | c := &goal.ListBugs{} 65 | c.New(args[0], verbose) 66 | 67 | // for _, s := range c.List.Base.Parser.GetSources() { 68 | // c.Add(s.Repo, s.Branch, cacheDir, signed) 69 | // } 70 | 71 | c.PubKeys() 72 | }, 73 | } 74 | 75 | var ( 76 | repo, branch string 77 | signed bool 78 | ) 79 | 80 | func init() { 81 | rootCmd.AddCommand(listCmd) 82 | listCmd.AddCommand(listRepoCmd) 83 | 84 | listBugsCmd.Flags().BoolVarP(&signed, "signed", "s", false, "Ignores all unsigned commits") 85 | listBugsCmd.Flags().StringVarP(&repo, "repo", "r", "", "url of the vulnkb") 86 | listBugsCmd.Flags().StringVarP(&branch, "branch", "b", "master", "branch storing the vulnkb") 87 | listCmd.AddCommand(listBugsCmd) 88 | 89 | listCmd.AddCommand(listSigCmd) 90 | } 91 | -------------------------------------------------------------------------------- /kaybee/cmd/merge.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2019 - 2020, SAP. All rights reserved. 
2 | 3 | package cmd 4 | 5 | import ( 6 | "fmt" 7 | 8 | "github.com/sap/project-kb/kaybee/internal/conf" 9 | "github.com/sap/project-kb/kaybee/internal/tasks" 10 | "github.com/spf13/cobra" 11 | // "github.com/sap/project-kb/kaybee/internal/goal" 12 | ) 13 | 14 | // TODO make constructor method, as in ExportCmd, for testability 15 | 16 | // mergeCmd represents the merge command 17 | var mergeCmd = &cobra.Command{ 18 | Use: "merge", 19 | Short: "Merges statements from local clones of upstream source repositories", 20 | Long: `(CAUTION! OBSOLETE DESCRIPTION) Displays from a list of different source repositories a table of 21 | cves which are possibly conflicting, attempts a soft merge and displays 22 | each as mergeable or not along with the conflicting slices. 23 | 24 | kbsync merge -r [REPO_URL_1] -r [REPO_URL_2] -r ... 25 | `, 26 | Run: doMerge, 27 | } 28 | 29 | var ( 30 | repos []string 31 | skipPull bool 32 | mergePolicyName string 33 | ) 34 | 35 | func init() { 36 | rootCmd.AddCommand(mergeCmd) 37 | 38 | // mergeCmd.Flags().StringSliceVarP(&repos, "repo", "r", []string{}, "repositories to diff") 39 | // mergeCmd.Flags().BoolVarP(&signed, "signed", "s", false, "Ignore unsigned commits") 40 | mergeCmd.Flags().BoolVarP(&skipPull, "skip-pull", "s", false, "Do not pull from remote repositories (only use their local copies)") 41 | mergeCmd.Flags().StringVarP(&mergePolicyName, "policy", "p", "strict", "Merge policy (default: strict") 42 | 43 | } 44 | 45 | func doMerge(cmd *cobra.Command, args []string) { 46 | if verbose { 47 | fmt.Println("Merging statements...") 48 | } 49 | 50 | if skipPull { 51 | if verbose { 52 | fmt.Println("Skipping pull, only local clones will be considered") 53 | } 54 | } else { 55 | doPull(cmd, args) 56 | } 57 | 58 | t := tasks.NewMergeTask(). 59 | WithPolicy(conf.PolicyFromString(mergePolicyName)). 
60 | WithSources(configuration.Sources()) 61 | 62 | t.Verbose(verbose) 63 | t.Execute() 64 | 65 | if verbose { 66 | fmt.Println("Merge completed") 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /kaybee/cmd/pull.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2020 SAP 3 | */ 4 | 5 | package cmd 6 | 7 | import ( 8 | "github.com/sap/project-kb/kaybee/internal/tasks" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | // TODO make constructor method, as in ExportCmd, for testability 13 | 14 | // pullCmd represents the pull command 15 | var pullCmd = &cobra.Command{ 16 | Use: "pull", 17 | Short: "Pull vulnerability data from remote repositories into local clones", 18 | Long: `A longer description that spans multiple lines and likely contains examples 19 | and usage of using your command. For example: 20 | 21 | Cobra is a CLI library for Go that empowers applications. 22 | This application is a tool to generate the needed files 23 | to quickly create a Cobra application.`, 24 | Run: doPull, 25 | } 26 | 27 | func init() { 28 | rootCmd.AddCommand(pullCmd) 29 | } 30 | 31 | func doPull(cmd *cobra.Command, args []string) { 32 | t := tasks.NewPullTask(). 33 | WithSources(configuration.Sources()) 34 | 35 | t.Verbose(verbose) 36 | t.Execute() 37 | } 38 | -------------------------------------------------------------------------------- /kaybee/cmd/push.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2019 - 2020, SAP. All rights reserved. 
2 | 3 | package cmd 4 | 5 | import ( 6 | "fmt" 7 | 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | // TODO make constructor method, as in ExportCmd, for testability 12 | 13 | // pushCmd represents the push command 14 | var pushCmd = &cobra.Command{ 15 | Use: "push", 16 | Short: "Push a merged text-format Kb to a repository", 17 | Long: `A longer description that spans multiple lines and likely contains examples 18 | and usage of using your command. For example: 19 | 20 | Cobra is a CLI library for Go that empowers applications. 21 | This application is a tool to generate the needed files 22 | to quickly create a Cobra application.`, 23 | Run: doPush, 24 | } 25 | 26 | func init() { 27 | rootCmd.AddCommand(pushCmd) 28 | 29 | // Here you will define your flags and configuration settings. 30 | 31 | // Cobra supports Persistent Flags which will work for this command 32 | // and all subcommands, e.g.: 33 | // pushCmd.PersistentFlags().String("foo", "", "A help for foo") 34 | 35 | // Cobra supports local flags which will only run when this command 36 | // is called directly, e.g.: 37 | // pushCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") 38 | } 39 | 40 | func doPush(cmd *cobra.Command, args []string) { 41 | fmt.Println("Pushing....") 42 | fmt.Println("UNIMPLEMENTED") 43 | fmt.Println("Push completed") 44 | } 45 | -------------------------------------------------------------------------------- /kaybee/cmd/reconcile.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2020 SAP 3 | */ 4 | 5 | package cmd 6 | 7 | import ( 8 | "github.com/sap/project-kb/kaybee/internal/tasks" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | // var vulnerabilityID string 13 | 14 | // reconcileCmd represents the reconcile command 15 | var reconcileCmd = &cobra.Command{ 16 | Use: "reconcile", 17 | Short: "Manually reconcile conflicting statements", 18 | Long: ``, 19 | Run: doReconcile, 20 | } 21 | 22 | func init() { 23 | 
rootCmd.AddCommand(reconcileCmd) 24 | // reconcileCmd.Flags().StringVarP(&vulnerabilityID, "reconcile", "r", "", "Vulnerability to reconcile") 25 | } 26 | 27 | func doReconcile(cmd *cobra.Command, args []string) { 28 | var vulnerabilityID string = args[0] 29 | 30 | t := tasks.ReconcileTask{ 31 | Sources: configuration.Sources(), 32 | VulnerabilityID: vulnerabilityID, 33 | } 34 | 35 | t.Verbose(verbose) 36 | t.Execute() 37 | } 38 | -------------------------------------------------------------------------------- /kaybee/cmd/root.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2019 - 2020, SAP 2 | 3 | package cmd 4 | 5 | import ( 6 | "fmt" 7 | "log" 8 | "os" 9 | 10 | // homedir "github.com/mitchellh/go-homedir" 11 | 12 | "github.com/sap/project-kb/kaybee/internal/conf" 13 | "github.com/sap/project-kb/kaybee/internal/filesystem" 14 | "github.com/spf13/cobra" 15 | // "gopkg.in/src-d/go-git.v4/storage/filesystem" 16 | ) 17 | 18 | var ( 19 | verbose bool 20 | configuration conf.Configuration 21 | cfgFile string = "kaybeeconf.yaml" 22 | ) 23 | 24 | // TODO make constructor method, as in ExportCmd, for testability 25 | 26 | // rootCmd represents the base command when called without any subcommands 27 | var rootCmd = &cobra.Command{ 28 | Use: "kaybee", 29 | TraverseChildren: true, 30 | Short: "KayBee is a tool to maintain a collaborative, distributed knowledge base of vulnerabilities affecting open-source software.", 31 | Long: `KayBee is a tool to maintain a collaborative, distributed knowledge base of vulnerabilities affecting open-source software.`, 32 | Run: func(cmd *cobra.Command, args []string) { 33 | 34 | }, 35 | } 36 | 37 | // Execute adds all child commands to the root command and sets flags appropriately. 38 | // This is called by main.main(). It only needs to happen once to the rootCmd. 
39 | func Execute() { 40 | 41 | if len(os.Args) == 1 { 42 | rootCmd.Help() 43 | os.Exit(0) 44 | 45 | } 46 | if err := rootCmd.Execute(); err != nil { 47 | log.Println(err) 48 | os.Exit(1) 49 | } 50 | } 51 | 52 | func init() { 53 | // OnInitialize sets the passed functions to be run when each command's Execute method is called. 54 | cobra.OnInitialize(initConfig) 55 | rootCmd.PersistentFlags().StringVarP(&cfgFile, "config", "c", cfgFile, "config file") 56 | rootCmd.PersistentFlags().BoolVarP(&verbose, "verbose", "v", false, "Verbose mode") 57 | } 58 | 59 | func initConfig() { 60 | // fmt.Println("CONFIG: " + cfgFile) 61 | 62 | if !filesystem.IsFile(cfgFile) { 63 | configuration = conf.Configuration{} 64 | return 65 | } 66 | 67 | p, err := conf.NewParser(cfgFile) 68 | if err != nil { 69 | log.Fatal("Error parsing configuration") 70 | } 71 | 72 | c, _ := p.Parse() 73 | if verbose { 74 | fmt.Println("Using config file:", p.Viper.ConfigFileUsed()) 75 | } 76 | 77 | _, err = c.Validate() 78 | if err != nil { 79 | log.Fatalln("Invalid config.") 80 | } 81 | configuration = c 82 | } 83 | -------------------------------------------------------------------------------- /kaybee/cmd/setup.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2019 - 2020, SAP. All rights reserved. 2 | 3 | package cmd 4 | 5 | import ( 6 | "fmt" 7 | "log" 8 | 9 | "github.com/sap/project-kb/kaybee/internal/tasks" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | // TODO make constructor method, as in ExportCmd, for testability 14 | 15 | // mergeCmd represents the merge command 16 | var setupCmd = &cobra.Command{ 17 | Use: "setup", 18 | Short: "Creates a configuration, unless it exists already", 19 | Long: `(CAUTION! OBSOLETE DESCRIPTION) Displays from a list of different source repositories a table of 20 | cves which are possibly conflicting, attempts a soft merge and displays 21 | each as mergeable or not along with the conflicting slices. 
22 | 23 | kbsync setup [-i] 24 | `, 25 | Run: doSetup, 26 | } 27 | 28 | var ( 29 | interactive bool 30 | force bool 31 | ) 32 | 33 | func init() { 34 | rootCmd.AddCommand(setupCmd) 35 | setupCmd.Flags().BoolVarP(&interactive, "interactive", "i", false, "Interactive configuration") 36 | setupCmd.Flags().BoolVarP(&force, "force", "f", false, "Force overwrite existing configuration file") 37 | } 38 | 39 | func doSetup(cmd *cobra.Command, args []string) { 40 | fmt.Println("Running setup...") 41 | 42 | if interactive { 43 | fmt.Println("Interactive mode (not implemented yet)") 44 | log.Fatal("Aborting.") 45 | } else { 46 | fmt.Println("Non-interactive mode") 47 | } 48 | 49 | t := tasks.NewSetupTask(). 50 | WithInteractiveMode(interactive). 51 | WithForce(force) 52 | 53 | t.Verbose(verbose) 54 | t.Execute() 55 | 56 | fmt.Println("Setup completed") 57 | 58 | } 59 | -------------------------------------------------------------------------------- /kaybee/cmd/update.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2019 - 2020, SAP. All rights reserved. 2 | 3 | package cmd 4 | 5 | import ( 6 | "fmt" 7 | "log" 8 | 9 | "github.com/blang/semver" 10 | "github.com/rhysd/go-github-selfupdate/selfupdate" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | var forceUpdate bool 15 | 16 | var updateCmd = &cobra.Command{ 17 | Use: "update", 18 | Short: "Creates a configuration, unless it exists already", 19 | Long: `(CAUTION! OBSOLETE DESCRIPTION) Displays from a list of different source repositories a table of 20 | cves which are possibly conflicting, attempts a soft merge and displays 21 | each as mergeable or not along with the conflicting slices. 
22 | 23 | kbsync update [-i] 24 | `, 25 | Run: doUpdate, 26 | } 27 | 28 | func init() { 29 | rootCmd.AddCommand(updateCmd) 30 | updateCmd.Flags().BoolVarP(&forceUpdate, "force", "f", false, "Upgrade to the latest version, if different from the one in use") 31 | } 32 | 33 | func doUpdate(cmd *cobra.Command, args []string) { 34 | fmt.Print("Checking new releases...\n") 35 | 36 | // fmt.Println("You currently have version: " + version) 37 | latest, ok, e := selfupdate.DetectLatest("sap/project-kb") 38 | if e != nil { 39 | log.Fatal("error: ", e) 40 | } 41 | 42 | if ok { 43 | latestSemVer := semver.MustParse(latest.Version.String()) 44 | 45 | // // TESTING 46 | // version = "0.0.1" 47 | currentSemVer := semver.MustParse(version) 48 | // currentSemVer := semver.MustParse("0.1.1") 49 | 50 | if latestSemVer.Compare(currentSemVer) > 0 { 51 | fmt.Printf("Newer version detected\n") 52 | fmt.Println("Latest version available: " + latest.Version.String()) 53 | fmt.Println("You are currently using: " + currentSemVer.String()) 54 | 55 | if forceUpdate { 56 | fmt.Println("Please wait while downloading and upgrading to version " + latest.Version.String()) 57 | newVersion, err := selfupdate.UpdateSelf(currentSemVer, "sap/project-kb") 58 | if err != nil { 59 | fmt.Println("Could not update to new version. 
Aborting.") 60 | return 61 | } 62 | fmt.Print("Done upgrading to version " + newVersion.Version.String()) 63 | } else { 64 | fmt.Println("Please download it from: " + latest.URL) 65 | fmt.Println("or run 'kaybee update --force' to download and install automatically.") 66 | } 67 | } else { 68 | fmt.Println("You have the latest version.") 69 | } 70 | 71 | } else { 72 | fmt.Println("Could not check the latest available version, you may want to retry later.") 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /kaybee/cmd/version.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2020 SAP 3 | */ 4 | 5 | package cmd 6 | 7 | import ( 8 | "fmt" 9 | 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | var ( 14 | buildDate string 15 | buildCommitID string 16 | version string 17 | ) 18 | 19 | // versionCmd represents the version command 20 | var versionCmd = &cobra.Command{ 21 | Use: "version", 22 | Short: "Display version and build information", 23 | Long: `A longer description that spans multiple lines and likely contains examples 24 | and usage of using your command. For example: 25 | 26 | Cobra is a CLI library for Go that empowers applications. 27 | This application is a tool to generate the needed files 28 | to quickly create a Cobra application.`, 29 | Run: func(cmd *cobra.Command, args []string) { 30 | printBanner() 31 | fmt.Println("Version: " + version) 32 | fmt.Println("Build time: " + buildDate) 33 | fmt.Println("Built from commit: " + buildCommitID) 34 | fmt.Println("") 35 | doUpdate(cmd, args) 36 | }, 37 | } 38 | 39 | func init() { 40 | rootCmd.AddCommand(versionCmd) 41 | 42 | // Here you will define your flags and configuration settings. 
// printBanner writes the KayBee ASCII-art logo and a short project blurb
// to standard output. It is called by the "version" command before the
// version/build details are printed.
//
// The logo contains a literal backtick, which cannot appear inside a Go
// raw string literal — hence the concatenation with "`" in the middle.
func printBanner() {
	banner := ` __ __ ____
 / //_/____ _ __ __ / __ ) ___ ___
 / ,< / __ ` + "`" + `// / / // __ |/ _ \ / _ \
 / /| |/ /_/ // /_/ // /_/ // __// __/
 /_/ |_|\__,_/ \__, //_____/ \___/ \___/
 /____/` + "\n\n"

	fmt.Print(banner)
	fmt.Printf(" by SAP Security Research\n\n")
	fmt.Printf("project \"KB\" -- https://sap.github.io/project-kb\n\n")
	fmt.Println("This is KayBee, a tool developed in project \"KB\", to create and maintain\n" +
		"a collaborative, distributed knowledge base about vulnerabilities of open-source software.\n")

}
// OpenURL opens the system default browser at the given URL.
//
// The launcher command depends on the operating system (xdg-open on
// Linux, rundll32 on Windows, open on macOS). On an unsupported
// platform, or if starting the launcher fails, the process is aborted
// via log.Fatal.
func OpenURL(url string) {
	var err error

	switch runtime.GOOS {
	case "linux":
		err = exec.Command("xdg-open", url).Start()
	case "windows":
		err = exec.Command("rundll32", "url.dll,FileProtocolHandler", url).Start()
	case "darwin":
		err = exec.Command("open", url).Start()
	default:
		// Use a format verb rather than string concatenation: passing a
		// non-constant string as the fmt.Errorf format trips `go vet`
		// and misbehaves if the value ever contains a '%'.
		err = fmt.Errorf("unsupported platform: %s", runtime.GOOS)
	}
	if err != nil {
		log.Fatal(err)
	}
}
errors.ErrConfigVersionNotDefined) 43 | } 44 | 45 | func TestParserMalformed1(t *testing.T) { 46 | 47 | p, err := NewParser("../../testdata/conf/sample_kbsync_malformed.yaml") 48 | if err != nil { 49 | return 50 | } 51 | _, err = p.Parse() 52 | assert.Equal(t, err, errors.ErrConfigVersionNotDefined) 53 | 54 | } 55 | 56 | // func TestImportScriptSettings(t *testing.T) { 57 | // p, err := NewParser() 58 | // p.SetConfigFile("../../myconfig") 59 | // if err != nil { 60 | // return 61 | // } 62 | // c, _ := p.Parse() 63 | // fmt.Printf("%v", c) 64 | // } 65 | -------------------------------------------------------------------------------- /kaybee/internal/conf/v1/conf_v1.go: -------------------------------------------------------------------------------- 1 | package v1 2 | 3 | // ------------------------ 4 | // --- Configuration V1 --- 5 | // ------------------------ 6 | 7 | // A Configuration represents a configuration object conforming to V.1 of the 8 | // configuration schema 9 | type Configuration struct { 10 | Version string `yaml:"apiVersion"` 11 | Backend string `yaml:"backend"` 12 | ExportDenylist []string `yaml:"exportdenylist"` // for some reason export_denylist is not a good key (Viper bug?) 
13 | Sources []Source `yaml:"sources"` 14 | Policies []string `yaml:"policies"` 15 | Export []ExportScript `yaml:"export"` 16 | } 17 | 18 | // A Source represents a remote repository in which vulnerability statements are stored 19 | type Source struct { 20 | Repo string `yaml:"repo"` 21 | Branch string `yaml:"branch"` 22 | Signed bool `yaml:"signed"` 23 | Rank int `yaml:"rank"` 24 | } 25 | 26 | // ExportScript defines how to generate import scripts for a set of statements 27 | type ExportScript struct { 28 | Target string `yaml:"target"` 29 | Filename string `yaml:"filename"` 30 | Pre string `yaml:"pre"` 31 | Each string `yaml:"each"` 32 | Post string `yaml:"post"` 33 | } 34 | -------------------------------------------------------------------------------- /kaybee/internal/conf/v2/conf_v2.go: -------------------------------------------------------------------------------- 1 | package conf 2 | 3 | // ------------------------ 4 | // --- Configuration V2 --- 5 | // ------------------------ 6 | 7 | // A ConfigV2 represents a configuration object conforming to V.2 of the 8 | // configuration schema 9 | type ConfigV2 struct { 10 | Version string `yaml:"apiVersion"` 11 | Backend string 12 | ExportBlacklist map[string][]string 13 | Sources map[int]Source 14 | Policies []Policy 15 | //ParsedExport map[string][]*regexp.Regexp 16 | } 17 | 18 | // A Source represents a remote repository in which vulnerability statements are stored 19 | type Source struct { 20 | Repo string `yaml:"repo"` 21 | Branch string `yaml:"branch"` 22 | Signed bool `yaml:"signed"` 23 | } 24 | 25 | // A Policy determines how statements from different source repositories are 26 | // to be merged (reconciled, when conflicting) 27 | type Policy int 28 | 29 | // Validate the configuration 30 | func (c ConfigV2) Validate() (result bool) { 31 | if c.Backend == "" { 32 | return false 33 | } 34 | 35 | return true 36 | } 37 | 38 | // GetSources returns a slice of sources 39 | func (c *ConfigV2) GetSources() 
// Sentinel errors for statement reconciliation. Callers compare against
// these values to distinguish merge outcomes.
var (
	// ErrUntrustedContributor is thrown when a contributor is not trusted
	ErrUntrustedContributor = errors.New("Contributor not found in list of trusted contributors")

	// ErrConflictingStatements is thrown when two statements are not independent and cannot be merged with the current policy
	ErrConflictingStatements = errors.New("Conflicting statements")

	// ErrNonIndependentStatements is thrown when two statements are not independent
	ErrNonIndependentStatements = errors.New("Non-independent statements")

	// ErrNothingToReconcile is thrown when two statements are independent and no reconciliation is needed
	ErrNothingToReconcile = errors.New("Statements are independent, nothing to reconcile")
)
36 | 37 | // ErrConfigPolicyMissing is thrown when no policy is specified 38 | ErrConfigPolicyMissing = errors.New("Policy not set") 39 | 40 | // ErrConfigUnknownPolicy is thrown when an unknown/invalid policy is specified in a configuration 41 | ErrConfigUnknownPolicy = errors.New("Unknown policy listed in policies") 42 | 43 | // ErrConfigInvalidSourceURL is thrown when a source repository is specified without a valid URL 44 | ErrConfigInvalidSourceURL = errors.New("Source does not have a repo associated") 45 | 46 | // ErrConfigInvalid is used to signal any configuration error that is not covered by the other config errors 47 | ErrConfigInvalid = errors.New("Configuration is invalid") 48 | ) 49 | -------------------------------------------------------------------------------- /kaybee/internal/filesystem/file_test.go: -------------------------------------------------------------------------------- 1 | package filesystem 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestParsePath(t *testing.T) { 11 | // Test path parsing successful 12 | if _, _, _, valid := ParsePath("statements/cve_1234/statement.yaml"); !valid { 13 | t.Errorf("path [data/cve_1234/statement.yaml] should be valid during parsing") 14 | } 15 | 16 | // Test invalid length path 17 | if _, _, _, valid := ParsePath("ichbinfrog/statement.yaml"); valid { 18 | t.Errorf("path [ichbinfrog/statement.yaml] should be invalid") 19 | } 20 | 21 | // Test invalid root dir parse 22 | if _, _, _, valid := ParsePath("root/invalid_root/cve_1234/statement.yaml"); valid { 23 | t.Errorf("path [root/invalid_root/cve_1234/statement.yaml] should be invalid") 24 | } 25 | } 26 | 27 | func TestGetPath(t *testing.T) { 28 | // Test keypath 29 | keyPath := GetKeyPath("tests") 30 | if keyPath != "tests/"+KeyPath { 31 | t.Errorf("keypath [%s] is malformed, should be tests/%s", keyPath, KeyPath) 32 | } 33 | 34 | // Test datapath 35 | dataPath := GetDataPath("ichbinfrog") 36 | 
if dataPath != "ichbinfrog/"+DataPath { 37 | t.Errorf("datapath [%s] is malformed, should be ichbinfrog/%s", dataPath, DataPath) 38 | } 39 | } 40 | 41 | // func TestGetPubKey(t *testing.T) { 42 | // // Test valid key 43 | // keys, err := GetPubKey(GetKeyPath("../tests")) 44 | // if err != nil || len(keys) == 0 { 45 | // t.Errorf("failed to parse key [tests/signature/test.asc]") 46 | // } 47 | 48 | // if len(keys) != 1 { 49 | // t.Errorf("error with filter, only one key should be present") 50 | // } 51 | // } 52 | 53 | func TestDirUtils(t *testing.T) { 54 | // File 55 | if IsDir("../../testdata/steady/all_bugs.json") { 56 | t.Errorf("../../testdata/steady/all_bugs.json should be recognised as a file") 57 | } 58 | // Dir 59 | if !IsDir("../../testdata/steady") { 60 | t.Errorf("../../testdata/steady should be recognised as a directory") 61 | } 62 | // Non existent 63 | if IsDir("holla") { 64 | t.Errorf("holla should not be recognised as a directory") 65 | } 66 | 67 | os.RemoveAll("tests/test_dir") 68 | // Non recursive non existent directory 69 | if err := CreateDir("tests/test_dir"); err != nil { 70 | t.Errorf("createdir should succeed with path tests/test_dir") 71 | } 72 | // // Recursive creation 73 | // if err := CreateDir("tests/test_dir/test_dir/test_dir"); err == nil { 74 | // t.Errorf("createdir should fail with path tests/test_dir/test_dir/test_dir when recursive false") 75 | // } 76 | os.RemoveAll("tests/test_dir") 77 | } 78 | 79 | func TestIsFile(t *testing.T) { 80 | assert.True(t, IsFile("/etc/issue")) 81 | assert.False(t, IsFile("/etc/issue/fdsafjfas")) 82 | } 83 | 84 | // func TestURLParse(t *testing.T) { 85 | // if _, err := GetRepoPath("https://github.com/sap/project-kb/kaybee", "."); err != nil { 86 | // t.Error(err) 87 | // } 88 | // } 89 | -------------------------------------------------------------------------------- /kaybee/internal/model/mergelog_test.go: -------------------------------------------------------------------------------- 1 | 
package model

import (
	"testing"
)

// Test fixtures: two statements about the same vulnerability (cve-1234-5678)
// originating from different repositories and carrying different origin ranks.
var (
	st3 = Statement{
		VulnerabilityID: "cve-1234-5678",
		Aliases: []Alias{
			"alias-01",
			"alias-02",
		},
		Notes: []Note{
			{
				Links: []string{
					"some_link",
					"another_link",
				},
				Text: "Some note about cve-1234-5678",
			},
		},
		Metadata: Metadata{
			LocalPath:  "/tmp/statement-03",
			Origin:     "https://github.com/copernico/vulnerability_data",
			Branch:     "master",
			OriginRank: 20,
		},
	}

	st4 = Statement{
		VulnerabilityID: "cve-1234-5678",
		Aliases: []Alias{
			"alias-03",
			"alias-04",
		},
		Notes: []Note{
			{
				Text: "Some additional note about cve-1234-5678",
			},
		},
		Metadata: Metadata{
			LocalPath:  "/tmp/statement-04",
			Origin:     "https://github.com/someoneelse/oss_vulnerabilities",
			Branch:     "master",
			OriginRank: 10,
		},
	}
)

// TestMergeLog exercises NewMergeLog, Log and Dump end to end.
// NOTE(review): there are no assertions here, and Dump(".") writes into the
// current working directory without cleanup — consider asserting on the log
// content and dumping into t.TempDir() instead.
func TestMergeLog(t *testing.T) {
	ml := NewMergeLog("ex_1234")
	logEntry := MergeLogEntry{
		logMessage: "Sample log message",
		sourceStatements: []Statement{
			st3,
			st4,
		},
		success: true,
		policy:  "soft",
	}
	ml.Log(logEntry)
	ml.Dump(".")
}
--------------------------------------------------------------------------------
/kaybee/internal/model/policy.go:
--------------------------------------------------------------------------------
package model

// See https://github.com/tmrts/go-patterns/blob/master/behavioral/policy.md
// for an explanation of the pattern adopted here

// The StatementReconciler interface defines the types that have the capability to reconcile statements
// that are not independent and how to reduce sets of statements by applying such reconcile operation
// to non-independent statements
type StatementReconciler interface {
	Reconcile([]Statement) ReconcileResult
	Reduce(stmts
map[string][]Statement) (map[string][]Statement, MergeLog, error)
}

// ReconcileResult encodes the result of a reconcile operation
type ReconcileResult struct {
	reconciledStatement Statement   // the merged statement (zero value when success is false)
	candidateStatements []Statement // the statements that were candidates for reconciliation
	comment             string      // human-readable description of the outcome
	success             bool        // whether the reconcile operation succeeded
}

// Policy represents a way to reconcile non-independent statements and how
// to reduce sets of statements merging those that can be reconciled
type Policy struct {
	reconciler StatementReconciler // the concrete strategy this policy delegates to
}

// Reconcile merges two statements into one as specified in the Merger object
func (s *Policy) Reconcile(statements []Statement) ReconcileResult {
	// the actual Reconcile() that is invoked is the one defined
	// in a type that implements the StatementReconciler interface and
	// an instance of which is assigned to the reconciler field of a Policy instance
	return s.reconciler.Reconcile(statements)
}

// Reduce scans a list of Statements and merges those that can be reconciled
func (s *Policy) Reduce(stmts map[string][]Statement) (map[string][]Statement, MergeLog, error) {
	// the actual Reduce() that is invoked is the one defined
	// in a type that implements the StatementReconciler interface and
	// an instance of which is assigned to the reconciler field of a Policy instance
	return s.reconciler.Reduce(stmts)
}
--------------------------------------------------------------------------------
/kaybee/internal/model/policy_null.go:
--------------------------------------------------------------------------------
package model

// NullPolicy implements a policy that does nothing
type NullPolicy struct{}

// NewNullPolicy constructs a new NullPolicy instance
func NewNullPolicy() Policy {
	return Policy{reconciler: NullPolicy{}}
}

// Reconcile does not reconcile anything: it returns an empty statement, the
// unchanged candidates and success=false. (The previous comment claimed it
// returned the first statement, which did not match the implementation.)
func (st NullPolicy) Reconcile(statements
[]Statement) ReconcileResult {
	// Signal that no reconciliation took place: empty result, success=false.
	return ReconcileResult{
		reconciledStatement: Statement{},
		candidateStatements: statements,
		comment:             "Method Reconcile() does nothing in the NullPolicy!",
		success:             false,
	}
}

// Reduce just returns the same statements as passed in input
func (st NullPolicy) Reduce(stmts map[string][]Statement) (map[string][]Statement, MergeLog, error) {
	// No reduction is performed and the merge log stays empty.
	return stmts, MergeLog{}, nil
}
--------------------------------------------------------------------------------
/kaybee/internal/model/policy_strict.go:
--------------------------------------------------------------------------------
package model

import (
	"fmt"
	"log"

	"github.com/gookit/color"
)

// StrictPolicy refuses to solve conflicts and does not perform any reconcile action;
// In other words, non-independent statements are not reconciled, but reported to the user
// who might then want to merge them manually
type StrictPolicy struct {
}

// NewStrictPolicy creates and initializes a new StrictPolicy instance
func NewStrictPolicy() Policy {
	return Policy{
		reconciler: StrictPolicy{},
	}
}

// Reconcile does nothing (returns always a void Statement); if the two statements in input
// are not independent a suitable error signals it
// This is implemented just to satisfy the StatementReconciler interface, but this method
// is not supposed to be called ever.
27 | func (p StrictPolicy) Reconcile(statements []Statement) ReconcileResult { 28 | log.Fatal("Method Reconcile() should not be invoked on a StrictPolicy!") 29 | return ReconcileResult{ 30 | reconciledStatement: Statement{}, 31 | candidateStatements: statements, 32 | comment: "Method Reconcile() should not be invoked on a StrictPolicy!", 33 | success: false, 34 | } 35 | } 36 | 37 | // Reduce only keeps independent statemens and discards statements that are non-independent 38 | func (p StrictPolicy) Reduce(stmts map[string][]Statement) (map[string][]Statement, MergeLog, error) { 39 | var mergeLog = NewMergeLog("exec_123456789") 40 | var logEntry MergeLogEntry 41 | 42 | var statementsToReconcile []Statement 43 | 44 | for s := range stmts { 45 | conflictingStatementsCount := len(stmts[s]) 46 | statementsToReconcile = stmts[s] 47 | 48 | if conflictingStatementsCount > 1 { 49 | fmt.Println("") 50 | color.Warn.Prompt("Found %d conflicting statements for vulnerability %s -- they will not be reconciled with policy 'strict'.", len(stmts[s]), s) 51 | color.Info.Prompt("You may want to try again using another policy, which you can specify with the '-p' flag.") 52 | color.Info.Prompt("Example: kaybee merge -p soft") 53 | delete(stmts, s) 54 | logEntry = MergeLogEntry{ 55 | policy: "STRICT", 56 | logMessage: fmt.Sprintf("Found %d conflicting statements about vuln. %s; won't reconcile!", conflictingStatementsCount, s), 57 | sourceStatements: statementsToReconcile, 58 | resultingStatement: Statement{}, 59 | success: false, 60 | } 61 | } else { 62 | logEntry = MergeLogEntry{ 63 | policy: "STRICT", 64 | logMessage: fmt.Sprintf("Found a single statement about vuln. 
'%s'", s), 65 | sourceStatements: statementsToReconcile, 66 | resultingStatement: statementsToReconcile[0], 67 | success: true, 68 | } 69 | } 70 | mergeLog.Log(logEntry) 71 | 72 | } 73 | 74 | return stmts, mergeLog, nil 75 | } 76 | -------------------------------------------------------------------------------- /kaybee/internal/repository/repository_test.go: -------------------------------------------------------------------------------- 1 | package repository 2 | 3 | // func TestFetch(t *testing.T) { 4 | // repository := NewRepository("https://github.com/ichbinfrog/test_2", "master", true, 1, "file:///../../.kaybee/repositories") 5 | 6 | // // defer os.RemoveAll(repository.Path) 7 | 8 | // // Remove old repository 9 | // os.RemoveAll(repository.Path) 10 | 11 | // // Test repository fetching from empty repo 12 | // repository.Fetch(false) 13 | 14 | // // Test repository fetching from preexisting repo 15 | // repository.Fetch(false) 16 | 17 | // // Test metadata fill 18 | // statements, err := repository.Statements() 19 | // if err != nil { 20 | // t.Error(err) 21 | // } 22 | // for _, s := range statements { 23 | // log.Printf("%+v\n", s) 24 | // } 25 | // } 26 | 27 | /* func TestListFetch(t *testing.T) { 28 | repository := &List{} 29 | repository.New([]string{ 30 | "https://github.com/ichbinfrog/test", 31 | "https://github.com/ichbinfrog/test_2", 32 | }, "master", ".") 33 | 34 | repository.Fetch() 35 | } */ 36 | -------------------------------------------------------------------------------- /kaybee/internal/tasks/pull.go: -------------------------------------------------------------------------------- 1 | package tasks 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | "github.com/sap/project-kb/kaybee/internal/conf" 8 | "github.com/sap/project-kb/kaybee/internal/repository" 9 | ) 10 | 11 | // PullTask is the task that performs merging of statements, reconciling any 12 | // conflicts using a set of pre-defined policies. 
13 | type PullTask struct { 14 | BaseTask 15 | sources []conf.Source 16 | } 17 | 18 | // NewPullTask constructs a new MergeTask 19 | func NewPullTask() *PullTask { 20 | 21 | t := PullTask{} 22 | return &t 23 | } 24 | 25 | // WithSources sets the sources to be merged 26 | func (t *PullTask) WithSources(sources []conf.Source) *PullTask { 27 | t.sources = sources 28 | return t 29 | } 30 | 31 | func (t *PullTask) validate() (ok bool) { 32 | if len(t.sources) < 1 { 33 | log.Fatalln("No sources to pull. Aborting.") 34 | return false 35 | } 36 | 37 | return true 38 | } 39 | 40 | // Execute performs the actual task and returns true on success 41 | func (t *PullTask) Execute() (success bool) { 42 | // fmt.Println("[+] Running pull task") 43 | 44 | // cfg, _ := ctx.Get("configuration").(conf.Configuration) 45 | // verbose := ctx.Get("verbose").(bool) 46 | // c := ctx.Get("configuration").(conf.Configuration) 47 | // for _, v := range c.Sources() { 48 | // fmt.Printf("%s\n", v.Repo) 49 | // } 50 | 51 | t.validate() 52 | for _, src := range t.sources { 53 | if t.verbose { 54 | fmt.Printf("\nPulling source: %s\n", src.Repo) 55 | } 56 | repository := repository.NewRepository(src.Repo, src.Branch, true, src.Rank, ".kaybee/repositories") 57 | repository.Fetch(t.verbose) 58 | if t.verbose { 59 | fmt.Printf("Done with %s\n", src.Repo) 60 | } 61 | } 62 | 63 | // fmt.Println("[+] Pull task completed") 64 | return true 65 | } 66 | -------------------------------------------------------------------------------- /kaybee/internal/tasks/reconcile.go: -------------------------------------------------------------------------------- 1 | package tasks 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/sap/project-kb/kaybee/internal/conf" 7 | ) 8 | 9 | // ReconcileTask is the task that performs merging of statements, reconciling any 10 | // conflicts using a set of pre-defined policies. 
11 | type ReconcileTask struct { 12 | BaseTask 13 | Sources []conf.Source 14 | VulnerabilityID string 15 | } 16 | 17 | // NewReconcileTask constructs a new ReconcileTask 18 | func NewReconcileTask() (mergeTask *ReconcileTask) { 19 | 20 | mt := ReconcileTask{} 21 | return &mt 22 | } 23 | 24 | func (t *ReconcileTask) validate() (ok bool) { 25 | 26 | return true 27 | } 28 | 29 | // Execute performs the actual task and returns true on success 30 | func (t *ReconcileTask) Execute() (success bool) { 31 | 32 | if t.verbose { 33 | fmt.Println("Reconciling statements for vulnerability ID: " + t.VulnerabilityID) 34 | fmt.Println("Using sources:") 35 | for _, s := range t.Sources { 36 | fmt.Println(s) 37 | } 38 | } 39 | 40 | t.validate() 41 | 42 | fmt.Println("WARNING: Reconcile task not implemented yet!") 43 | 44 | return true 45 | } 46 | -------------------------------------------------------------------------------- /kaybee/internal/tasks/task.go: -------------------------------------------------------------------------------- 1 | // Package tasks contains all implementations of tasks; these are instantiated 2 | // and run from the main package 3 | package tasks 4 | 5 | // "github.com/sap/project-kb/kaybee/internal/conf" 6 | 7 | // The Task interface defines the behaviour that all tasks must implement 8 | type Task interface { 9 | Execute() (success bool) 10 | validate() (ok bool) 11 | } 12 | 13 | // BaseTask defines the basis for all task implementations 14 | type BaseTask struct { 15 | verbose bool 16 | } 17 | 18 | // Verbose makes the task verbose 19 | func (t *BaseTask) Verbose(v bool) { 20 | t.verbose = v 21 | } 22 | 23 | func (t *BaseTask) validate() bool { 24 | return true 25 | } 26 | -------------------------------------------------------------------------------- /kaybee/internal/tasks/tasks_test.go: -------------------------------------------------------------------------------- 1 | package tasks 2 | 3 | import ( 4 | "log" 5 | "testing" 6 | 7 | 
"github.com/sap/project-kb/kaybee/internal/conf" 8 | ) 9 | 10 | // 11 | // FIXTURES 12 | // 13 | func getConfig() conf.Configuration { 14 | p, err := conf.NewParser("../../testdata/conf/kaybeeconf.yaml") 15 | // p.SetConfigFile() 16 | if err != nil { 17 | return conf.Configuration{} 18 | } 19 | c, _ := p.Parse() 20 | return c 21 | } 22 | 23 | // 24 | // EXPORT 25 | // 26 | // func TestExporterPool(t *testing.T) { 27 | 28 | // vulasBackend := "https://vulas.tools.sap/" 29 | 30 | // ep, err := NewExporterPool(vulasBackend, 2, 4, map[string][]*regexp.Regexp{}) 31 | // if err != nil { 32 | // t.Error(err) 33 | // } 34 | 35 | // results := ep.Run() 36 | 37 | // if len(results) == 0 { 38 | // t.Error("no results, error fetching data") 39 | // } 40 | 41 | // // fmt.Printf("%v", results) 42 | // marshaled, _ := yaml.Marshal(results) 43 | // fmt.Printf(string(marshaled)) 44 | 45 | // } 46 | 47 | // 48 | // MERGE 49 | // 50 | func TestMergeTask(t *testing.T) { 51 | 52 | conf := getConfig() 53 | log.Printf("config: %v", conf) 54 | 55 | // merge needs pull! 56 | pullTask := NewPullTask().WithSources(conf.Sources()) 57 | pullTask.Execute() 58 | 59 | task := NewMergeTask(). 60 | WithPolicy(conf.Policies()[0]). 61 | WithSources(conf.Sources()) 62 | task.Execute() 63 | } 64 | 65 | // 66 | // PULL 67 | // 68 | func TestPullTask(t *testing.T) { 69 | conf := getConfig() 70 | log.Printf("config: %v", conf) 71 | 72 | pullTask := NewPullTask().WithSources(conf.Sources()) 73 | pullTask.Execute() 74 | } 75 | -------------------------------------------------------------------------------- /kaybee/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2019 - 2020, SAP 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main

import "github.com/sap/project-kb/kaybee/cmd"

// main hands control to the command dispatcher implemented in the cmd package.
func main() {
	cmd.Execute()
}
--------------------------------------------------------------------------------
/kaybee/testdata/conf/sample_kbsync_exclude_regex.yaml:
--------------------------------------------------------------------------------
apiVersion: v1
backend: "restbackend-service:8080"

export_denylist:
  bugid:
    - "[[:alpha:]"

sources:
  1: https://github.com/ichbinfrog/test_2
  2: https://github.com/ichbinfrog/test_1
--------------------------------------------------------------------------------
/kaybee/testdata/conf/sample_kbsync_invalid_policy.yaml:
--------------------------------------------------------------------------------
apiVersion: "v1"
backend: "restbackend-service:8080"

export_denylist:
  bugid:
    - "CVE.*"
    - "az"

import_denylist:
  description:
    - apzoeirpoiapeori

sources:
  1: https://github.com/ichbinfrog/test_2
  2: https://github.com/ichbinfrog/test_1

policies:
  1: soft
  2: invalid_policy
--------------------------------------------------------------------------------
/kaybee/testdata/conf/sample_kbsync_malformed.yaml:
--------------------------------------------------------------------------------
apiVersion: v1
- "holla"
--------------------------------------------------------------------------------
/kaybee/testdata/conf/sample_kbsync_nobackend.yaml:
-------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | 3 | export_denylist: 4 | bugid: 5 | - "CVE.*" 6 | - "az" 7 | 8 | import_denylist: 9 | description: 10 | - apzoeirpoiapeori 11 | 12 | sources: 13 | 1: https://github.com/ichbinfrog/test_2 14 | 2: https://github.com/ichbinfrog/test_1 15 | -------------------------------------------------------------------------------- /kaybee/testdata/statements/statement_affected_artifacts.yaml: -------------------------------------------------------------------------------- 1 | api_version: v1 2 | 3 | vulnerability_id: "CVE-1234-5678" 4 | aliases: 5 | - "HTTPCLIENT-1234" 6 | - "SOME-OTHER-ID-1234" 7 | 8 | artifacts: 9 | - id: pkg:maven/org.apache.xmlgraphics/batik-anim@[1.9.0,1.9.2)-RELEASE 10 | affected: true 11 | reason: Documented in the project advisory page 12 | - id: pkg:maven/org.apache.xmlgraphics/batik-anim 13 | affected: false 14 | reason: Fixed in 1.9.2 15 | - id: pkg:maven/abcd1234acbd12340987654321 16 | affected: true 17 | reason: Manual analysis of this jar revealed it contains the same vulnerable methods as batik-anim@1.9.1 18 | 19 | notes: 20 | - links: 21 | - "https://nvd.nist.gov/vuln/detail/CVE-2016-2048" 22 | - "https://blahblah.xyz" 23 | text: > 24 | Django 1.9.x before 1.9.2, when ModelAdmin.save_as is set to True, allows remote authenticated users 25 | to bypass intended access restrictions and create ModelAdmin objects via the "Save as New" option when 26 | editing objects and leveraging the "change" permission. 
27 | - links: 28 | - "https://nvd.nist.gov/vuln/detail/CVE-2016-2048" 29 | text: "Some text" 30 | -------------------------------------------------------------------------------- /kaybee/testdata/statements/statement_commits.yaml: -------------------------------------------------------------------------------- 1 | vulnerability_id: CVE-2019-0191 2 | aliases: [] 3 | fixes: 4 | - id: "4.1" 5 | commits: 6 | - id: e36a7a66fa08eb5eb253b2b0cec262ffbdef072 7 | repository: https://github.com/apache/karaf/ 8 | 9 | - id: "4.2" 10 | commits: 11 | - id: fef9a618f11a670dc040d903a4b0f9bbc9f3e9c 12 | repository: https://github.com/apache/karaf/ 13 | 14 | notes: 15 | - links: 16 | - https://issues.apache.org/jira/browse/KARAF-6090 17 | - https://lists.apache.org/thread.html/6856aa7ed7dd805eaf65d0e5e95027dda3b2307aacd1ab4a838c5cd1@%3Cuser.karaf.apache.org%3E 18 | text: Apache Karaf kar deployer reads .kar archives and extracts the paths from 19 | the "repository/" and "resources/" entries in the zip file. It then writes out 20 | the content of these paths to the Karaf repo and resources directories. However, 21 | it doesn't do any validation on the paths in the zip file. This means that a malicious 22 | user could craft a .kar file with ".." directory names and break out of the directories 23 | to write arbitrary content to the filesystem. This is the "Zip-slip" vulnerability 24 | - https://snyk.io/research/zip-slip-vulnerability. This vulnerability is low if 25 | the Karaf process user has limited permission on the filesystem. Any Apache Karaf 26 | releases prior 4.2.3 is impacted. 
27 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: project "KB" 2 | nav: 3 | - Home: index.md 4 | - Prospector: prospector.md 5 | - Kaybee: kaybee.md 6 | - How to contribute: contributing.md 7 | - FAQ: faq.md 8 | - Team: team.md 9 | 10 | # Repository 11 | repo_name: 'sap/project-kb' 12 | repo_url: 'https://github.com/sap/project-kb' 13 | 14 | # Copyright 15 | copyright: 'Copyright © 2020 - SAP' 16 | 17 | # Extensions 18 | markdown_extensions: 19 | - markdown.extensions.admonition 20 | - markdown.extensions.codehilite: 21 | guess_lang: false 22 | - markdown.extensions.def_list 23 | - markdown.extensions.footnotes 24 | - markdown.extensions.meta 25 | - markdown.extensions.toc: 26 | permalink: true 27 | # title: 'List of questions' 28 | - pymdownx.arithmatex 29 | - pymdownx.betterem: 30 | smart_enable: all 31 | - pymdownx.caret 32 | - pymdownx.critic 33 | - pymdownx.details 34 | #- pymdownx.emoji: 35 | # emoji_generator: !!python/name:pymdownx.emoji.to_svg 36 | - pymdownx.inlinehilite 37 | - pymdownx.keys 38 | - pymdownx.magiclink 39 | - pymdownx.mark 40 | - pymdownx.smartsymbols 41 | - pymdownx.superfences 42 | - pymdownx.tasklist: 43 | custom_checkbox: true 44 | - pymdownx.tilde 45 | - attr_list 46 | - md_in_html 47 | 48 | 49 | # Customization 50 | extra: 51 | social: 52 | #- type: 'github' 53 | # link: 'https://github.com/sap/project-kb' 54 | # - type: 'twitter' 55 | # link: 'https://twitter.com/antoninosabetta' 56 | # - type: 'linkedin' 57 | # link: 'https://linkedin.com/in/squidfunk' 58 | 59 | theme: 60 | name: 'material' 61 | palette: 62 | primary: 'green' 63 | accent: 'green' 64 | icon: 65 | repo: fontawesome/brands/github 66 | feature: 67 | tabs: false 68 | 69 | extra_css: 70 | - css/faq.css 71 | - css/buttons.css 72 | -------------------------------------------------------------------------------- 
/prospector/.coveragerc: -------------------------------------------------------------------------------- 1 | # .coveragerc to control coverage.py 2 | [run] 3 | branch = True 4 | 5 | [report] 6 | # Regexes for lines to exclude from consideration 7 | exclude_lines = 8 | # Have to re-enable the standard pragma 9 | pragma: no cover 10 | 11 | # Don't complain about missing debug-only code: 12 | def __repr__ 13 | if self\.debug 14 | 15 | # Don't complain if tests don't hit defensive assertion code: 16 | raise AssertionError 17 | raise NotImplementedError 18 | 19 | # Don't complain if non-runnable code isn't run: 20 | if 0: 21 | if __name__ == .__main__.: 22 | 23 | ignore_errors = True 24 | -------------------------------------------------------------------------------- /prospector/.env-sample: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | ## I M P O R T A N T N O T I C E 3 | ## 4 | ## Before proceeding, please rename this file as '.env' and adapt 5 | ## the values before to match your environment. At the very least 6 | ## you should change the location of the GIT_CACHE 7 | ############################################################################### 8 | 9 | # NOTE: make sure this directory exists! 10 | # NOTE: do NOT use $VARIABLES here 11 | 12 | GIT_CACHE=a/real/path/to/a/git/cache 13 | CVE_DATA_PATH=a/real/path/to/cve/data 14 | POSTGRES_USER=postgres 15 | POSTGRES_PORT=5432 16 | POSTGRES_HOST=localhost 17 | POSTGRES_DBNAME=postgres 18 | POSTGRES_PASSWORD=example 19 | POSTGRES_DATA=a/real/path/to/a/folder/to/save/postgres/data 20 | REDIS_URL=redis://localhost:6379/0 21 | NVD_API_KEY=APIkey 22 | PYTHONPATH=. 
23 | -------------------------------------------------------------------------------- /prospector/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203,E501,W503,E501 E203, E501, W503,F401,F403,W605 3 | exclude = 4 | # No need to traverse our git directory 5 | .git, 6 | empirical_study, 7 | # There's no value in checking cache directories 8 | __pycache__, 9 | # The conf file is mostly autogenerated, ignore it 10 | ; docs/source/conf.py, 11 | # The old directory contains Flake8 2.0 12 | ; old, 13 | # This contains our built documentation 14 | build, 15 | # This contains builds of flake8 that we don't want to check 16 | dist 17 | ; per-file-ignores = 18 | ; __init__.py:F403, 19 | ; __init__.py:F401 20 | max-complexity = 12 21 | max-line-length = 100 22 | -------------------------------------------------------------------------------- /prospector/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - id: check-merge-conflict 9 | - repo: https://github.com/psf/black 10 | rev: 22.10.0 11 | hooks: 12 | - id: black 13 | - repo: https://github.com/pycqa/isort 14 | rev: 5.12.0 15 | hooks: 16 | - id: isort 17 | args: ["--profile", "black", "--filter-files"] 18 | - repo: https://github.com/pycqa/flake8 19 | rev: 5.0.4 20 | hooks: 21 | - id: flake8 22 | args: # arguments to configure flake8 23 | - --max-line-length=100 24 | - --ignore=E203,E501,W503,F401 25 | - --max-complexity=12 26 | # - --per-file-exclude="__init__.py:F401 __init__.py:F403" 27 | # - "--select=B,C,E,F,W,T4,B9" 28 | # these are errors that will be ignored by flake8 29 | # check out their meaning here 30 | # https://flake8.pycqa.org/en/latest/user/error-codes.html 31 | # - 
"--ignore=E203,E266,E501,W503,F403,F401,E402" 32 | - repo: https://github.com/fsfe/reuse-tool 33 | rev: v1.0.0 34 | hooks: 35 | - id: reuse 36 | -------------------------------------------------------------------------------- /prospector/Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /bin/sh 2 | 3 | GREEN='[\033[0;32m' 4 | YELLOW='[\033[1;33m' 5 | END='\033[0m]' # No Color 6 | DONE="$(GREEN)DONE$(END)" 7 | PROGRESS="$(YELLOW)....$(END)" 8 | 9 | 10 | setup: requirements.txt 11 | @cp config-sample.yaml config.yaml 12 | @cp .env-sample .env 13 | @echo "$(PROGRESS) Installing requirements" 14 | @pip install -r requirements.txt 15 | @echo "$(DONE) Installed requirements" 16 | @echo "$(PROGRESS) Installing pre-commit and spacy model" 17 | @python -m spacy download en_core_web_sm 18 | @echo "$(DONE) Installed pre-commit and spacy model" 19 | 20 | dev-setup: setup requirements-dev.txt 21 | @echo "$(PROGRESS) Installing development requirements" 22 | @pip install -r requirements-dev.txt 23 | @pre-commit install 24 | @echo "$(DONE) Installed development requirements" 25 | 26 | docker-setup: 27 | docker build -t prospector-base:1.0 -f ./docker/Dockerfile . 
28 | docker-compose up -d --build 29 | 30 | docker-clean: 31 | @echo "$(PROGRESS) Stopping and removing all container and images" 32 | @docker-compose down --rmi all -v 2>/dev/null 33 | @echo "$(DONE) Stopped and removed all container and images" 34 | 35 | @echo "$(PROGRESS) Cleaning volumes" 36 | @docker volume prune -f 37 | @echo "$(DONE) Cleaned volumes" 38 | 39 | @echo "$(PROGRESS) Cleaning residue" 40 | @docker system prune -a -f 41 | @echo "$(DONE) Cleaned residue" 42 | 43 | 44 | clean: 45 | @rm -f prospector.log 46 | @rm -rf __pycache__ 47 | @rm -rf */__pycache__ 48 | @rm -rf */*/__pycache__ 49 | @rm -rf *.log 50 | @rm -rf .pytest_cache 51 | @rm -rf *.html 52 | @rm -rf *.json 53 | @rm -rf *.sqlite 54 | #@rm -rf $(GIT_CACHE)/* 55 | -------------------------------------------------------------------------------- /prospector/backenddb/README.md: -------------------------------------------------------------------------------- 1 | # commit-db 2 | -------------------------------------------------------------------------------- /prospector/backenddb/__init__.py: -------------------------------------------------------------------------------- 1 | class BackendDB: 2 | def connect(self, connect_string): 3 | raise NotImplementedError("Unimplemented") 4 | -------------------------------------------------------------------------------- /prospector/backenddb/commitdb_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from backenddb.postgres import PostgresBackendDB, parse_connect_string 4 | from datamodel.commit import Commit 5 | 6 | 7 | @pytest.fixture 8 | def setupdb(): 9 | db = PostgresBackendDB("postgres", "example", "localhost", "5432", "postgres") 10 | db.connect() 11 | # db.reset() 12 | return db 13 | 14 | 15 | def test_save_lookup(setupdb: PostgresBackendDB): 16 | commit = Commit( 17 | commit_id="42423b2423", 18 | repository="https://fasfasdfasfasd.com/rewrwe/rwer", 19 | timestamp=121422430, 20 | 
hunks=1, 21 | message="Some random garbage", 22 | diff=["fasdfasfa", "asf90hfasdfads", "fasd0fasdfas"], 23 | changed_files=["fadsfasd/fsdafasd/fdsafafdsa.ifd"], 24 | message_reference_content=[], 25 | jira_refs={}, 26 | ghissue_refs={"hggdhd": ""}, 27 | cve_refs=["simola3"], 28 | tags=["tag1"], 29 | ) 30 | setupdb.save(commit.to_dict()) 31 | result = setupdb.lookup( 32 | "https://fasfasdfasfasd.com/rewrwe/rwer", 33 | "42423b2423", 34 | ) 35 | 36 | retrieved_commit = Commit.parse_obj(result[0]) 37 | assert commit.commit_id == retrieved_commit.commit_id 38 | 39 | 40 | def test_lookup_nonexisting(setupdb: PostgresBackendDB): 41 | result = setupdb.lookup( 42 | "https://fasfasdfasfasd.com/rewrwe/rwer", 43 | "42423b242342423b2423", 44 | ) 45 | assert result == [] 46 | -------------------------------------------------------------------------------- /prospector/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/prospector/cli/__init__.py -------------------------------------------------------------------------------- /prospector/cli/console.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Optional 3 | 4 | from colorama import Fore, Style 5 | 6 | 7 | class MessageStatus(Enum): 8 | OK = Fore.GREEN 9 | WARNING = Fore.YELLOW 10 | ERROR = Fore.RED 11 | 12 | 13 | class ConsoleWriter(object): 14 | indent: str = " " 15 | 16 | def __init__(self, message: str): 17 | self._message = message 18 | self.status: MessageStatus = MessageStatus.OK 19 | 20 | def __enter__(self): 21 | print(f"{Fore.LIGHTWHITE_EX}{self._message}{Style.RESET_ALL}", end=" ") 22 | return self 23 | 24 | def __exit__(self, exc_type, exc_val, exc_tb): 25 | if exc_val is not None: 26 | self.status = MessageStatus.ERROR 27 | print( 28 | 
f"{ConsoleWriter.indent}[{self.status.value}{self.status.name}{Style.RESET_ALL}]", 29 | end="\n", 30 | ) 31 | if exc_val is not None: 32 | raise exc_val 33 | 34 | def set_status(self, status: MessageStatus): 35 | self.status = status 36 | 37 | def print__(self, note: str, status: Optional[MessageStatus] = None): 38 | print(f"{ConsoleWriter.indent}{Fore.WHITE}{note}", end="\n") 39 | if isinstance(status, MessageStatus): 40 | self.set_status(status) 41 | 42 | @staticmethod 43 | def print(note: str, status: Optional[MessageStatus] = None): 44 | print(f"{ConsoleWriter.indent}{Fore.WHITE}{note}", end=" ") 45 | 46 | @staticmethod 47 | def print_(status: MessageStatus): 48 | print(f"[{status.value}{status.name}{Style.RESET_ALL}]", end="\n") 49 | -------------------------------------------------------------------------------- /prospector/config-sample.yaml: -------------------------------------------------------------------------------- 1 | # Wheter to preprocess only the repository's commits or fully run prospector 2 | preprocess_only: False 3 | 4 | # Maximum number of commits to process 5 | max_candidates: 2000 6 | 7 | fetch_references: False 8 | 9 | # Wether to use the NVD database or not 10 | use_nvd: True 11 | 12 | # The NVD API token 13 | # nvd_token: 14 | 15 | # Wheter to use a backend or not: "always", "never", "optional" 16 | use_backend: optional 17 | 18 | # Backend address; when in containerised version, use http://backend:8000, otherwise http://localhost:8000 19 | backend: http://localhost:8000 20 | 21 | database: 22 | user: postgres 23 | password: example 24 | host: localhost # Database address; when in containerised version, use 'db', otherwise 'localhost' 25 | port: 5432 26 | dbname: postgres 27 | 28 | redis_url: redis://redis:6379/0 29 | 30 | # LLM Usage (check README for help) 31 | # llm_service: 32 | # type: sap # use "sap" or "third_party" 33 | # model_name: gpt-4-turbo 34 | # temperature: 0.0 # optional, default is 0.0 35 | # ai_core_sk: # needed for 
# Mock the llm_service configuration object
class Config:
    """Lightweight stand-in for the llm_service configuration object.

    Holds the four fields that are presumably read by LLMService when it is
    instantiated in these tests (service type, model name, sampling
    temperature, and the AI Core service-key file path) — verify against
    LLMService if its contract changes.
    """

    type: str = None
    model_name: str = None
    # Fixed: was annotated `str`, but callers pass a float (e.g. 0.0).
    temperature: float = None
    ai_core_sk: str = None

    def __init__(self, type, model_name, temperature, ai_core_sk):
        # `type` deliberately shadows the builtin: it mirrors the YAML key name.
        self.type = type
        self.model_name = model_name
        self.temperature = temperature
        self.ai_core_sk = ai_core_sk
diff=random_list_of_strs(200), 31 | changed_files=random_list_of_path(4, 42), 32 | message_reference_content=random_list_of_strs(42), 33 | jira_refs=random_dict_of_jira_refs(42), 34 | ghissue_refs=random_dict_of_github_issue_ids(100000, 42), 35 | cve_refs=random_list_of_cve(42), 36 | tags=random_list_of_strs(42), 37 | annotations=random_dict_of_strs(16, 10), 38 | ) 39 | 40 | candidates.append(annotated_candidates) 41 | 42 | advisory = build_advisory_record("CVE-2014-0050") 43 | 44 | if os.path.isfile("test_report.html"): 45 | os.remove("test_report.html") 46 | if os.path.isfile("test_report.json"): 47 | os.remove("test_report.json") 48 | html = report.html_( 49 | candidates, advisory, "test_report.html", statistics=sample_statistics() 50 | ) 51 | json = report.json_(candidates, advisory, "test_report.json") 52 | 53 | assert os.path.isfile(html) 54 | assert os.path.isfile(json) 55 | -------------------------------------------------------------------------------- /prospector/core/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 12 | 13 | 14 | 15 | 44 | 45 | {% block title %}Prospector Report{% endblock %} 46 | 47 | 48 | 49 | {% block content %} 50 | {% endblock %} 51 | 52 | 53 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /prospector/core/templates/card/changed_paths_block.html: -------------------------------------------------------------------------------- 1 | {% extends "titled_block.html" %} 2 | {% set title = "Changed files in commit" %} 3 | {% set icon = "fas fa-file-signature" %} 4 | {% block body %} 5 |
    6 |
  • 7 | {% for path in annotated_commit.changed_files %} 8 | {{ path }} 9 | {% endfor %} 10 |
  • 11 |
12 | {% endblock %} 13 | -------------------------------------------------------------------------------- /prospector/core/templates/card/commit_header.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |
5 | {{ annotated_commit.message | truncate(72) }} 6 | {% if not annotated_commit.message %} 7 | (no commit message found) 8 | {% endif %} 9 |
10 |
11 |
12 | 13 |
14 |
15 | 19 |
20 |
21 |
22 |
23 | {% if annotated_commit.relevance > 0 %} 24 |
25 | 26 | Relevance: {{ annotated_commit.relevance }} 27 |
28 | {% endif %} 29 |
30 |
31 |
32 | 33 | Tag: {{ annotated_commit.tags[0] }} 34 |
35 |
36 |
37 |
38 |
39 | {% if annotated_commit.matched_rules|length > 0 %} 40 | 41 | {% for rule in annotated_commit.matched_rules %} 42 | {{ rule.id }} 43 | {% endfor %} 44 | {% endif %} 45 |
46 |
47 |
48 | -------------------------------------------------------------------------------- /prospector/core/templates/card/commit_title_block.html: -------------------------------------------------------------------------------- 1 |
2 | {{ annotated_commit.commit_id }} 3 |
4 |
5 | 6 | {% if 'github' in annotated_commit.repository %} 7 | 9 | Open commit 10 | {% else %} 11 | Open Repository (unknown 12 | API) 13 | {% endif %} 14 |
15 | -------------------------------------------------------------------------------- /prospector/core/templates/card/matched_rules_block.html: -------------------------------------------------------------------------------- 1 | {% extends "titled_block.html" %} 2 | {% if annotated_commit.matched_rules|length > 0 %} 3 | {% set title = "Matched rules" %} 4 | {% set icon = "fas fa-bullhorn" %} 5 | {% block body %} 6 |
    7 | {% for rule in annotated_commit.matched_rules %} 8 |
  • 9 |
    {{ rule.message }}
    10 |
  • 11 | {% endfor %} 12 |
13 | {% endblock %} 14 | {% endif %} 15 | -------------------------------------------------------------------------------- /prospector/core/templates/card/message_block.html: -------------------------------------------------------------------------------- 1 | {% extends "titled_block.html" %} 2 | {% set title = "Commit message" %} 3 | {% set icon = "fas fa-quote-left" %} 4 | {% block body %} 5 |
    6 |
  • 7 |
    {{ annotated_commit.message }}
    8 |
  • 9 |
10 | {% endblock %} 11 | -------------------------------------------------------------------------------- /prospector/core/templates/card/pages_linked_from_advisories_block.html: -------------------------------------------------------------------------------- 1 | {% extends "titled_block.html" %} 2 | {% set title = "Referred by pages linked from advisories" %} 3 | {% set icon = "fas fa-link" %} 4 | {% block body %} 5 |

6 | {% for page in annotated_commit.referred_to_by_pages_linked_from_advisories %} 7 | {{ page }} 8 | {% endfor %} 9 |

10 | {% endblock %} -------------------------------------------------------------------------------- /prospector/core/templates/card/twin_list_block.html: -------------------------------------------------------------------------------- 1 | {% if annotated_commit.twins %} 2 | {% extends "titled_block.html" %} 3 | {% set title = "Commit twins" %} 4 | {% set icon = "fas fa-shield-alt" %} 5 | {% block body %} 6 |
    7 | {% for tag, id in annotated_commit.twins %} 8 |
  • 9 | {{tag}}: 10 | 11 | {{ id }} 12 | 13 |
  • 14 | {% endfor %} 15 |
16 | {% endblock %} 17 | {% endif %} 18 | -------------------------------------------------------------------------------- /prospector/core/templates/collapse_all_scripts.html: -------------------------------------------------------------------------------- 1 | 42 | -------------------------------------------------------------------------------- /prospector/core/templates/report_header.html: -------------------------------------------------------------------------------- 1 |
2 | {% if candidates|length > 0 %} 3 | {% if candidates[0].relevance != 0 %} 4 |

Filters

5 |

6 | Use the slider to filter out lower relevance scores and the button to collapse or expand all the commits. 7 |

8 | 11 |
12 |
13 | 14 |
15 |
16 | 18 |
19 |
20 | {% endif %} 21 | {% endif %} 22 | 23 | 24 |
25 |

Advisory Record

26 | {{ 27 | advisory_record.cve_id }}
28 |

{{ advisory_record.description }}

29 | 30 | {% if advisory_record.files|length > 0 %} 31 |
Possible relevant files/methods
32 |

33 |

    34 | {% for file in advisory_record.files | sort %} 35 |
  • {{file}}
  • 36 | {% endfor %} 37 |
38 |

39 | {% endif %} 40 | {% if advisory_record.keywords|length > 0 %} 41 |
Other relevant keywords
42 |

43 | {% for token in advisory_record.keywords | sort %} 44 |

  • {{token}}
  • 45 | 46 | {% endfor %} 47 |

    48 | {% endif %} 49 |
    50 | 51 | 55 | 56 |
    57 |
    58 |
    Execution Statistics
    59 | 60 |
    61 |
    62 | {{ execution_statistics | safe }} 63 |
    64 |
    65 | 66 |
    67 | -------------------------------------------------------------------------------- /prospector/core/templates/results.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 |
    5 |
    6 | {% include "report_header.html" %} 7 | 8 |
    9 |
    10 |
    11 |
    12 |

    Prospector Report

    13 |
    14 | 21 | 22 |
    23 |
    24 | {% for annotated_commit in candidates %} 25 |
    28 | 29 |
    30 | {% include "card/commit_header.html" %} 31 |
    32 | 33 | 34 | 35 | 36 |
    38 |
    39 | {% include "card/commit_title_block.html" %} 40 | {% include "card/matched_rules_block.html" %} 41 | {% include "card/message_block.html" %} 42 | {% include "card/changed_paths_block.html" %} 43 | {% include "card/twin_list_block.html" %} 44 | 45 |
    46 |
    47 |
    48 | {% endfor %} 49 |
    50 |
    51 |
    52 |
    53 |
    54 | 55 | {% include "filtering_scripts.html" %} 56 | {% include "collapse_all_scripts.html" %} 57 | {% endblock %} 58 | -------------------------------------------------------------------------------- /prospector/core/templates/titled_block.html: -------------------------------------------------------------------------------- 1 |
    {{ title }}
    2 | {% block body %}empty block{% endblock %} -------------------------------------------------------------------------------- /prospector/datamodel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/prospector/datamodel/__init__.py -------------------------------------------------------------------------------- /prospector/datamodel/commit_test.py: -------------------------------------------------------------------------------- 1 | # from dataclasses import asdict 2 | import pytest 3 | 4 | from git.git import Git 5 | 6 | from .commit import make_from_raw_commit 7 | 8 | SHENYU = "https://github.com/apache/shenyu" 9 | COMMIT = "0e826ceae97a1258cb15c73a3072118c920e8654" 10 | COMMIT_2 = "530bff5a0618062d3f253dab959785ce728d1f3c" 11 | 12 | 13 | @pytest.fixture 14 | def repository(): 15 | repo = Git(SHENYU) # Git("https://github.com/slackhq/nebula") 16 | repo.clone() 17 | return repo 18 | 19 | 20 | def test_preprocess_commit(repository: Git): 21 | 22 | repo = repository 23 | raw_commit = repo.get_commit( 24 | COMMIT_2 25 | ) # repo.get_commit("e434ba6523c4d6d22625755f9890039728e6676a") 26 | 27 | make_from_raw_commit(raw_commit) 28 | 29 | 30 | def test_preprocess_commit_set(repository: Git): 31 | 32 | repo = repository 33 | commit_set = repo.create_commits(since="1615441712", until="1617441712") 34 | preprocessed_commits = [] 35 | 36 | for commit_id in commit_set: 37 | raw_commit = repo.get_commit(commit_id) 38 | preprocessed_commits.append(make_from_raw_commit(raw_commit)) 39 | 40 | assert len(preprocessed_commits) == len(commit_set) 41 | 42 | 43 | def test_commit_ordering(repository: Git): 44 | assert True 45 | 46 | 47 | def test_find_twin(repository: Git): 48 | assert True 49 | -------------------------------------------------------------------------------- /prospector/datamodel/constants.py: 
# File extensions used to decide whether a changed file is relevant when
# analyzing commits.

# Compact, high-signal subset for contexts where the full list is too broad.
REL_EXT_SMALL = ["java", "c", "cpp", "py", "js", "go", "php", "h"]

# Full list of extensions considered relevant.
RELEVANT_EXTENSIONS = [
    "java",
    "c",
    "cpp",
    "h",
    "py",
    "js",
    "xml",
    "go",
    "rb",
    "php",
    "sh",
    "scale",  # NOTE(review): likely a typo for "scala" — confirm before changing
    "lua",
    "m",
    "pl",
    "ts",
    "swift",
    "sql",
    "groovy",
    "erl",
    "swf",
    "vue",
    "bat",
    "s",
    "ejs",
    "yaml",
    "yml",
    "jar",
    "jsp",
]
-- public.users definition

-- Drop table

DROP TABLE IF EXISTS public.users;

CREATE TABLE public.users (
	id varchar(40) NOT NULL PRIMARY KEY,
	-- Widened from varchar(40): modern password hashes do not fit in 40
	-- characters (bcrypt produces 60, argon2 typically 95+), so inserts
	-- would fail with the old limit.
	hashed_password varchar NOT NULL,
	firstname varchar NOT NULL,
	lastname varchar NULL,
	photo varchar NULL,
	-- NOTE(review): these two are stored as varchar rather than timestamp;
	-- presumably serialized datetimes — confirm against the application code.
	account_created varchar NULL,
	last_access varchar NULL
);
-- public.job definition
--
-- One row per queued/executed prospector job.

-- Drop table

DROP TABLE IF EXISTS public.job;

CREATE TABLE public.job (
	_id varchar NOT null PRIMARY KEY,
	-- Processed vulnerability this job works on (nullable).
	pv_id INT,
	-- Serialized job parameters.
	params varchar NOT NULL,
	-- Queue lifecycle timestamps.
	enqueued_at timestamp,
	started_at timestamp,
	finished_at timestamp,
	results varchar,
	created_by varchar,
	-- Self-referential link: the job this one was created from (e.g. a re-run).
	created_from varchar,
	status varchar,
	FOREIGN KEY (pv_id) REFERENCES public.processed_vuln (_id),
	FOREIGN KEY (created_from) REFERENCES public.job (_id)
);
23 | dockerfile: docker/worker/Dockerfile 24 | volumes: 25 | - ./:/app 26 | - ./data_sources/reports:/app/data_sources/reports 27 | - ./evaluation/data/reports/:/app/evaluation/data/reports 28 | - ./../../../data/gitcache:/tmp/gitcache 29 | depends_on: 30 | - redis 31 | environment: 32 | LOG_LEVEL: debug 33 | PIP_REQUIREMENTS: requirements.txt 34 | GIT_CACHE: /tmp/gitcache 35 | 36 | db: 37 | image: postgres 38 | container_name: db 39 | restart: always 40 | ports: 41 | - "5432:5432" 42 | environment: 43 | POSTGRES_PASSWORD: example #${POSTGRES_PASSWORD} 44 | volumes: 45 | - ./ddl:/docker-entrypoint-initdb.d 46 | - ${POSTGRES_DATA}:/var/lib/postgresql/data 47 | 48 | redis: 49 | image: "redis:alpine" 50 | ports: 51 | - "6379:6379" 52 | 53 | adminer: 54 | image: adminer 55 | restart: always 56 | ports: 57 | - 8080:8080 58 | -------------------------------------------------------------------------------- /prospector/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim 2 | 3 | RUN mkdir -p /app 4 | COPY ./requirements.txt /app/ 5 | WORKDIR /app 6 | # Create log files with permissions for host user 7 | RUN touch evaluation.log 8 | RUN touch prospector.log 9 | RUN chown ${UID}:${GID} evaluation.log 10 | RUN chown ${UID}:${GID} prospector.log 11 | 12 | # Install dependencies with pip 13 | RUN pip install --upgrade pip 14 | RUN apt update && apt install -y --no-install-recommends gcc g++ libffi-dev python3-dev libpq-dev git curl 15 | RUN pip install --no-cache-dir -r requirements.txt 16 | RUN python -m spacy download en_core_web_sm 17 | RUN apt autoremove -y gcc g++ libffi-dev python3-dev && apt clean && rm -rf /var/lib/apt/lists/* 18 | 19 | ENV PYTHONPATH "${PYTHONPATH}:/app" 20 | -------------------------------------------------------------------------------- /prospector/docker/cli/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | #FROM 
python:3.10-slim 3 | # 4 | #WORKDIR /clirun 5 | #VOLUME ["/clirun"] 6 | #ENV PYTHONPATH "${PYTHONPATH}:/clirun" 7 | # 8 | #RUN pip install --upgrade pip 9 | #RUN apt update && apt install -y --no-install-recommends gcc g++ libffi-dev python3-dev libpq-dev git curl 10 | #COPY requirements.txt . 11 | #RUN pip install --no-cache-dir -r requirements.txt 12 | #RUN python -m spacy download en_core_web_sm 13 | #RUN apt autoremove -y gcc g++ libffi-dev python3-dev && apt clean && rm -rf /var/lib/apt/lists/* 14 | # 15 | #ENTRYPOINT [ "python","cli/main.py" ] 16 | 17 | FROM prospector-base:1.0 18 | 19 | #WORKDIR /clirun 20 | # 21 | #VOLUME ["/clirun"] 22 | #ENV PYTHONPATH "${PYTHONPATH}:/clirun" 23 | #WORKDIR /app 24 | #ENV PYTHONPATH "${PYTHONPATH}:/app" 25 | 26 | #VOLUME [ "/results" ] 27 | ENTRYPOINT [ "python","cli/main.py" ] 28 | -------------------------------------------------------------------------------- /prospector/docker/service/Dockerfile: -------------------------------------------------------------------------------- 1 | #FROM python:3.10-slim 2 | #VOLUME ["/app"] 3 | ##COPY docker/service/start.sh /app/start.sh 4 | ##COPY ./config-sample.yaml /app/config.yaml 5 | ##RUN chmod +x /app/start.sh 6 | ##COPY ./service/ /app 7 | #RUN pip install --upgrade pip 8 | #RUN apt update && apt install -y --no-install-recommends gcc g++ libffi-dev python3-dev libpq-dev 9 | #COPY requirements.txt /requirements.txt 10 | #RUN pip install --no-cache-dir -r requirements.txt 11 | #RUN python -m spacy download en_core_web_sm 12 | #RUN apt autoremove -y gcc g++ libffi-dev python3-dev && apt clean && rm -rf /var/lib/apt/lists/* 13 | #ENV PYTHONPATH "${PYTHONPATH}:/app" 14 | ##RUN rm -rf /app/rules 15 | ##RUN mkdir /app/cve_data 16 | #WORKDIR /app 17 | ##CMD tail -f /dev/null 18 | #CMD ["python","./service/main.py"] 19 | 20 | FROM prospector-base:1.0 21 | 22 | #VOLUME ["/app"] 23 | #ENV PYTHONPATH "${PYTHONPATH}:/app" 24 | #WORKDIR /app 25 | 26 | CMD ["python","./service/main.py"] 27 
#! /usr/bin/env sh
# Container entrypoint for the backend service: starts the API process.

# python api/routers/nvd_feed_update.py
# echo "NVD feed download complete"

# Use exec so python replaces this shell as PID 1 and receives signals
# (SIGTERM from `docker stop`) directly instead of them stopping at the shell.
exec python main.py
["/usr/local/bin/start_rq_worker.sh"] 39 | # 40 | 41 | FROM prospector-base:1.0 42 | 43 | #LABEL Author="Antonino Sabetta " 44 | # 45 | ## NOTICE 46 | ## 47 | ## This dockerfile is based on https://github.com/geekinutah/docker-python-rq-worker 48 | ## by Mike Wilson 49 | ## 50 | ## It was modified to work with alpine instead of ubuntu; a few of the 51 | ## original dependencies were dropped too to make the image even slimmer 52 | 53 | ENV TERM=xterm-256color 54 | ENV REDIS_HOST=redis 55 | ENV REDIS_PORT=6379 56 | ENV REDIS_DB=0 57 | ENV RQ_QUEUE=default 58 | ENV LOG_LEVEL=DEBUG 59 | ENV PIP_PACKAGES=none 60 | ENV PIP_REQUIREMENTS=none 61 | ENV LC_ALL=C.UTF-8 62 | ENV LANG=C.UTF-8 63 | 64 | RUN apt update && apt install -y --no-install-recommends supervisor 65 | 66 | COPY docker/worker/start_rq_worker.sh /usr/local/bin/start_rq_worker.sh 67 | COPY docker/worker/etc_supervisor_confd_rqworker.conf.j2 /etc/supervisor.d/rqworker.ini.j2 68 | 69 | #VOLUME ["/pythonimports"] 70 | #ENV PYTHONPATH "${PYTHONPATH}:/pythonimports" 71 | 72 | VOLUME [ "/pipeline/reports" ] 73 | 74 | RUN chmod +x /usr/local/bin/start_rq_worker.sh 75 | #CMD tail -f /dev/null 76 | 77 | # Create directory for gitcache and run git config command to avoid 'dubious ownership' error 78 | RUN mkdir -p /tmp/gitcache && \ 79 | cd /tmp/gitcache && \ 80 | git config --global --add safe.directory '*' 81 | 82 | 83 | ENTRYPOINT ["/usr/local/bin/start_rq_worker.sh"] 84 | -------------------------------------------------------------------------------- /prospector/docker/worker/etc_supervisor_confd_rqworker.conf.j2: -------------------------------------------------------------------------------- 1 | ; COPYRIGHT NOTICE 2 | ; 3 | ; This file comes from https://github.com/geekinutah/docker-python-rq-worker 4 | ; (possibly with minor modifications) 5 | 6 | [program:rqworker] 7 | command=/usr/local/bin/python3 /usr/local/bin/rq worker {{env['RQ_QUEUE']}} -u 
#!/bin/sh
# Renders the supervisor config template and launches supervisord, optionally
# installing extra pip packages/requirements first (driven by env vars).

# COPYRIGHT NOTICE
#
# This code comes from https://github.com/geekinutah/docker-python-rq-worker
# (with minor modifications to make it work with alpine instead of ubuntu)

# PIP_PACKAGES is a comma-separated list; the unquoted $(...) relies on
# word splitting to iterate the items.
if [ "${PIP_PACKAGES}" != 'none' ]; then
    for i in $(echo "${PIP_PACKAGES}" | sed 's/,/ /g'); do
        # Quote the package name so names are passed as single arguments.
        pip3 install "$i"
    done
fi

# If there is a requirements file, install that

if [ "${PIP_REQUIREMENTS}" != 'none' ]; then
    pip3 install -r "${PIP_REQUIREMENTS}"
fi

# Render the jinja2 template with the current environment (redirection
# replaces the previous useless `cat | python3` pipeline).
python3 -c 'import os;import sys; import jinja2; sys.stdout.write(jinja2.Template(sys.stdin.read()).render(env=os.environ))' < /etc/supervisor.d/rqworker.ini.j2 > /etc/supervisor.d/rqworker.ini
# NOTE(review): this append is not idempotent — restarting the container
# duplicates the line in supervisord.conf; harmless for supervisord but
# worth guarding with a grep check.
echo "files = /etc/supervisor.d/*.ini" >> /etc/supervisor/supervisord.conf
supervisord -n
/prospector/docs/img/prospector-assuremoss-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/prospector/docs/img/prospector-assuremoss-arch.png -------------------------------------------------------------------------------- /prospector/docs/img/prospector-assuremoss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/prospector/docs/img/prospector-assuremoss.png -------------------------------------------------------------------------------- /prospector/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/prospector/evaluation/__init__.py -------------------------------------------------------------------------------- /prospector/evaluation/config-sample.yaml: -------------------------------------------------------------------------------- 1 | # Debug Level 2 | debug_level: INFO 3 | 4 | # Input Data 5 | input_data_path: evaluation/data/input/ # datasets to run prospector on (CSV: ID;URL;VERSIONS;FLAG;COMMITS;COMMENTS) 6 | 7 | # directory to where the batches of reports are to be found 8 | reports_directory: ../../../data/prospector_reports/ 9 | # filename of the ground truth dataset file 10 | input: tracer 11 | 12 | # Prospector Batch Selection 13 | version_interval: true 14 | llm_support: true 15 | batch: regular # regular, old_code, old_reports 16 | cves: all # optionally select a subset of CVEs, eg. 
CVE-2020-1925 to only execute on this CVE 17 | 18 | # compare reports selected above to 19 | compare_directory: ../../../data/prospector_reports/nvi_with_llm/ 20 | 21 | # Prospector settings (from the Prospector config.yaml) 22 | prospector_settings: 23 | # Maximum number of commits to process 24 | max_candidates: 2000 25 | 26 | # Whether to use a backend or not: "always", "never", "optional" 27 | use_backend: never 28 | 29 | # backend: http://backend:8000 30 | backend: http://localhost:8000 31 | 32 | database: 33 | user: postgres 34 | password: example 35 | host: db # Database address; when in containerised version, use 'db', otherwise 'localhost' 36 | port: 5432 37 | dbname: postgres 38 | 39 | redis_url: redis://localhost:6379/0 40 | 41 | # LLM Usage (check README for help) 42 | llm_service: 43 | type: sap # sap or third_party 44 | model_name: gpt-4 # gpt-4 or mistral-large-latest 45 | temperature: 0.0 # optional, default is 0.0 46 | ai_core_sk: sk.json 47 | 48 | # LLM support options (check README for help) 49 | use_llm_repository_url: True # True in order to instantiate the Singleton, but the URL will be passed in the evaluation 50 | 51 | enabled_rules: 52 | # Phase 1 Rules 53 | - VULN_ID_IN_MESSAGE 54 | - XREF_BUG 55 | - XREF_GH 56 | - COMMIT_IN_REFERENCE 57 | - VULN_ID_IN_LINKED_ISSUE 58 | - CHANGES_RELEVANT_FILES 59 | - CHANGES_RELEVANT_CODE 60 | - RELEVANT_WORDS_IN_MESSAGE 61 | - ADV_KEYWORDS_IN_FILES 62 | - ADV_KEYWORDS_IN_MSG 63 | - SEC_KEYWORDS_IN_MESSAGE 64 | - SEC_KEYWORDS_IN_LINKED_GH 65 | - SEC_KEYWORDS_IN_LINKED_BUG 66 | - GITHUB_ISSUE_IN_MESSAGE 67 | - BUG_IN_MESSAGE 68 | - COMMIT_HAS_TWINS 69 | # Phase 2 Rules (llm_service required!): 70 | - COMMIT_IS_SECURITY_RELEVANT 71 | 72 | git_cache: /tmp/gitcache # When running Prospector containerised -------------------------------------------------------------------------------- /prospector/evaluation/data/results/summary_execution_mvi_table.tex: 
-------------------------------------------------------------------------------- 1 | \begin{table} 2 | \centering 3 | % \tiny 4 | \begin{tabular}{| l c c c c |} 5 | \rowcolor{gray!50} \textbf{Result} & \textbf{Without LLM} & \textbf{\%} & \textbf{With LLM} & \textbf{\%} \\ \hline 6 | High confidence & 468 & 35.86 & 548 & 41.99 \\ 7 | \rowcolor{gray!20} \begin{tabular}{l} 8 | \quad Commit in reference* \\ 9 | \end{tabular} & 413 & 88.25 & 413 & 75.36 \\ 10 | \rowcolor{gray!20} \begin{tabular}{l} 11 | \quad CVE ID in message* \\ 12 | \end{tabular} & 69 & 14.74 & 69 & 12.59 \\ 13 | \rowcolor{gray!20} \begin{tabular}{l} 14 | \quad CVE ID in Issue* \\ 15 | \end{tabular} & 12 & 2.56 & 12 & 2.19 \\ 16 | \rowcolor{gray!20} \begin{tabular}{l} 17 | \quad Cross Reference* \\ 18 | \end{tabular} & 135 & 28.85 & 135 & 24.64 \\ 19 | \rowcolor{gray!20} \begin{tabular}{l} 20 | \quad Commit is Security Relevant* \\ 21 | \end{tabular} & 0 & 0.0 & 488 & 37.39 \\ 22 | Medium confidence & 93 & 7.13 & 18 & 1.38 \\ 23 | Low confidence & 11 & 0.84 & 6 & 0.46 \\ 24 | Not found (rank $> 10$) & 11 & 0.84 & 11 & 0.84 \\ 25 | Not reported & 205 & 15.71 & 44 & 3.37 \\ 26 | False Positive & 517 & 39.62 & 678 & 51.95 \\ 27 | Aborted (due to exceeding candidate limit) & 14 & 1.07 & 14 & 1.07 \\ 28 | \textbf{Total} & \textbf{1305} & & \textbf{1305} & \\ \hline 29 | \end{tabular} 30 | \caption{Prospector Evaluation Results (* percentage of high confidence category)} 31 | \label{tab:tracer_dataset_results_mvi} 32 | \end{table} -------------------------------------------------------------------------------- /prospector/evaluation/data/results/summary_execution_nvi_table.tex: -------------------------------------------------------------------------------- 1 | \begin{table} 2 | \centering 3 | % \tiny 4 | \begin{tabular}{| l c c c c|} 5 | \rowcolor{gray!50} \textbf{Result} & \textbf{Without LLM} & \textbf{\%} & \textbf{With LLM} & \textbf{\%} \\ \hline 6 | High confidence & 899 & 69.58 & 977 & 75.62 \\ 7 
| \rowcolor{gray!20} \begin{tabular}{l} 8 | \quad Commit in reference* \\ 9 | \end{tabular} & 835 & 92.88 & 835 & 85.47 \\ 10 | \rowcolor{gray!20} \begin{tabular}{l} 11 | \quad CVE ID in message* \\ 12 | \end{tabular} & 122 & 13.57 & 123 & 12.59 \\ 13 | \rowcolor{gray!20} \begin{tabular}{l} 14 | \quad CVE ID in Issue* \\ 15 | \end{tabular} & 32 & 3.56 & 32 & 3.28 \\ 16 | \rowcolor{gray!20} \begin{tabular}{l} 17 | \quad Cross Reference* \\ 18 | \end{tabular} & 322 & 35.82 & 319 & 32.65 \\ 19 | \rowcolor{gray!20} \begin{tabular}{l} 20 | \quad Commit is Security Relevant* \\ 21 | \end{tabular} & 0 & 0.0 & 857 & 66.33 \\ 22 | Medium confidence & 82 & 6.35 & 10 & 0.77 \\ 23 | Low confidence & 14 & 1.08 & 8 & 0.62 \\ 24 | Not found (rank $> 10$) & 31 & 2.4 & 30 & 2.32 \\ 25 | Not reported & 243 & 18.81 & 85 & 6.58 \\ 26 | False Positive & 23 & 1.78 & 182 & 14.09 \\ 27 | Aborted (candidate and time limit exceeded) & 27 & 2.09 & 27 & 2.09 \\ 28 | \textbf{Total} & \textbf{1292} & & \textbf{1292} & \\ \hline 29 | \end{tabular} 30 | \caption{Evaluation Results without version interval (* percentage of high confidence category)} 31 | \label{tab:tracer_dataset_results_nvi} 32 | \end{table} -------------------------------------------------------------------------------- /prospector/filtering/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/prospector/filtering/__init__.py -------------------------------------------------------------------------------- /prospector/git/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/prospector/git/__init__.py -------------------------------------------------------------------------------- /prospector/git/exec.py: 
class Exec:
    """Thin wrapper around subprocess for running shell commands in a fixed
    working directory, with optional per-instance LRU caching of results.
    """

    def __init__(self, workdir=None, encoding="latin-1", timeout=None):
        # timeout is in seconds; None means wait indefinitely.
        self.encoding = encoding
        self.timeout = timeout
        self.set_dir(workdir)

    def set_dir(self, path):
        """Set the working directory; only absolute paths are accepted so
        commands never depend on the caller's current working directory."""
        if os.path.isabs(path):
            self._workdir = path
        else:
            raise ValueError(f"Path must be absolute for Exec to work: {path}")

    def run(self, cmd: str, silent=False, cache: bool = False):
        """Run `cmd`, optionally serving the result from the LRU cache."""
        if cache:
            return self.run_cached(cmd, silent)

        return self.run_uncached(cmd, silent)

    # TODO lru_cache only works for one python process.
    # If you are running multiple subprocesses,
    # or running the same script over and over, lru_cache will not work.
    @lru_cache(maxsize=10000)
    def run_cached(self, cmd, silent=False):
        return self.run_uncached(cmd, silent=silent)

    def run_uncached(self, cmd, silent=False):
        if isinstance(cmd, str):
            cmd = cmd.split()

        out = self.execute(cmd, silent=silent)
        # Normalize the silent-mode None into an empty list for callers.
        if out is None:
            return []
        else:
            return out

    def run_live_output(self, cmd: str):
        # NOTE(review): unimplemented stub kept for interface compatibility.
        if isinstance(cmd, str):
            cmd = cmd.split()
        pass

    def execute(self, cmd, silent=False) -> Optional[List[str]]:
        """Execute `cmd` in the working directory.

        Returns the non-empty stdout lines, or None when `silent` is set
        (output is not captured in that case). Raises Exception on non-zero
        exit status or on timeout.
        """
        try:
            out = subprocess.run(
                cmd,
                cwd=self._workdir,
                text=True,
                capture_output=not silent,
                encoding=self.encoding,
                # BUG FIX: self.timeout was stored but never passed to
                # subprocess.run, so the TimeoutExpired handler below was
                # unreachable. Default None preserves old no-timeout behavior.
                timeout=self.timeout,
            )
            if out.returncode != 0:
                raise Exception(f"{cmd} error: {out.stderr}")

            if silent:
                return None

            return [r for r in out.stdout.split("\n") if r.strip() != ""]
        except subprocess.TimeoutExpired:
            logger.error(f"Timeout exceeded ({self.timeout} seconds)", exc_info=True)
            raise Exception(f"Process did not respond for {self.timeout} seconds")
seconds") 68 | -------------------------------------------------------------------------------- /prospector/git/git_test.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import time 3 | 4 | import pytest 5 | 6 | from git.git import Exec, Git 7 | 8 | NEBULA = "https://github.com/slackhq/nebula" 9 | BEAM = "https://github.com/apache/beam" 10 | OPENCAST = "https://github.com/opencast/opencast" 11 | OPENCAST_COMMIT = "bbb473f34ab95497d6c432c81285efb0c739f317" 12 | 13 | 14 | COMMIT_ID = "4645e6034b9c88311856ee91d19b7328bd5878c1" 15 | COMMIT_ID_1 = "d85e24f49f9efdeed5549a7d0874e68155e25301" 16 | COMMIT_ID_2 = "b38bd36766994715ac5226bfa361cd2f8f29e31e" 17 | COMMIT_ID_3 = "ae3ee42469b7c48848d841386ca9c74b7d6bbcd8" 18 | 19 | 20 | @pytest.fixture 21 | def repository() -> Git: 22 | repo = Git(OPENCAST) # apache/beam 23 | repo.clone() 24 | return repo 25 | 26 | 27 | def test_extract_timestamp(repository: Git): 28 | commit = repository.get_commit(OPENCAST_COMMIT) 29 | commit.extract_timestamp(format_date=True) 30 | assert commit.get_timestamp() == "2020-01-16 22:34:35" 31 | commit.extract_timestamp(format_date=False) 32 | assert commit.get_timestamp() == 1579214075 33 | 34 | 35 | def test_show_tags(repository: Git): 36 | tags = repository.execute("git name-rev --tags") 37 | assert tags is not None 38 | 39 | 40 | def test_get_tags_for_commit(repository: Git): 41 | commits = repository.create_commits() 42 | commit = commits.get(OPENCAST_COMMIT) 43 | if commit is not None: 44 | tags = commit.find_tags() 45 | # assert len(tags) == 75 46 | assert "10.2" in tags and "11.3" in tags and "9.4" in tags 47 | 48 | 49 | def test_create_commits(repository: Git): 50 | commits = repository.create_commits() 51 | commit = commits.get(OPENCAST_COMMIT) 52 | assert commit.get_id() == OPENCAST_COMMIT 53 | 54 | 55 | def test_get_hunks_count(repository: Git): 56 | commits = repository.create_commits() 57 | commit = commits.get(OPENCAST_COMMIT) 
58 | _, hunks = commit.get_diff() 59 | assert hunks == 7 60 | 61 | 62 | def test_get_changed_files(repository: Git): 63 | commit = repository.get_commit(OPENCAST_COMMIT) 64 | 65 | changed_files = commit.get_changed_files() 66 | assert len(changed_files) == 0 67 | 68 | 69 | def test_run_cache(): 70 | _exec = Exec(workdir=os.path.abspath(".")) 71 | start = time.time_ns() 72 | for _ in range(1000): 73 | result = _exec.run("echo 42", cache=False) 74 | assert result == ["42"] 75 | no_cache_time = time.time_ns() - start 76 | 77 | _exec = Exec(workdir=os.path.abspath(".")) 78 | start = time.time_ns() 79 | for _ in range(1000): 80 | result = _exec.run("echo 42", cache=True) 81 | assert result == ["42"] 82 | cache_time = time.time_ns() - start 83 | 84 | assert cache_time < no_cache_time 85 | -------------------------------------------------------------------------------- /prospector/git/raw_commit_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from git.git import Git 4 | from git.raw_commit import RawCommit 5 | 6 | NEBULA = "https://github.com/slackhq/nebula" 7 | BEAM = "https://github.com/apache/beam" 8 | OPENCAST = "https://github.com/opencast/opencast" 9 | OPENCAST_COMMIT = "bbb473f34ab95497d6c432c81285efb0c739f317" 10 | 11 | 12 | COMMIT_ID = "4645e6034b9c88311856ee91d19b7328bd5878c1" 13 | COMMIT_ID_1 = "d85e24f49f9efdeed5549a7d0874e68155e25301" 14 | COMMIT_ID_2 = "b38bd36766994715ac5226bfa361cd2f8f29e31e" 15 | COMMIT_ID_3 = "ae3ee42469b7c48848d841386ca9c74b7d6bbcd8" 16 | 17 | 18 | @pytest.fixture 19 | def commit(): 20 | repository = Git(OPENCAST) 21 | repository.clone() 22 | commits = repository.create_commits() 23 | return commits.get(OPENCAST_COMMIT) 24 | 25 | 26 | def test_find_tags(commit: RawCommit): 27 | tags = commit.find_tags() 28 | assert "10.2" in tags and "11.3" in tags and "9.4" in tags 29 | 30 | 31 | def test_get_diff(commit: RawCommit): 32 | diff, _ = commit.get_diff() 33 | assert diff is 
not None 34 | -------------------------------------------------------------------------------- /prospector/git/version_to_tag_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from git.version_to_tag import get_possible_tags 4 | 5 | 6 | @pytest.fixture 7 | def tags(): 8 | return [ 9 | "0.9-beta1", 10 | "docker-plugin-0.1", 11 | "docker-plugin-0.10.0", 12 | "docker-plugin-0.10.1", 13 | "docker-plugin-0.10.2", 14 | "docker-plugin-0.11.0", 15 | "docker-plugin-0.12.0", 16 | "docker-plugin-0.12.1", 17 | "docker-plugin-0.13.0", 18 | "docker-plugin-0.14.0", 19 | "docker-plugin-0.15.0", 20 | "docker-plugin-0.3.1", 21 | "docker-plugin-0.3.2", 22 | "docker-plugin-0.3.3", 23 | "docker-plugin-0.3.4", 24 | "docker-plugin-0.3.5", 25 | "docker-plugin-0.4", 26 | "docker-plugin-0.5", 27 | "docker-plugin-0.6", 28 | "docker-plugin-0.6.1", 29 | "docker-plugin-0.6.2", 30 | "docker-plugin-0.7", 31 | "docker-plugin-0.8", 32 | "docker-plugin-0.9.0-beta2", 33 | "docker-plugin-0.9.0-rc1", 34 | "docker-plugin-0.9.1", 35 | "docker-plugin-0.9.2", 36 | "docker-plugin-0.9.3", 37 | "docker-plugin-0.9.4", 38 | "docker-plugin-1.1", 39 | "docker-plugin-1.1.1", 40 | "docker-plugin-1.1.2", 41 | "docker-plugin-1.1.3", 42 | "docker-plugin-1.1.4", 43 | "docker-plugin-1.1.5", 44 | "docker-plugin-1.1.6", 45 | "docker-plugin-1.1.7", 46 | "docker-plugin-1.1.8", 47 | "docker-plugin-1.1.9", 48 | "docker-plugin-1.2.0", 49 | "docker-plugin-1.2.1", 50 | "docker-plugin-1.2.2", 51 | "docker-plugin-parent-0.16.0", 52 | "docker-plugin-parent-0.16.1", 53 | "docker-plugin-parent-0.16.2", 54 | "docker-plugin-parent-0.17", 55 | "docker-plugin-parent-0.18", 56 | "docker-plugin-parent-0.9.0", 57 | "docker-plugin-parent-1.0.0", 58 | "docker-plugin-parent-1.0.1", 59 | "docker-plugin-parent-1.0.2", 60 | "docker-plugin-parent-1.0.3", 61 | "docker-plugin-parent-1.0.4", 62 | "libvirt-slave-1.7", 63 | "libvirt-slave-1.8", 64 | "libvirt-slave-1.8.1", 65 | ] 66 | 67 
class Anthropic(LLM):
    """LangChain LLM wrapper for Anthropic models served through SAP AI Core."""

    model_name: str
    deployment_url: str
    temperature: float
    ai_core_sk_filepath: str  # path to the AI Core service-key JSON file

    @property
    def _llm_type(self) -> str:
        return "SAP Anthropic"

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return a dictionary of identifying parameters."""
        return {
            "model_name": self.model_name,
            "deployment_url": self.deployment_url,
            "temperature": self.temperature,
            "ai_core_sk_filepath": self.ai_core_sk_filepath,
        }

    def _call(
        self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any
    ) -> str:
        """Send `prompt` to the deployment's /invoke endpoint and return the
        model's text reply. Re-raises any HTTP/network failure after logging."""
        endpoint = f"{self.deployment_url}/invoke"
        headers = instantiation.get_headers(self.ai_core_sk_filepath)
        data = {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 100,
            "messages": [
                {
                    "role": "user",
                    "content": f"{prompt}",
                }
            ],
            "temperature": self.temperature,
        }

        try:
            # BUG FIX: without an explicit timeout, requests can block forever
            # and the Timeout handler below could never fire.
            response = requests.post(endpoint, headers=headers, json=data, timeout=60)
            response.raise_for_status()
            return self.parse(response.json())
        except requests.exceptions.HTTPError as http_error:
            logger.error(
                f"HTTP error occurred when sending a request through AI Core: {http_error}"
            )
            raise
        except requests.exceptions.Timeout as timeout_err:
            logger.error(
                f"Timeout error occurred when sending a request through AI Core: {timeout_err}"
            )
            raise
        except requests.exceptions.ConnectionError as conn_err:
            logger.error(
                f"Connection error occurred when sending a request through AI Core: {conn_err}"
            )
            raise
        except requests.exceptions.RequestException as req_err:
            logger.error(
                f"A request error occurred when sending a request through AI Core: {req_err}"
            )
            raise

    def parse(self, message) -> str:
        """Parse the returned JSON object from Anthropic."""
        return message["content"][0]["text"]
class Mistral(LLM):
    """LangChain LLM wrapper for Mistral models served through SAP AI Core."""

    model_name: str
    deployment_url: str
    temperature: float
    ai_core_sk_filepath: str  # path to the AI Core service-key JSON file

    @property
    def _llm_type(self) -> str:
        return "SAP Mistral"

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return a dictionary of identifying parameters."""
        return {
            "model_name": self.model_name,
            "deployment_url": self.deployment_url,
            "temperature": self.temperature,
            "ai_core_sk_filepath": self.ai_core_sk_filepath,
        }

    def _call(
        self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any
    ) -> str:
        """Send `prompt` to the deployment's chat-completions endpoint and
        return the model's text reply. Re-raises any HTTP/network failure
        after logging."""
        endpoint = f"{self.deployment_url}/chat/completions"
        headers = instantiation.get_headers(self.ai_core_sk_filepath)
        data = {
            "model": "mistralai--mixtral-8x7b-instruct-v01",
            "max_tokens": 100,
            "temperature": self.temperature,
            "messages": [{"role": "user", "content": prompt}],
        }

        try:
            # BUG FIX: without an explicit timeout, requests can block forever
            # and the Timeout handler below could never fire.
            response = requests.post(endpoint, headers=headers, json=data, timeout=60)
            response.raise_for_status()
            return self.parse(response.json())
        except requests.exceptions.HTTPError as http_error:
            logger.error(
                f"HTTP error occurred when sending a request through AI Core: {http_error}"
            )
            raise
        except requests.exceptions.Timeout as timeout_err:
            logger.error(
                f"Timeout error occurred when sending a request through AI Core: {timeout_err}"
            )
            raise
        except requests.exceptions.ConnectionError as conn_err:
            logger.error(
                f"Connection error occurred when sending a request through AI Core: {conn_err}"
            )
            raise
        except requests.exceptions.RequestException as req_err:
            logger.error(
                f"A request error occurred when sending a request through AI Core: {req_err}"
            )
            raise

    def parse(self, message) -> str:
        """Parse the returned JSON object from Mistral."""
        return message["choices"][0]["message"]["content"]
requests.post(endpoint, headers=headers, json=data) 44 | response.raise_for_status() 45 | return self.parse(response.json()) 46 | except requests.exceptions.HTTPError as http_error: 47 | logger.error( 48 | f"HTTP error occurred when sending a request through AI Core: {http_error}" 49 | ) 50 | raise 51 | except requests.exceptions.Timeout as timeout_err: 52 | logger.error( 53 | f"Timeout error occured when sending a request through AI Core: {timeout_err}" 54 | ) 55 | raise 56 | except requests.exceptions.ConnectionError as conn_err: 57 | logger.error( 58 | f"Connection error occurred when sending a request through AI Core: {conn_err}" 59 | ) 60 | raise 61 | except requests.exceptions.RequestException as req_err: 62 | logger.error( 63 | f"A request error occured when sending a request through AI Core: {req_err}" 64 | ) 65 | raise 66 | 67 | def parse(self, message) -> str: 68 | """Parse the returned JSON object from OpenAI.""" 69 | return message["choices"][0]["message"]["content"] 70 | -------------------------------------------------------------------------------- /prospector/llm/models/openai.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | import requests 4 | from langchain_core.language_models.llms import LLM 5 | 6 | import llm.instantiation as instantiation 7 | from log.logger import logger 8 | 9 | 10 | class OpenAI(LLM): 11 | model_name: str 12 | deployment_url: str 13 | temperature: float 14 | ai_core_sk_filepath: str 15 | 16 | @property 17 | def _llm_type(self) -> str: 18 | return "SAP OpenAI" 19 | 20 | @property 21 | def _identifying_params(self) -> Dict[str, Any]: 22 | """Return a dictionary of identifying parameters.""" 23 | return { 24 | "model_name": self.model_name, 25 | "deployment_url": self.deployment_url, 26 | "temperature": self.temperature, 27 | "ai_core_sk_filepath": self.ai_core_sk_filepath, 28 | } 29 | 30 | def _call( 31 | self, prompt: str, stop: 
class OpenAI(LLM):
    """LangChain LLM wrapper for OpenAI models served through SAP AI Core."""

    model_name: str
    deployment_url: str
    temperature: float
    ai_core_sk_filepath: str  # path to the AI Core service-key JSON file

    @property
    def _llm_type(self) -> str:
        return "SAP OpenAI"

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return a dictionary of identifying parameters."""
        return {
            "model_name": self.model_name,
            "deployment_url": self.deployment_url,
            "temperature": self.temperature,
            "ai_core_sk_filepath": self.ai_core_sk_filepath,
        }

    def _call(
        self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any
    ) -> str:
        """Send `prompt` to the deployment's chat-completions endpoint and
        return the model's text reply. Re-raises any HTTP/network failure
        after logging."""
        endpoint = f"{self.deployment_url}/chat/completions?api-version=2023-05-15"
        headers = instantiation.get_headers(self.ai_core_sk_filepath)
        data = {
            "messages": [
                {
                    "role": "user",
                    "content": f"{prompt}",
                }
            ],
            "temperature": self.temperature,
        }

        try:
            # BUG FIX: without an explicit timeout, requests can block forever
            # and the Timeout handler below could never fire.
            response = requests.post(endpoint, headers=headers, json=data, timeout=60)
            response.raise_for_status()
            return self.parse(response.json())
        except requests.exceptions.HTTPError as http_error:
            logger.error(
                f"HTTP error occurred when sending a request through AI Core: {http_error}"
            )
            raise
        except requests.exceptions.Timeout as timeout_err:
            logger.error(
                f"Timeout error occurred when sending a request through AI Core: {timeout_err}"
            )
            raise
        except requests.exceptions.ConnectionError as conn_err:
            logger.error(
                f"Connection error occurred when sending a request through AI Core: {conn_err}"
            )
            raise
        except requests.exceptions.RequestException as req_err:
            logger.error(
                f"A request error occurred when sending a request through AI Core: {req_err}"
            )
            raise

    def parse(self, message) -> str:
        """Parse the returned JSON object from OpenAI."""
        return message["choices"][0]["message"]["content"]
import logging
import logging.handlers
from pprint import pformat

# Default name shared by all components logging through this module.
LOGGER_NAME = "main"


def pretty_log(logger: logging.Logger, obj, level: int = logging.DEBUG):
    """Log `obj` pretty-printed (via pprint) at the given level."""
    as_text = pformat(obj)
    logger.log(level, f"Object content: {as_text}")


def get_level(string: bool = False):
    """Return the module logger's level; as a level name if `string` is True."""
    global logger
    if string:
        return logging.getLevelName(logger.level)

    return logger.level


def create_logger(
    log_file: str = "prospector.log", name: str = LOGGER_NAME
) -> logging.Logger:
    """Create (or fetch) the named logger writing to a rotating `log_file`.

    BUG FIX: previously every call attached a fresh file handler to the same
    named logger, so repeated calls duplicated every log record; handlers are
    now attached only once. Also renamed the handler variable, which used to
    shadow the `log_file` parameter.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    if not logger.handlers:
        formatter = logging.Formatter(
            "%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(message)s",
            "%m-%d %H:%M:%S",
        )
        # Rotate at ~2 MB, keeping 3 backups.
        file_handler = logging.handlers.RotatingFileHandler(
            log_file, maxBytes=2 * (10**6), backupCount=3
        )
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # NOTE(review): stored as a plain attribute, not a bound method — callers
    # must pass the logger explicitly: logger.pretty_log(logger, obj).
    setattr(logger, pretty_log.__name__, pretty_log)

    return logger


logger = create_logger()
-------------------------------------------------------------------------------- 1 | # Pipeline Usage of Prospector 2 | 3 | 4 | The pipeline works in the following way: 5 | 6 | 1. `get_cve_data()` of `filter_entries.py` first fetches the most recent CVEs' raw data. 7 | 2. This raw data get saved to the `vulnerability` table of the database. 8 | 3. Then this raw vulnerability data gets fetched from the database and filtered (`process_cve_data()` of `filter_entries.py`) 9 | 4. For each filtered CVE, a job (essentially the Prospector function and the report generation function) is created and enqueued in the Redis Queue using `enqueue_jobs()` from `job_creation.py`. 10 | 11 | ## Use the Pipeline 12 | 13 | For the pipeline to work, first run 14 | 15 | ```bash 16 | make docker-setup 17 | ``` 18 | 19 | to create the following five containers: 20 | 21 | ```bash 22 | CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 23 | 77e4b01ada4d prospector_backend "python ./service/ma…" 58 minutes ago Up 58 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp prospector_backend_1 24 | 57a30c903a9a prospector_worker "/usr/local/bin/star…" 58 minutes ago Up 58 minutes prospector_worker_1 25 | 2ea00e47ac71 redis:alpine "docker-entrypoint.s…" 58 minutes ago Up 58 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp prospector_redis_1 26 | 120d3502ee51 postgres "docker-entrypoint.s…" 58 minutes ago Up 58 minutes 0.0.0.0:5432->5432/tcp, :::5432->5432/tcp db 27 | 1d9acef24637 adminer "entrypoint.sh php -…" 58 minutes ago Up 58 minutes 0.0.0.0:8080->8080/tcp, :::8080->8080/tcp prospector_adminer_1 28 | ``` 29 | 30 | Then enqueue the latest CVEs as jobs by running `python3 pipeline/main.py`. 31 | 32 | ### Increase the number of workers 33 | 34 | Adjust the number of workers in `etc_supervisor_confd_rqworker.conf.j2`: 35 | 36 | ```bash 37 | ... 38 | numprocs=2 39 | ... 40 | ``` 41 | 42 | ## Observe Pipeline 43 | 44 | View the database on `localhost:8080`. 
45 | 46 | View the fetched vulnerabilities and generated reports on `localhost:8000`. 47 | 48 | View worker output in the terminal by running `docker attach prospector_worker_1` or the output in `prospector.log` (even though this can be difficult to read with more than 1 worker, because the logging gets all mixed up between workers). 49 | 50 | -------------------------------------------------------------------------------- /prospector/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/prospector/pipeline/__init__.py -------------------------------------------------------------------------------- /prospector/pipeline/cloning_repos.py: -------------------------------------------------------------------------------- 1 | from evaluation.utils import load_dataset 2 | from git.git import clone_repo_multiple 3 | 4 | 5 | # Get the URLs from d63.csv -> set 6 | urls = set() 7 | dataset = load_dataset( 8 | "/home/i748376/prospector/project-kb/prospector/evaluation/data/input/d63.csv" 9 | ) 10 | 11 | for cve_record in dataset: 12 | urls.add(cve_record[1]) 13 | 14 | # urls = list(urls) 15 | # urls = [ 16 | # "https://github.com/Turistforeningen/node-im-resize", # CVE-2019-10787 17 | # "https://github.com/remy/undefsafe", # CVE-2019-10795 18 | # "https://github.com/Froxlor/Froxlor", # CVE-2020-10236 19 | # "https://github.com/jnunemaker/crack", 20 | # "https://github.com/django-tastypie/django-tastypie", 21 | # "https://github.com/pyinstaller/pyinstaller", 22 | # "https://github.com/rails/rails-html-sanitizer", 23 | # "https://github.com/scipy/scipy", 24 | # "https://github.com/parcel-bundler/parcel", 25 | # "https://github.com/javamelody/javamelody", 26 | # ] 27 | 28 | print(f"Retrieved {len(urls)} distinct repositories from the dataset.") 29 | 30 | # Call clone_repo_multiple() on this set 31 | results = clone_repo_multiple( 32 | urls, 33 
| output_folder="/home/i748376/data/gitcache", 34 | skip_existing=False, 35 | shallow=False, 36 | concurrent=1, 37 | ) 38 | 39 | print("Cloning completed. Results: ", results) 40 | -------------------------------------------------------------------------------- /prospector/pipeline/main.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from cli.console import ConsoleWriter, MessageStatus 3 | from pipeline.filter_entries import ( 4 | get_cve_data, 5 | process_cve_data, 6 | save_cves_to_db, 7 | ) 8 | from pipeline.job_creation import enqueue_jobs 9 | 10 | 11 | DAYS_AGO = 5 # Time period from DAYS_AGO to now to retrieve CVEs from NVD 12 | 13 | 14 | async def dispatch_jobs(): 15 | """Gets CVEs from the last X days, filters them and enqueues them in the 16 | Queue. Workers fetch the jobs and execute the Prospector function on them. 17 | """ 18 | # Retrieve the CVE data 19 | cve_data = await get_cve_data(DAYS_AGO) 20 | 21 | # Save data to the vulnerabilities table in the database 22 | save_cves_to_db(cve_data) 23 | 24 | # get entry from db and process 25 | _ = await process_cve_data() 26 | 27 | await enqueue_jobs(reports_filepath="pipeline/reports/") 28 | 29 | 30 | async def main(): 31 | """Starting point to enqueue jobs into the pipeline""" 32 | ConsoleWriter.print("Starting pipeline\n", status=MessageStatus.OK) 33 | await dispatch_jobs() 34 | 35 | 36 | if __name__ == "__main__": 37 | asyncio.run(main()) 38 | -------------------------------------------------------------------------------- /prospector/prospector.conf: -------------------------------------------------------------------------------- 1 | [global] 2 | verbose=false 3 | backend=http://127.0.0.1:8000 4 | ;nvd_rest_endpoint=https://services.nvd.nist.gov/rest/json/cve/1.0/ 5 | nvd_rest_endpoint=http://localhost:8000/nvd/vulnerabilities/ 6 | report=html -------------------------------------------------------------------------------- 
/prospector/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.pytest.ini_options] 2 | minversion = "6.0" 3 | addopts = "-ra -q --cov-config=.coveragerc --cov-report html:cov_html --cov=." 4 | testpaths = [ 5 | "commitdb", 6 | "datamodel", 7 | "client", 8 | "git", 9 | "api", 10 | "filtering", 11 | "stats", 12 | "util", 13 | "llm", 14 | ] 15 | 16 | [tool.isort] 17 | profile = "black" 18 | multi_line_output = 3 19 | -------------------------------------------------------------------------------- /prospector/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | autopep8 2 | black 3 | flake8 4 | matplotlib 5 | pylint 6 | pytest 7 | pytest-cov 8 | pycodestyle 9 | pre-commit 10 | requests-mock 11 | seaborn -------------------------------------------------------------------------------- /prospector/requirements.in: -------------------------------------------------------------------------------- 1 | aiohttp 2 | aiofiles 3 | beautifulsoup4 4 | colorama 5 | datasketch 6 | fastapi 7 | google-cloud-aiplatform==1.49.0 8 | Jinja2 9 | langchain 10 | langchain_anthropic 11 | langchain_openai 12 | langchain_google_vertexai 13 | langchain_mistralai 14 | langchain_community 15 | omegaconf 16 | pandas 17 | plac 18 | psycopg2 19 | pydantic 20 | pytest 21 | python_dateutil 22 | python-dotenv 23 | redis 24 | requests 25 | requests_cache==0.9.6 26 | rq 27 | spacy 28 | tqdm 29 | uvicorn 30 | validators 31 | -------------------------------------------------------------------------------- /prospector/rules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/prospector/rules/__init__.py -------------------------------------------------------------------------------- /prospector/run_prospector.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | IMAGE_NAME="prospector-cli" 4 | 5 | # check if base image and containers are spawned up 6 | if [[ "$(docker images -q prospector-base:1.0 2> /dev/null)" == "" && "$(docker-compose ps -q backend db)" == "" ]]; then 7 | docker build -t prospector-base:1.0 -f docker/Dockerfile . 8 | docker-compose up -d --build 9 | fi 10 | 11 | # check if image is already built 12 | if [[ "$(docker images -q $IMAGE_NAME 2> /dev/null)" == "" ]]; then 13 | # build the docker image 14 | docker build -t $IMAGE_NAME -f docker/cli/Dockerfile . 15 | fi 16 | 17 | # Function to extract the value of a specific option 18 | get_option_value() { 19 | while [[ $# -gt 0 ]]; do 20 | if [[ $1 == "--report-filename" ]]; then 21 | echo "$2" 22 | return 23 | fi 24 | shift 25 | done 26 | } 27 | 28 | REPORT_FILENAME=$(get_option_value "$@") 29 | # echo $REPORT_FILENAME # Sanity Check 30 | if [[ -z $REPORT_FILENAME ]]; then 31 | OUTPUT_DIR="" 32 | else 33 | OUTPUT_DIR=$(dirname "$REPORT_FILENAME") 34 | fi 35 | # echo $OUTPUT_DIR 36 | # echo $(pwd)/$OUTPUT_DIR # Sanity Check 37 | 38 | # run the docker container 39 | docker run --network=prospector_default --rm -t \ 40 | --user $(id -u):$(id -g) \ 41 | -v $(pwd)/$OUTPUT_DIR:/app/$OUTPUT_DIR \ 42 | -v ${GIT_CACHE_HOST}:/tmp/gitcache \ 43 | $IMAGE_NAME \ 44 | "$@" 45 | -------------------------------------------------------------------------------- /prospector/service/api/README.md: -------------------------------------------------------------------------------- 1 | # API for Prospector Backend 2 | 3 | ## Authentication 4 | 5 | `GET /users/` 6 | 7 | * Return: all users data 8 | 9 | `GET /users/` 10 | 11 | * Return: data for user 12 | 13 | `GET /users/me` 14 | 15 | * Return: data for the currently authenticated user 16 | 17 | `POST /token` 18 | 19 | * Input: user/pass 20 | * Return: auth. 
token 21 | 22 | ## Searching for fix-commits 23 | 24 | `POST /search` 25 | 26 | * Input: advisory record 27 | * Output: a reference to the job, to retrieve the results later 28 | 29 | `GET /search/` 30 | 31 | * Input: the job id 32 | * Output: status of the job and results (if completed) 33 | 34 | ## Model management 35 | 36 | .... 37 | 38 | ## Data management 39 | 40 | `POST /preprocessed-data` 41 | 42 | 43 | `GET /data/commits` 44 | -------------------------------------------------------------------------------- /prospector/service/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/prospector/service/api/__init__.py -------------------------------------------------------------------------------- /prospector/service/api/api_test.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | from service.main import app 4 | from datamodel.commit import Commit 5 | 6 | client = TestClient(app) 7 | 8 | 9 | def test_status(): 10 | response = client.get("/status") 11 | assert response.status_code == 200 12 | assert response.json() == {"status": "ok"} 13 | 14 | 15 | def test_post_preprocessed_commits(): 16 | commit_1 = Commit( 17 | repository="https://github.com/apache/dubbo", commit_id="yyy" 18 | ).as_dict() 19 | commit_2 = Commit( 20 | repository="https://github.com/apache/dubbo", commit_id="zzz" 21 | ).as_dict() 22 | commit_3 = Commit( 23 | repository="https://github.com/apache/struts", commit_id="bbb" 24 | ).as_dict() 25 | commits = [commit_1, commit_2, commit_3] 26 | response = client.post("/commits/", json=commits) 27 | assert response.status_code == 200 28 | assert response.json() == {"status": "ok"} 29 | 30 | 31 | def test_get_specific_commit(): 32 | repository = "https://github.com/apache/dubbo" 33 | commit_id = "yyy" 34 | print(client) 35 | response = 
client.get("/commits/" + repository + "?commit_id=" + commit_id) 36 | print(f"Response: {response}, {response.reason_phrase}") 37 | assert response.status_code == 200 38 | assert response.json()[0]["commit_id"] == commit_id 39 | 40 | 41 | def test_get_commits_by_repository(): 42 | repository = "https://github.com/apache/dubbo" 43 | response = client.get("/commits/" + repository) 44 | assert response.status_code == 200 45 | -------------------------------------------------------------------------------- /prospector/service/api/dependencies.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from fastapi import Depends, HTTPException, status 4 | from fastapi.security import OAuth2PasswordBearer 5 | from pydantic import BaseModel 6 | 7 | # ====================================== 8 | # AUTH STUFF 9 | # The following is taken from https://fastapi.tiangolo.com/tutorial/security/first-steps/ 10 | # with slight modifications to support roles. 
# This will be moved to a separate file eventually,
# as explained here: https://fastapi.tiangolo.com/tutorial/bigger-applications/

# OAuth2 "password flow" scheme; token requests are served by the
# /users/token endpoint (see service/api/routers/users.py).
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="users/token")

# In-memory stand-in for a real user store, keyed by username.
# NOTE(review): credentials are fake-hashed (see fake_hash_password below);
# this is demo scaffolding, not real authentication.
fake_users_db = {
    "johndoe": {
        "username": "johndoe",
        "full_name": "John Doe",
        "email": "johndoe@example.com",
        "roles": ["user"],
        "hashed_password": "fakehashedsecret",
        "disabled": False,
    },
    "alice": {
        "username": "alice",
        "full_name": "Alice Wonderson",
        "email": "alice@example.com",
        "roles": ["user", "admin"],
        "hashed_password": "fakehashedsecret2",
        "disabled": False,
    },
}


class User(BaseModel):
    """Public view of a user account (no credential material)."""

    username: str
    email: Optional[str] = None
    # Role names, e.g. ["user"] or ["user", "admin"] as in fake_users_db.
    roles: Optional[list] = []
    full_name: Optional[str] = None
    disabled: Optional[bool] = None


class UserInDB(User):
    """User record as stored in the (fake) database; adds the hashed password."""

    hashed_password: str


def get_user(db, username: str) -> Optional[UserInDB]:
    """Look up `username` in `db` (a dict of dicts) and return a UserInDB,
    or None if the username is unknown."""
    if username in db:
        user_dict = db[username]
        return UserInDB(**user_dict)
    return None


def fake_decode_token(token):
    """Resolve a bearer token to a user.

    TODO This doesn't provide any security at all: the token string is used
    directly as the username, with no signature or expiry checks.
    """
    user = get_user(fake_users_db, token)
    return user


def fake_hash_password(password: str) -> str:
    """Placeholder "hash" that merely prefixes the plain password.

    TODO This doesn't provide any security at all.
    """
    return "fakehashed" + password


async def get_current_user(token: str = Depends(oauth2_scheme)):
    """FastAPI dependency: resolve the request's bearer token to a User.

    Raises:
        HTTPException: 401 if the token does not map to a known user.
    """
    user = fake_decode_token(token)
    if not user:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authentication credentials",
            headers={"WWW-Authenticate": "Bearer"},
        )
    return user


async def get_current_active_user(current_user: User = Depends(get_current_user)):
    """FastAPI dependency: like get_current_user, but rejects disabled accounts.

    Raises:
        HTTPException: 400 if the resolved user is marked disabled.
    """
    if current_user.disabled:
        raise HTTPException(status_code=400, detail="Inactive user")
    return current_user
-------------------------------------------------------------------------------- /prospector/service/api/routers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/prospector/service/api/routers/__init__.py -------------------------------------------------------------------------------- /prospector/service/api/routers/home.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Request 2 | from fastapi.responses import HTMLResponse 3 | from fastapi.templating import Jinja2Templates 4 | 5 | from util.config_parser import parse_config_file 6 | from service.api.routers.jobs import connect_to_db 7 | 8 | # from core.report import generate_report 9 | 10 | router = APIRouter( 11 | responses={404: {"description": "Not found"}}, 12 | ) 13 | 14 | templates = Jinja2Templates(directory="service/static") 15 | 16 | config = parse_config_file() 17 | redis_url = config.redis_url 18 | 19 | 20 | # endpoint for monitoring all job status 21 | @router.get("/home", response_class=HTMLResponse) 22 | async def home(request: Request): 23 | db = connect_to_db() 24 | joblist = db.get_all_jobs() 25 | return templates.TemplateResponse( 26 | "index.html", {"request": request, "joblist": joblist} 27 | ) 28 | -------------------------------------------------------------------------------- /prospector/service/api/routers/preprocessed.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | from fastapi import APIRouter, HTTPException 4 | from fastapi.responses import JSONResponse 5 | 6 | from backenddb.postgres import PostgresBackendDB 7 | from util.config_parser import parse_config_file 8 | 9 | config = parse_config_file() 10 | 11 | 12 | router = APIRouter( 13 | prefix="/commits", 14 | tags=["commits"], 15 | 
responses={404: {"description": "Not found"}}, 16 | ) 17 | 18 | 19 | # ----------------------------------------------------------------------------- 20 | @router.get("/{repository_url:path}", status_code=200) 21 | async def get_commits( 22 | repository_url: str, 23 | commit_id: Optional[str] = None, 24 | ): 25 | db = PostgresBackendDB( 26 | config.database.user, 27 | config.database.password, 28 | config.database.host, 29 | config.database.port, 30 | config.database.dbname, 31 | ) 32 | db.connect() 33 | data = db.lookup(repository_url, commit_id) 34 | 35 | if len(data) == 0: 36 | raise HTTPException(status_code=404, detail="Commit not found") 37 | 38 | return JSONResponse(data) 39 | 40 | 41 | # ----------------------------------------------------------------------------- 42 | @router.post("/") 43 | async def upload_preprocessed_commit(payload: List[Dict[str, Any]]): 44 | 45 | db = PostgresBackendDB( 46 | config.database.user, 47 | config.database.password, 48 | config.database.host, 49 | config.database.port, 50 | config.database.dbname, 51 | ) 52 | db.connect() 53 | 54 | for commit in payload: 55 | db.save(commit) 56 | 57 | return {"status": "ok"} 58 | -------------------------------------------------------------------------------- /prospector/service/api/routers/users.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Depends, HTTPException 2 | from fastapi.security import OAuth2PasswordRequestForm 3 | 4 | from ..dependencies import ( 5 | User, 6 | UserInDB, 7 | fake_hash_password, 8 | fake_users_db, 9 | get_current_active_user, 10 | oauth2_scheme, 11 | ) 12 | 13 | # from http import HTTPStatus 14 | 15 | 16 | router = APIRouter( 17 | prefix="/users", 18 | tags=["users"], 19 | # dependencies=[Depends(oauth2_scheme)], 20 | responses={404: {"description": "Not found"}}, 21 | ) 22 | 23 | 24 | @router.post("/token") 25 | async def login(form_data: OAuth2PasswordRequestForm = Depends()): 26 | 
user_dict = fake_users_db.get(form_data.username) 27 | if not user_dict: 28 | raise HTTPException( 29 | status_code=400, detail="Incorrect username or password" 30 | ) 31 | user = UserInDB(**user_dict) 32 | hashed_password = fake_hash_password(form_data.password) 33 | if not hashed_password == user.hashed_password: 34 | raise HTTPException( 35 | status_code=400, detail="Incorrect username or password" 36 | ) 37 | 38 | return {"access_token": user.username, "token_type": "bearer"} 39 | 40 | 41 | @router.get("/me") 42 | async def read_users_me(current_user: User = Depends(get_current_active_user)): 43 | return current_user 44 | 45 | 46 | @router.get("/me2/") 47 | async def read_items(user: str = Depends(oauth2_scheme)): 48 | return {"current_user": user} 49 | -------------------------------------------------------------------------------- /prospector/service/main.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | 3 | # from .dependencies import oauth2_scheme 4 | from api.routers import feeds, jobs, nvd, preprocessed, users 5 | from fastapi import FastAPI 6 | from fastapi.middleware.cors import CORSMiddleware 7 | from fastapi.responses import HTMLResponse, RedirectResponse 8 | from fastapi.staticfiles import StaticFiles 9 | 10 | from log.logger import logger 11 | from util.config_parser import parse_config_file 12 | 13 | api_metadata = [ 14 | { 15 | "name": "data", 16 | "description": "Operations with data used to train ML models.", 17 | }, 18 | { 19 | "name": "jobs", 20 | "description": "Manage jobs.", 21 | "externalDocs": { 22 | "description": "Items external docs", 23 | "url": "https://fastapi.tiangolo.com/", 24 | }, 25 | }, 26 | ] 27 | 28 | app = FastAPI(openapi_tags=api_metadata) 29 | 30 | app.add_middleware( 31 | CORSMiddleware, 32 | allow_origins=["http://localhost:3000", "localhost:3000"], 33 | allow_credentials=True, 34 | allow_methods=["*"], 35 | allow_headers=["*"], 36 | ) 37 | 38 | 
app.include_router(users.router) 39 | app.include_router(nvd.router) 40 | app.include_router(preprocessed.router) 41 | app.include_router(feeds.router) 42 | app.include_router(jobs.router) 43 | 44 | app.mount("/static", StaticFiles(directory="service/static"), name="static") 45 | app.mount( 46 | "/reports", StaticFiles(directory="./pipeline/reports"), name="reports" 47 | ) 48 | 49 | 50 | # ----------------------------------------------------------------------------- 51 | @app.get("/", response_class=HTMLResponse) 52 | async def read_index(): 53 | response = RedirectResponse(url="static/feed.html") 54 | return response 55 | 56 | 57 | # ----------------------------------------------------------------------------- 58 | @app.get("/status") 59 | async def get_status(): 60 | return {"status": "ok"} 61 | 62 | 63 | if __name__ == "__main__": 64 | config = parse_config_file() 65 | logger.setLevel(config.log_level) 66 | 67 | uvicorn.run( 68 | app, 69 | host="0.0.0.0", 70 | port=8000, 71 | ) 72 | -------------------------------------------------------------------------------- /prospector/service/static/feed.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Vulnerabilities 6 | 7 | 8 | 9 | 10 | 11 | 12 | 15 | 16 | 17 | 18 |
    19 |
    20 |
    21 |

    Vulnerabilities

    22 |
    23 | 24 | 25 | 26 | 27 | 28 |
    29 | Reports 30 | Job list 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 |
    Vuln IdPublished DateLast Mod DateSource
    44 |
    45 |
    46 |
    47 | 48 | -------------------------------------------------------------------------------- /prospector/service/static/feed.js: -------------------------------------------------------------------------------- 1 | // Function to call the /process_vulns API 2 | async function processVulns() { 3 | fetch('/feeds/process_vulns', { method: 'POST' }) 4 | .then(response => response.json()) 5 | .then(data => { 6 | console.log(data); 7 | alert(JSON.stringify(data)); 8 | }) 9 | .catch(error => { 10 | console.error(error); 11 | }); 12 | } 13 | 14 | // Function to call the /create_jobs API 15 | async function createJobs() { 16 | await fetch('/feeds/create_jobs', { method: 'POST' }) 17 | .then(response => response.json()) 18 | .then(data => { 19 | console.log(data); 20 | alert(JSON.stringify(data)); 21 | }) 22 | .catch(error => { 23 | console.error(error); 24 | }); 25 | } 26 | 27 | 28 | // Function to call the /create_jobs API 29 | async function fetchVulns() { 30 | const timeRange = document.getElementById("time_range").value; 31 | 32 | fetch('/feeds/fetch_vulns/' + timeRange, { method: 'GET' }) 33 | .then(response => response.json()) 34 | .then(data => { 35 | console.log(data); 36 | fetchVulnData() 37 | }) 38 | .catch(error => { 39 | console.error(error); 40 | }); 41 | } 42 | 43 | 44 | 45 | // Function to update the job table with new data 46 | async function updatefeedTable(vulnList) { 47 | const tableBody = $('#vuln-table tbody'); 48 | tableBody.empty(); 49 | 50 | for (const vuln of vulnList) { 51 | const row = $('').addClass('highlight'); 52 | 53 | const vulnIdCell = $('').text(vuln.vuln_id); 54 | row.append(vulnIdCell); 55 | const pubDateCell = $('').text(vuln.published_date); 56 | row.append(pubDateCell); 57 | const modDateCell = $('').text(vuln.last_modified_date); 58 | row.append(modDateCell); 59 | const sourceCell = $('').text(vuln.source); 60 | row.append(sourceCell); 61 | 62 | tableBody.append(row); 63 | } 64 | } 65 | 66 | // Function to fetch job data from the 
/jobs endpoint and update the table 67 | async function fetchVulnData() { 68 | fetch('/feeds') 69 | .then(response => response.json()) 70 | .then(data => { 71 | updatefeedTable(data); 72 | }) 73 | .catch(error => { 74 | console.error(error); 75 | }); 76 | } 77 | -------------------------------------------------------------------------------- /prospector/service/static/index.css: -------------------------------------------------------------------------------- 1 | tr.highlight:hover { 2 | background-color: #E6F0FF; 3 | } 4 | 5 | #reports { 6 | position: absolute; 7 | top: 40px; 8 | right: 150px; 9 | } 10 | 11 | #joblist { 12 | position: absolute; 13 | top: 40px; 14 | right: 20px; 15 | } 16 | -------------------------------------------------------------------------------- /prospector/service/static/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Jobs 6 | 7 | 8 | 9 | 10 | 11 | 12 | 15 | 16 | 17 | 18 |
    19 |
    20 |
    21 |

    Job list

    22 | Feed 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 |
    StateIdInfoModify
    36 |
    37 |
    38 |
    39 | 40 | -------------------------------------------------------------------------------- /prospector/service/static/index.js: -------------------------------------------------------------------------------- 1 | // Function to update the job table with new data 2 | async function updateJobTable(jobList) { 3 | const tableBody = $('#job-table tbody'); 4 | tableBody.empty(); 5 | 6 | for (const job of jobList) { 7 | const row = $('').addClass('highlight'); 8 | 9 | const statusBadge = $('').html(`${job.status}`); 14 | row.append(statusBadge); 15 | 16 | const jobIdCell = $('').text(job._id); 17 | row.append(jobIdCell); 18 | 19 | const resultCell = $(''); 20 | const configureBtn1 = $(' 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /prospector/service/static/job_configuration.js: -------------------------------------------------------------------------------- 1 | 2 | // Function to retrieve the job ID from the query string 3 | function getJobIdFromQueryString() { 4 | const urlParams = new URLSearchParams(window.location.search); 5 | return urlParams.get('jobId'); 6 | } 7 | 8 | function populatePage() { 9 | jobId = getJobIdFromQueryString() 10 | fetch(`/jobs/${jobId}`, { method: 'GET' }) 11 | .then(response => response.json()) 12 | .then(data => { 13 | const jobData = data.job_data; 14 | document.getElementById('job-id').textContent = jobData.job_id; 15 | document.getElementById('job-status').textContent = jobData.job_status 16 | const jobParams = jobData.job_params.slice(1, -1).split(','); 17 | document.getElementById('repo').value = jobParams[1]; 18 | document.getElementById('versions').value = jobParams[2] 19 | }) 20 | .catch(error => { 21 | console.log('Error:', error); 22 | }); 23 | } 24 | 25 | // Function to enqueue the job 26 | function callEnqueue() { 27 | jobId = getJobIdFromQueryString() 28 | const repoInput = document.getElementById('repo').value; 29 | const versionsInput = 
document.getElementById('versions').value; 30 | 31 | const requestBody = { 32 | repo: repoInput, 33 | version: versionsInput, 34 | created_from: jobId 35 | }; 36 | 37 | console.log('Request Body:', requestBody); 38 | 39 | fetch(`/jobs/`, { 40 | method: 'POST', 41 | headers: { 42 | 'Content-Type': 'application/json', 43 | }, 44 | body: JSON.stringify(requestBody), 45 | }) 46 | .then(response => { 47 | console.log('Job enqueued successfully'); 48 | }) 49 | .catch(error => { 50 | console.log('Error:', error); 51 | }); 52 | } 53 | -------------------------------------------------------------------------------- /prospector/service/static/job_info.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: Arial, sans-serif; 3 | margin: 20px; 4 | } 5 | 6 | .job-details { 7 | max-width: 600px; 8 | margin: 0 auto; 9 | padding: 20px; 10 | background-color: #f8f8f8; 11 | border-radius: 5px; 12 | box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1); 13 | } 14 | 15 | .job-details h3 { 16 | color: #333; 17 | margin-top: 0; 18 | } 19 | 20 | .job-details p { 21 | margin: 0; 22 | color: #666; 23 | } 24 | 25 | .job-details .field { 26 | margin-top: 20px; 27 | } 28 | 29 | .job-details .field label { 30 | font-weight: bold; 31 | display: block; 32 | } 33 | 34 | .job-details .field span { 35 | color: #888; 36 | margin-left: 5px; 37 | } 38 | -------------------------------------------------------------------------------- /prospector/service/static/job_info.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Job Details 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
    15 |

    Job Details

    16 |
    17 | 18 | 19 |
    20 |
    21 | 22 | 23 |
    24 |
    25 | 26 | 27 |
    28 |
    29 | 30 | 31 |
    32 |
    33 | 34 | 35 |
    36 |
    37 | 38 | 39 |
    40 |
    41 | 42 | 43 |
    44 |
    45 | 46 | 47 |
    48 |
    49 | 50 | 51 |
    52 |
    53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /prospector/service/static/job_info.js: -------------------------------------------------------------------------------- 1 | 2 | function getJobIdFromQueryString() { 3 | const urlParams = new URLSearchParams(window.location.search); 4 | return urlParams.get('jobId'); 5 | } 6 | 7 | function JobInfoPage() { 8 | jobId = getJobIdFromQueryString() 9 | fetch(`/jobs/${jobId}`, { method: 'GET' }) 10 | .then(response => response.json()) 11 | .then(data => { 12 | const jobData = data.job_data; 13 | document.getElementById('job-id').textContent = jobData.job_id; 14 | document.getElementById('job-params').textContent = jobData.job_params; 15 | document.getElementById('job-enqueued').textContent = jobData.job_enqueued_at; 16 | document.getElementById('job-started').textContent = jobData.job_started_at; 17 | document.getElementById('job-finished').textContent = jobData.job_finished_at; 18 | document.getElementById('job-result').textContent = jobData.job_results; 19 | document.getElementById('job-created-by').textContent = jobData.job_created_by; 20 | document.getElementById('job-created-from').textContent = jobData.job_created_from; 21 | document.getElementById('job-status').textContent = jobData.job_status; 22 | }) 23 | .catch(error => { 24 | console.log('Error:', error); 25 | }); 26 | } 27 | -------------------------------------------------------------------------------- /prospector/service/static/report_list.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Job List 6 | 7 | 8 | 9 | 10 | 11 | 14 | 15 | 16 | 17 |
    18 |
    19 |
    20 |

    Report list

    21 |

    List of all the reports

    22 | 23 | 24 | {% for report in report_list %} 25 | 26 | 28 | 29 | 30 | {% endfor %} 31 | 32 |
    {{report.0}} 27 | {{ report.1.strftime('%Y-%m-%d %H:%M') }}
    33 |
    34 |
    35 |
    36 | 37 | -------------------------------------------------------------------------------- /prospector/stats/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/prospector/stats/__init__.py -------------------------------------------------------------------------------- /prospector/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SAP/project-kb/44a77d344a724559479c84da419b7dbc9f0bca42/prospector/util/__init__.py -------------------------------------------------------------------------------- /prospector/util/config_parser_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from util.config_parser import get_configuration 4 | 5 | 6 | @pytest.mark.skip(reason="Let's skip this for now") 7 | def test_get_configuration(): 8 | """Test get_configuration()""" 9 | config = get_configuration(None) 10 | assert config is not None 11 | -------------------------------------------------------------------------------- /prospector/util/inspection.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from typing import Tuple 3 | 4 | 5 | def caller_name(skip=2) -> Tuple[str, ...]: 6 | """Get a name of a caller in the format module.class.method 7 | Note: It will not work correctly for static methods in classes. 8 | 9 | `skip` specifies how many levels of stack to skip while getting caller 10 | name. skip=1 means "who calls me", skip=2 "who calls my caller" etc. 
11 | 12 | An empty string is returned if skipped levels exceed stack height 13 | """ 14 | stack = inspect.stack() 15 | start = 0 + skip 16 | if len(stack) < start + 1: 17 | return tuple() 18 | parent_frame = stack[start][0] 19 | name = [] 20 | module = inspect.getmodule(parent_frame) 21 | # `modname` can be None when frame is executed directly in console 22 | if module: 23 | name.extend(module.__name__.split(".")) 24 | # detect classname 25 | if "self" in parent_frame.f_locals: 26 | # I don't know any way to detect call from the object method 27 | # there seems to be no way to detect static method call - it will be just a function call 28 | name.append(parent_frame.f_locals["self"].__class__.__qualname__) 29 | codename = parent_frame.f_code.co_name 30 | if codename != "": # top level usually 31 | name.append(codename) # function or a method 32 | del parent_frame 33 | return tuple(name) 34 | -------------------------------------------------------------------------------- /prospector/util/lsh.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import pickle 3 | from typing import List 4 | 5 | from datasketch import MinHash, MinHashLSH 6 | from datasketch.lean_minhash import LeanMinHash 7 | 8 | PERMUTATIONS = 128 9 | # TODO: Which threshold to use? 
10 | THRESHOLD = 0.8 11 | 12 | 13 | def get_encoded_minhash(string: str) -> str: 14 | """Compute a MinHash object from a string and encode it""" 15 | return encode_minhash(compute_minhash(string)) 16 | 17 | 18 | def string_encoder(string: str) -> List[bytes]: 19 | """Encode a string into a list of bytes (utf-8)""" 20 | return [w.encode("utf-8") for w in string.split()] 21 | 22 | 23 | def encode_minhash(mhash: LeanMinHash) -> str: 24 | """Encode a LeanMinHash object into a string""" 25 | return base64.b64encode(pickle.dumps(mhash)).decode("utf-8") 26 | buf = bytearray(mhash.bytesize()) 27 | mhash.serialize(buf) 28 | return buf 29 | 30 | 31 | def decode_minhash(buf: str) -> LeanMinHash: 32 | """Decode a LeanMinHash object from a string""" 33 | return pickle.loads(base64.b64decode(buf.encode("utf-8"))) 34 | 35 | 36 | def compute_minhash(string: str) -> LeanMinHash: 37 | """Compute a MinHash object from a string""" 38 | m = MinHash(num_perm=PERMUTATIONS) 39 | for d in string_encoder(string): 40 | m.update(d) 41 | return LeanMinHash(m) 42 | 43 | 44 | def compute_multiple_minhashes(strings: List[str]) -> List[LeanMinHash]: 45 | """Compute multiple MinHash objects from a list of strings""" 46 | return [ 47 | LeanMinHash(mh) 48 | for mh in MinHash.bulk( 49 | [string_encoder(s) for s in strings], num_perm=PERMUTATIONS 50 | ) 51 | ] 52 | 53 | 54 | def create(threshold: float, permutations: int): 55 | return MinHashLSH(threshold=threshold, num_perm=permutations) 56 | 57 | 58 | def insert(lsh: MinHashLSH, id: str, hash: LeanMinHash): 59 | lsh.insert(id, hash) 60 | 61 | 62 | def build_lsh_index() -> MinHashLSH: 63 | return MinHashLSH(threshold=THRESHOLD, num_perm=PERMUTATIONS) 64 | 65 | 66 | def create_lsh_from_data(ids: List[str], data: List[str]) -> MinHashLSH: 67 | """Create a MinHashLSH object from a list of strings""" 68 | lsh = MinHashLSH(threshold=THRESHOLD, num_perm=PERMUTATIONS) 69 | mhashes = compute_multiple_minhashes(data) 70 | for id, hash in zip(ids, mhashes): 71 
| lsh.insert(id, hash) 72 | return lsh 73 | 74 | 75 | def query_lsh(lsh: MinHashLSH, string: str) -> List[str]: 76 | """Query a MinHashLSH object with a string""" 77 | mhash = compute_minhash(string) 78 | return lsh.query(mhash) 79 | -------------------------------------------------------------------------------- /prospector/util/profile.py: -------------------------------------------------------------------------------- 1 | import cProfile 2 | import io 3 | import pstats 4 | 5 | 6 | def profile(fnc): 7 | def inner(*args, **kwargs): 8 | 9 | pr = cProfile.Profile() 10 | pr.enable() 11 | retval = fnc(*args, **kwargs) 12 | pr.disable() 13 | s = io.StringIO() 14 | sortby = "cumulative" 15 | ps = pstats.Stats(pr, stream=s).sort_stats(sortby) 16 | ps.print_stats() 17 | print(s.getvalue()) 18 | return retval 19 | 20 | return inner 21 | -------------------------------------------------------------------------------- /prospector/util/report_analyzer.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | import re 4 | from typing import Dict, List 5 | 6 | 7 | def load_dataset_file(file_path: str) -> List[Dict]: 8 | with open(file_path, "r") as f: 9 | reader = csv.reader(f, delimiter=";") 10 | return [row for row in reader if bool(re.match(r"CVE-\d{4}-\d{4,8}", row[0]))] 11 | 12 | 13 | def check_rule_strenght(rules: List[Dict]): 14 | for rule in rules: 15 | if rule["id"] == "COMMIT_IN_REFERENCE": 16 | return 1 17 | if rule["relevance"] > 30: 18 | return 2 19 | 20 | return 0 21 | 22 | 23 | def analyze_report(report_file: str, commits: str): 24 | with open(report_file, "r") as f: 25 | report = json.load(f) 26 | for rank, commit in enumerate(report["commits"]): 27 | 28 | rules_strenght = check_rule_strenght(commit["matched_rules"]) 29 | 30 | # If the commit is contained in the ground truth 31 | if commit["commid_id"] in commits: 32 | return ( 33 | True, 34 | rules_strenght, 35 | commit["commit_id"], 36 | rank, 37 | 
) 38 | # If a twin of the commit is contained in the ground truth 39 | for twin in commit["twins"]: 40 | if twin[1] in commits: 41 | return ( 42 | True, 43 | rules_strenght, 44 | commit["commit_id"], 45 | rank, 46 | ) 47 | # If the commit is not contained in the ground truth but matches a strong rule 48 | if rules_strenght > 0: 49 | return ( 50 | False, 51 | rules_strenght, 52 | commit["commit_id"], 53 | rank, 54 | ) 55 | return False, 0, "", -1 56 | 57 | 58 | def analyze_results(dataset_path: str): 59 | cves = load_dataset_file(dataset_path) 60 | results = { 61 | "analyzed": {"found": {}, "not_found": {}}, 62 | "not_analyzed": [], 63 | "not_sure": [], 64 | } 65 | for cve in cves: 66 | try: 67 | is_fix, rule_type, commit, rank = analyze_report( 68 | f"{dataset_path[:-4]}/{cve[0]}.json", cve[4] 69 | ) 70 | except FileNotFoundError: 71 | results["not_analyzed"].append(cve[0]) 72 | -------------------------------------------------------------------------------- /prospector/util/singleton.py: -------------------------------------------------------------------------------- 1 | from log.logger import logger 2 | 3 | 4 | class Singleton(type): 5 | """Singleton class to ensure that any class inheriting from this one can only be instantiated once.""" 6 | 7 | _instances = {} 8 | 9 | def __call__(cls, *args, **kwargs): 10 | if cls not in cls._instances: 11 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 12 | else: 13 | logger.info( 14 | f"Cannot instantiate a Singleton twice. Returning already existing instance of class {cls}." 
15 | ) 16 | return cls._instances[cls] 17 | -------------------------------------------------------------------------------- /prospector/util/test_type_safety.py: -------------------------------------------------------------------------------- 1 | from util.type_safety import is_instance_of_either 2 | 3 | 4 | def test_is_instance_of_either(): 5 | assert is_instance_of_either([0, 1, 2, 3], int) is True 6 | assert is_instance_of_either(["0", 1, 2, 3], int) is False 7 | assert is_instance_of_either([1.34, 2.2, 3.5], float) is True 8 | assert is_instance_of_either([1.34, 2.2, "3.5"], float) is False 9 | assert is_instance_of_either([1, 2.2, 3.5, 42], int, float) is True 10 | -------------------------------------------------------------------------------- /prospector/util/tokenize.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Tuple 3 | 4 | 5 | def split_by_non_word(*texts: str) -> Tuple[str, ...]: 6 | for text in texts: 7 | yield from [part for part in re.split(r"[\W_]", text) if part != ""] 8 | 9 | 10 | def split_by_upper_cases(*texts: str) -> Tuple[str, ...]: 11 | for text in texts: 12 | yield from [ 13 | part 14 | for part in re.sub(r"([A-Z])", r" \1", text).split(sep=" ") 15 | if part != "" 16 | ] 17 | 18 | 19 | def lower_all(*texts: str) -> Tuple[str, ...]: 20 | return tuple(text.lower() for text in texts) 21 | 22 | 23 | def tokenize_non_nl_term(term: str) -> Tuple[str, ...]: 24 | return lower_all(*split_by_non_word(*split_by_upper_cases(term))) 25 | -------------------------------------------------------------------------------- /prospector/util/type_safety.py: -------------------------------------------------------------------------------- 1 | def is_instance_of_either(collection, *types_to_check) -> bool: 2 | for item in collection: 3 | item_is_good = False 4 | for _type in types_to_check: 5 | if isinstance(item, _type): 6 | item_is_good = True 7 | break 8 | if not item_is_good: 9 | 
return False 10 | return True 11 | -------------------------------------------------------------------------------- /scripts/release.sh: -------------------------------------------------------------------------------- 1 | PROJECT_ROOT=/home/i064196/devel/project-kb 2 | 3 | nano $PROJECT_ROOT/kaybee/VERSION 4 | 5 | echo "Enter to proceed releasing version: `cat $PROJECT_ROOT/kaybee/VERSION`" 6 | # echo $1 > $PROJECT_ROOT/kaybee/VERSION 7 | read 8 | 9 | RELEASE=`cat $PROJECT_ROOT/kaybee/VERSION` 10 | 11 | echo "Building..." 12 | make -C kaybee check build-all 13 | 14 | echo "Tagging as \"v$RELEASE\"..." 15 | git tag v$RELEASE -f 16 | 17 | echo "Creating changelog..." 18 | #> NEW-CHANGELOG.md 19 | #head -n1 CHANGELOG.md >> NEW-CHANGELOG.md 20 | #echo >> NEW-CHANGELOG.md 21 | $PROJECT_ROOT/scripts/changelog-gen.py > /tmp/CHANGELOG-${RELEASE}.md 22 | #cat /tmp/CHANGELOG-${RELEASE}.md >> NEW-CHANGELOG.md 23 | #tail -n +2 CHANGELOG.md >> NEW-CHANGELOG.md 24 | #mv NEW-CHANGELOG.md CHANGELOG.md 25 | 26 | echo "Creating commit for new release..." 27 | git add kaybee/VERSION 28 | git commit -m "release $RELEASE" 29 | git push 30 | 31 | echo "Updating tag..." 
32 | git tag v$RELEASE -f 33 | git push --tags 34 | 35 | echo "Creating GH release" 36 | gh release create v$RELEASE \ 37 | kaybee/dist/kaybee-${RELEASE}_linux-amd64 \ 38 | kaybee/dist/kaybee-${RELEASE}_darwin-amd64 \ 39 | kaybee/dist/kaybee-${RELEASE}_win-amd64 \ 40 | --notes-file /tmp/CHANGELOG-${RELEASE}.md \ 41 | --title "v$RELEASE" 42 | 43 | echo "Update the version for the next relase cycle (Enter to proceed)" 44 | read 45 | nano $PROJECT_ROOT/kaybee/VERSION 46 | 47 | git add $PROJECT_ROOT/kaybee/VERSION 48 | RELEASE=`cat $PROJECT_ROOT/kaybee/VERSION` 49 | git commit -m "start working on version $RELEASE" 50 | 51 | echo "Done" 52 | -------------------------------------------------------------------------------- /scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | bibtexparser==2.0.0b7 2 | pylatexenc==3.0a25 3 | pyparsing==3.1.2 4 | -------------------------------------------------------------------------------- /vulnerability-data/README.md: -------------------------------------------------------------------------------- 1 | # Vulnerability data 2 | 3 | All vulnerability data is in the [`statements`](https://github.com/SAP/project-kb/tree/vulnerability-data/statements) directory in the dedicated branch "vulnerability-data". 4 | --------------------------------------------------------------------------------